<a href="https://colab.research.google.com/github/ItMeansBigMountain/wutHappened/blob/googleColab/news_LLM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -r requirements.txt
!pip install torch==2.0.1+cu118 -f https://download.pytorch.org/whl/cu118/torch_stable.html
!pip install torchvision
!pip install opencv-python

In [None]:
import os
import json

from NewsApi import NewsApi
from ImageGenerator import ImageGenerator
from ScriptGenerator import ScriptGenerator

import torch
import torch.utils.checkpoint as checkpoint
import gc

In [None]:
# INIT MODEL STORAGE
# Resolve the local cache folder to an absolute path and publish it through the
# TRANSFORMERS_CACHE environment variable.
# NOTE(review): if the libraries imported in the previous cell read this
# variable at import time, setting it here may be too late -- confirm.
cache_dir = os.path.abspath("./my_model_cache")
os.environ.update(TRANSFORMERS_CACHE=cache_dir)

# INIT DATA INGESTION
# NewsApi is constructed with api=True and webscrape=False.
# presumably this selects the API source over web scraping -- verify in NewsApi.
news = NewsApi(webscrape=False, api=True)

In [4]:
# INIT AI MODELS
# Image generator: Stable Diffusion v1-4; cache_dir is forwarded to the wrapper.
# Alternative checkpoint kept for reference:
# image_gen = ImageGenerator("stabilityai/stable-diffusion-xl-base-1.0", cache_dir=cache_dir)
image_gen = ImageGenerator(
    "CompVis/stable-diffusion-v1-4",
    cache_dir=cache_dir,
)

# Script generator: distilgpt2.
# NOTE(review): device=0 presumably selects the first CUDA device -- confirm in ScriptGenerator.
script_gen = ScriptGenerator(
    "distilgpt2",
    device=0,
)


In [5]:
# INIT OUTPUT DIR
# Absolute path of the folder that receives the generated images and the JSON summary.
output_dir = os.path.abspath("./output/")
# exist_ok=True keeps the cell idempotent on re-run and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(output_dir, exist_ok=True)

In [6]:
# Mapping from article index to the record built for it by the generation loop.
article_data = dict()

# Pull the articles to process from the news client.
# NOTE(review): the loop below calls .get(...) on each item, so each article is
# presumably a dict-like object -- confirm against NewsApi.fetch_api.
articles = news.fetch_api()

In [7]:
# Generate images and scripts
# For every fetched article: choose a text prompt, generate an image and a
# script from it, save the image as image_<idx>.png in output_dir, and record
# the article's fields in article_data under its index.
for idx, article in enumerate(articles):
    # Prefer the description; fall back to the title when the description is
    # missing or empty. The prompt is chosen once so both generators always
    # receive the same text.
    prompt = article.get('description') or article.get('title')
    if not prompt:
        # Nothing to condition the generators on -- skip rather than pass None.
        print(f"Skipping article {idx}: no description or title")
        continue

    # Generation runs with gradient tracking disabled.
    with torch.no_grad():
        # GENERATE IMAGE
        image = image_gen.generate_image(prompt)

        # GENERATE SCRIPT
        script = script_gen.generate_script(
            prompt, max_length=100, num_return_sequences=1
        )

    # Save Images and News Scripts
    image_path = os.path.join(output_dir, f"image_{idx}.png")
    image.save(image_path)

    # Populate dictionary with article data
    article_data[idx] = {
        "title": article.get('title'),
        "original_story": article.get('description'),
        "script": script,
        "image": image_path,
        "news_source": article.get('source'),
        "author": article.get('author'),
    }

    print(f"Saved image and script for article {idx} to {output_dir}")

    # Drop the per-iteration references (article_data still holds the script),
    # collect garbage first, then release cached GPU memory so that anything
    # freed by the collector can actually be returned.
    del image, script
    gc.collect()
    torch.cuda.empty_cache()


  0%|          | 0/50 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Saved image and script for article 0 to /content/output


  0%|          | 0/50 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Saved image and script for article 1 to /content/output


  0%|          | 0/50 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Saved image and script for article 2 to /content/output


  0%|          | 0/50 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Saved image and script for article 3 to /content/output


  0%|          | 0/50 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Saved image and script for article 4 to /content/output


  0%|          | 0/50 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Saved image and script for article 5 to /content/output


  0%|          | 0/50 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Saved image and script for article 6 to /content/output


  0%|          | 0/50 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Saved image and script for article 7 to /content/output


  0%|          | 0/50 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Saved image and script for article 8 to /content/output


  0%|          | 0/50 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Saved image and script for article 9 to /content/output


  0%|          | 0/50 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Saved image and script for article 10 to /content/output


  0%|          | 0/50 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Saved image and script for article 11 to /content/output


  0%|          | 0/50 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Saved image and script for article 12 to /content/output


  0%|          | 0/50 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Saved image and script for article 13 to /content/output


  0%|          | 0/50 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Saved image and script for article 14 to /content/output


  0%|          | 0/50 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Saved image and script for article 15 to /content/output


  0%|          | 0/50 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Saved image and script for article 16 to /content/output


  0%|          | 0/50 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Saved image and script for article 17 to /content/output


  0%|          | 0/50 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Saved image and script for article 18 to /content/output


  0%|          | 0/50 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Saved image and script for article 19 to /content/output


In [8]:
# Save dictionary as JSON file
json_path = os.path.join(output_dir, "article_data.json")
# Serialize before opening the file: if a value in article_data is not
# JSON-serializable the error is raised here, and an existing file is not
# truncated or left half-written.
# ensure_ascii=False keeps non-ASCII text as-is, hence the explicit UTF-8 encoding.
payload = json.dumps(article_data, ensure_ascii=False, indent=4)
with open(json_path, "w", encoding='utf-8') as f:
    f.write(payload)