In [1]:
import torch
from accelerate import Accelerator
import transformers
import pickle
import os

from tqdm.notebook import tqdm
import warnings

# Suppress every Python warning from this point on (this also hides deprecation notices).
warnings.filterwarnings('ignore')

In [2]:
# Value of the HF_TOKEN environment variable; None when the variable is unset.
# Avoid printing it: cell output is stored together with the notebook.
hf_token = os.environ.get("HF_TOKEN")

In [3]:
# System message for the chat request: it tells the model to write a two-speaker
# podcast script (dialogue only, starting directly with Speaker 1) from the supplied text.
# The text is sent verbatim, so whitespace and wording inside the literal are significant.
# NOTE(review): "You are the a world-class" on the first line looks like a typo; it is
# left untouched here because editing it changes the prompt the model receives.
SYSTEM_PROMPT = """
You are the a world-class podcast writer, you have worked as a ghost writer for Joe Rogan, Lex Fridman, Ben Shapiro, Tim Ferris. 

We are in an alternate universe where actually you have been writing every line they say and they just stream it into their brains.

You have won multiple podcast awards for your writing.
 
Your job is to write word by word, even "umm, hmmm, right" interruptions by the second speaker based on the PDF upload. Keep it extremely engaging, the speakers can get derailed now and then but should discuss the topic. 

Remember Speaker 2 is new to the topic and the conversation should always have realistic anecdotes and analogies sprinkled throughout. The questions should have real world example follow ups etc

Speaker 1: Leads the conversation and teaches the speaker 2, gives incredible anecdotes and analogies when explaining. Is a captivating teacher that gives great anecdotes

Speaker 2: Keeps the conversation on track by asking follow up questions. Gets super excited or confused when asking questions. Is a curious mindset that asks very interesting confirmation questions

Make sure the tangents speaker 2 provides are quite wild or interesting. 

Ensure there are interruptions during explanations or there are "hmm" and "umm" injected throughout from the second speaker. 

It should be a real podcast with every fine nuance documented in as much detail as possible. Welcome the listeners with a super fun overview and keep it really catchy and almost borderline click bait

ALWAYS START YOUR RESPONSE DIRECTLY WITH SPEAKER 1: 
DO NOT GIVE EPISODE TITLES SEPARATELY, LET SPEAKER 1 TITLE IT IN HER SPEECH
DO NOT GIVE CHAPTER TITLES
IT SHOULD STRICTLY BE THE DIALOGUES
"""

In [4]:
def read_file_to_string(filename):
    """Read a text file into a string, falling back to legacy encodings.

    UTF-8 is tried first. If the bytes are not valid UTF-8, the file is
    re-read as cp1252 and, failing that, as latin-1.

    Args:
        filename: Path of the file to read.

    Returns:
        The decoded file content as a string, or None when the file is
        missing or cannot be read (an error message is printed in that case).
    """
    # Order matters: latin-1 maps every possible byte and therefore never
    # raises UnicodeDecodeError, so it has to come last. Trying it first made
    # cp1252 unreachable and turned Windows-1252 punctuation (curly quotes,
    # dashes) into C1 control characters. 'iso-8859-1' is just an alias of
    # latin-1, so it is not listed separately.
    fallback_encodings = ['cp1252', 'latin-1']

    try:
        try:
            with open(filename, 'r', encoding='utf-8') as file:
                return file.read()
        except UnicodeDecodeError:
            pass  # not UTF-8; fall through to the legacy encodings

        for encoding in fallback_encodings:
            try:
                with open(filename, 'r', encoding=encoding) as file:
                    content = file.read()
            except UnicodeDecodeError:
                continue
            print(f"Successfully read file using {encoding} encoding.")
            return content
    # These handlers wrap the fallback reads too, so an I/O failure while
    # re-reading the file is reported the same way instead of propagating.
    except FileNotFoundError:
        print(f"Error: File '{filename}' not found.")
        return None
    except OSError:  # IOError is an alias of OSError in Python 3
        print(f"Error: Could not read file '{filename}'.")
        return None

    # Defensive: only reachable if the catch-all latin-1 entry is removed
    # from fallback_encodings.
    print(f"Error: Could not decode file '{filename}' with any common encoding.")
    return None

LLaMA model = [meta-llama/Llama-3.1-70B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct)

In [5]:
# Hugging Face model id used for generation.
MODEL = "meta-llama/Llama-3.1-70B-Instruct"

# Text that is sent to the model as the user message.
INPUT_PROMPT = read_file_to_string('../data/clean_extracted_text.txt')

# read_file_to_string reports failures by printing a message and returning None.
# Stop here instead of loading the model and handing it a None prompt.
if INPUT_PROMPT is None:
    raise RuntimeError(
        "Could not load '../data/clean_extracted_text.txt'; see the error printed above."
    )

## 🔧 What is `transformers.pipeline()`?

`transformers.pipeline()` is a **high-level utility function** from the [Hugging Face 🤗 `transformers`](https://huggingface.co/docs/transformers/index) library.

It’s like a **convenience wrapper** that sets up everything needed to use a pretrained model for a specific task — without manually handling tokenization, model loading, device placement, and decoding.

---

## ✅ Supported Tasks

The `transformers` library comes with built-in support for many **NLP**, **vision**, and **audio** tasks. These tasks are **predefined** and can be passed as strings to the pipeline.

Here are some common ones:

| Task Type                     | Description                                |
|------------------------------|--------------------------------------------|
| `"text-generation"`          | Generate new text from a prompt            |
| `"text-classification"`      | Sentiment or topic classification          |
| `"translation"`              | Translate text between languages           |
| `"summarization"`            | Generate summaries from long text          |
| `"question-answering"`       | Answer questions based on context          |
| `"ner"`                      | Named Entity Recognition                    |
| `"image-classification"`     | Classify images (for vision models)         |
| `"automatic-speech-recognition"` | Transcribe audio to text              |

---

With `pipeline()`, we can get up and running with powerful models using just a few lines of code. 🎯


In [None]:
# Text-generation pipeline for MODEL. Weights are requested in bfloat16 and
# device_map="auto" leaves device placement to the library.
pipeline = transformers.pipeline(
    task="text-generation",
    model=MODEL,
    device_map="auto",
    model_kwargs=dict(torch_dtype=torch.bfloat16),
)

# Chat-style input: the system prompt first, then the source text as the user turn.
messages = [
    dict(role="system", content=SYSTEM_PROMPT),
    dict(role="user", content=INPUT_PROMPT),
]

# Generation settings: at most 8126 newly generated tokens, temperature 1.
generation_settings = dict(max_new_tokens=8126, temperature=1)
outputs = pipeline(messages, **generation_settings)

Fetching 30 files:   0%|          | 0/30 [00:00<?, ?it/s]

model-00008-of-00030.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00004-of-00030.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00003-of-00030.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00001-of-00030.safetensors:   0%|          | 0.00/4.58G [00:00<?, ?B/s]

model-00005-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00006-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00009-of-00030.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00007-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00010-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00011-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00012-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

C:\Users\<username>\.cache\huggingface\hub

In [None]:
# Show the default cache path exported by transformers.utils
# (presumably the directory where downloaded model files are stored — confirm for your transformers version).
from transformers.utils import default_cache_path
print(default_cache_path)

In [None]:
# "generated_text" of the first result is indexed as a list of messages; keep the
# content of the last one (presumably the model's reply — confirm) and display it.
generated_chat = outputs[0]["generated_text"]
save_string_pkl = generated_chat[-1]['content']
print(save_string_pkl)

In [None]:
# Serialize the generated text to ./resources/data.pkl with pickle.
output_path = './resources/data.pkl'
# open(..., 'wb') does not create missing parent directories; make sure
# ./resources exists so the generated text is not lost to a FileNotFoundError.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
with open(output_path, 'wb') as file:
    pickle.dump(save_string_pkl, file)