In [None]:
!pip install transformers accelerate bitsandbytes langchain requests langchain_community langchain-huggingface
import requests
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch
from google.colab import userdata

# Module-level cache for the loaded model and tokenizer. Both start as None and
# are filled in by load_model_and_tokenizer() on first use, so the weights are
# loaded once and reused by later calls.
model = None
tokenizer = None

def load_model_and_tokenizer():
    """Populate the module-level ``model`` and ``tokenizer`` on first use.

    When both globals are already set, only a notice is printed. Otherwise the
    tokenizer and an 8-bit quantized Meta-Llama-3-8B-Instruct model are loaded
    from Hugging Face, authenticated with the ``HF_TOKEN`` Colab secret.
    """
    global model, tokenizer

    # Fast path: nothing to do once both objects are cached.
    if model is not None and tokenizer is not None:
        print("Model and tokenizer already loaded.")
        return

    print("GPU:", torch.cuda.get_device_name(0))

    # Hugging Face access token is stored in the Colab secrets.
    access_token = userdata.get('HF_TOKEN')
    checkpoint = "meta-llama/Meta-Llama-3-8B-Instruct"

    # 8-bit weights (chosen by the original author for T4 GPU compatibility).
    eight_bit = BitsAndBytesConfig(load_in_8bit=True)

    tokenizer = AutoTokenizer.from_pretrained(checkpoint, token=access_token)
    # Reuse the end-of-sequence token for padding when none is defined.
    if tokenizer.pad_token_id is None:
        tokenizer.pad_token_id = tokenizer.eos_token_id

    model = AutoModelForCausalLM.from_pretrained(
        checkpoint,
        device_map="cuda",
        torch_dtype=torch.float16,
        quantization_config=eight_bit,
        token=access_token,
    )

def generate_llama_response(prompt, max_new_tokens=100, temperature=0.7, top_p=0.9, do_sample=True):
    """Generate a continuation of ``prompt`` with the lazily loaded LLaMA model.

    Args:
        prompt: Text to feed to the model.
        max_new_tokens: Upper bound on the number of tokens to generate.
        temperature: Sampling temperature; only used when ``do_sample`` is true.
        top_p: Nucleus-sampling threshold; only used when ``do_sample`` is true.
        do_sample: Sample from the distribution instead of decoding greedily.

    Returns:
        The decoded text of the newly generated tokens only (the prompt is
        stripped), with special tokens removed.
    """
    load_model_and_tokenizer()

    # No `truncation=True`: without a max_length it does nothing except emit
    # an "Asking to truncate to max_length" warning on every call.
    # Tensors are moved to wherever the model actually lives rather than to a
    # hard-coded "cuda" device.
    inputs = tokenizer(prompt, return_tensors="pt", padding=True).to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    generation_kwargs = {
        "max_new_tokens": max_new_tokens,
        "do_sample": do_sample,
        # Passing the pad token explicitly avoids the per-call
        # "Setting `pad_token_id` to `eos_token_id`" notice.
        "pad_token_id": tokenizer.pad_token_id,
    }
    if do_sample:
        # Sampling knobs are meaningless for greedy decoding, so they are only
        # forwarded when sampling is requested.
        generation_kwargs["temperature"] = temperature
        generation_kwargs["top_p"] = top_p

    with torch.no_grad():
        output_ids = model.generate(
            inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            **generation_kwargs,
        )

    # The output holds prompt + continuation; keep only the continuation.
    generated_ids = output_ids[0][prompt_length:]
    return tokenizer.decode(generated_ids, skip_special_tokens=True)

def generate_sentiment(tweet_text):
    """Classify the sentiment of an X post with the LLaMA model.

    Args:
        tweet_text: Text of the post to classify.

    Returns:
        One of ``positive``, ``negative``, ``neutral``, ``happy``, ``sad``,
        ``angry``, ``excited``; or ``"unknown"`` when the model's first word is
        not one of those labels (or the model returned nothing).
    """
    sentiment_prompt = f"""
Determine the sentiment of the following X post and return it as a string. Possible sentiments include: positive, negative, neutral, happy, sad, angry, excited. Output only the sentiment.

X post: "{tweet_text}"

Sentiment:"""
    raw_sentiment = generate_llama_response(sentiment_prompt, max_new_tokens=10)

    words = raw_sentiment.split()
    if not words:
        return "unknown"

    # Models often answer "Positive." or "\"positive\"" — strip surrounding
    # punctuation/markup so those answers are not misreported as "unknown".
    sentiment = words[0].strip(".,;:!?\"'`*()[]{}<>-").lower()

    valid_sentiments = ("positive", "negative", "neutral", "happy", "sad", "angry", "excited")
    return sentiment if sentiment in valid_sentiments else "unknown"

def generate_summary(tweet_text):
    """Ask the LLaMA model for a short one-sentence summary of an X post.

    Args:
        tweet_text: Text of the post to summarize.

    Returns:
        The model's continuation (at most 50 new tokens) with leading and
        trailing whitespace removed.
    """
    prompt = f"""
Write a short one-sentence summary of the following X post.

X post: "{tweet_text}"

Summary:"""
    return generate_llama_response(prompt, max_new_tokens=50).strip()

def parse_url(url):
    """Extract the username and post ID from an X post URL.

    Query strings, fragments, trailing slashes and extra path segments (such
    as ``/photo/1``) are ignored, so share links like ``...?s=20`` parse cleanly.

    Args:
        url: A URL of the form ``https://x.com/<user>/status/<post_id>``.

    Returns:
        A ``(username, post_id)`` tuple of strings.

    Raises:
        ValueError: If the URL has no scheme/host, does not follow the
            ``/<user>/status/<post_id>`` layout, or the post ID is not numeric.
    """
    from urllib.parse import urlsplit

    error_message = "Invalid X URL format. Expected: https://x.com/user/status/post_id"

    pieces = urlsplit(url.strip())
    # Only the path matters; urlsplit has already separated ?query and #fragment.
    segments = [segment for segment in pieces.path.split("/") if segment]

    if not (pieces.scheme and pieces.netloc):
        raise ValueError(error_message)
    if len(segments) < 3 or segments[1] != "status":
        raise ValueError(error_message)

    username, post_id = segments[0], segments[2]
    # The ID is later interpolated into an API URL, so insist on plain digits.
    if not (post_id.isascii() and post_id.isdigit()):
        raise ValueError(error_message)
    return username, post_id

def get_tweet_text(post_id, bearer_token):
    """Fetch the text of a post through the X API v2 single-post lookup.

    Args:
        post_id: ID of the post to fetch.
        bearer_token: App bearer token used for the ``Authorization`` header.

    Returns:
        The post text. For long posts the full ``note_tweet`` text is returned
        when the API provides it; otherwise the standard ``text`` field.

    Raises:
        Exception: If the API answers with a non-200 status, or with a 200
            response that carries no post data (e.g. deleted/protected post).
        requests.exceptions.RequestException: On network failure or timeout.
    """
    url = f"https://api.x.com/2/tweets/{post_id}"
    headers = {"Authorization": f"Bearer {bearer_token}"}
    # `text` is cut short for long posts; per the X API v2 docs the full body
    # is exposed through the optional `note_tweet` field.
    params = {"tweet.fields": "note_tweet"}

    # A timeout keeps the notebook cell from hanging forever on a stalled connection.
    response = requests.get(url, headers=headers, params=params, timeout=10)
    if response.status_code != 200:
        raise Exception(f"Failed to fetch tweet: {response.status_code} - {response.text}")

    payload = response.json()
    data = payload.get("data")
    if not data or "text" not in data:
        # The API can answer 200 with only an `errors` array; report that
        # instead of failing with a bare KeyError.
        raise Exception(f"Failed to fetch tweet: no post data in response - {payload.get('errors', payload)}")

    note = data.get("note_tweet") or {}
    return note.get("text") or data["text"]

def main(url):
    """Fetch the X post behind ``url`` and print its sentiment and summary.

    Prints the parsed username and post ID, the fetched text, and then the
    model's sentiment label and one-sentence summary. Any exception raised
    along the way is caught and reported as a single printed line.
    """
    try:
        # The bearer token comes from the Colab secrets; surrounding
        # whitespace is removed before use.
        api_token = userdata.get('BEARER_TOKEN').strip()

        handle, tweet_id = parse_url(url)
        print(f"Username: {handle}")
        print(f"Post ID: {tweet_id}")

        text = get_tweet_text(tweet_id, api_token)
        print(f"Tweet Text: {text}")

        print("\n--- LLaMA Model Analysis ---")

        # Both model calls complete before either result is printed.
        label = generate_sentiment(text)
        digest = generate_summary(text)

        print(f"Sentiment: {label}")
        print(f"Summary: {digest}")
    except Exception as err:
        print(f"An error occurred: {err}")

# Entry point: analyze one hard-coded example post when the cell/script is run directly.
if __name__ == "__main__":
    # URL of the post to fetch and analyze.
    x_url = "https://x.com/BillGates/status/1902597037935497324"
    main(x_url)

Username: BillGates
Post ID: 1902597037935497324
Tweet Text: I had a very productive meeting with @jpnadda where we discussed India’s impressive progress in public health. India’s commitment to eliminating infectious diseases, advancing maternal and child healthcare, and leveraging digital health platforms and AI, is creating healthier… https://t.co/DPV5q1XamX https://t.co/ITp6Oynlaq

--- LLaMA Model Analysis ---
GPU: Tesla T4


tokenizer_config.json:   0%|          | 0.00/51.0k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/654 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/187 [00:00<?, ?B/s]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Model and tokenizer already loaded.
Sentiment: positive
Summary: The author had a productive meeting with @jpnadda to discuss India's progress in public health, specifically their efforts to eliminate infectious diseases and improve maternal and child healthcare.
