In [12]:
import google.generativeai as genai
import os
from dotenv import load_dotenv
import pandas as pd

In [3]:
load_dotenv()

True

In [4]:
GEMINI_API_KEY = os.getenv('GEMINI_API_KEY')
if not GEMINI_API_KEY:
    raise ValueError("Please set GEMINI_API_KEY in your .env file")

genai.configure(api_key=GEMINI_API_KEY)

In [None]:
def classify_news_with_gemini(prompt_template, text):
    """Classify news using Google Gemini"""
    prompt = prompt_template.format(text=text[:800]) 
    
    try:
        model = genai.GenerativeModel("gemini-1.5-flash")
        
        
        response = model.generate_content(
            prompt,
            generation_config=genai.types.GenerationConfig(
                max_output_tokens=15,
                temperature=0,
                stop_sequences=["\n"] 
            )
        )
        
        return response.text.strip()
    
    except Exception as e:
        return f"Error: {str(e)}"

In [6]:
prompt1 = """Classify the following news article into exactly one of these categories: 
business, entertainment, politics, sport, tech. 
Answer with only the category name.

Article: {text}
Category:"""

# Prompt 2: Content-Based Analysis
prompt2 = """Analyze the content of this news article and determine its primary category.
Consider the main topics, entities, and context.

Available categories:
- business: Companies, markets, economy, finance
- entertainment: Movies, music, celebrities, arts
- politics: Government, elections, policies, political figures
- sport: Sports events, athletes, games, competitions
- tech: Technology, gadgets, software, innovation

Article: {text}

Based on the content analysis, this belongs to the category:"""

# Prompt 3: Few-Shot Learning with Examples
prompt3 = """I need to classify news articles into categories. Here are examples:

1. "Apple shares rose 5% after strong quarterly earnings" → business
2. "New Marvel movie breaks box office records" → entertainment
3. "President announces new climate policy" → politics
4. "Manchester United wins championship final" → sport
5. "New iPhone features revolutionary AI technology" → tech

Now classify this article: {text}
Category:"""


In [7]:
sample_articles = [
    "Tesla reported record quarterly profits amid growing electric vehicle demand",
    "The new Avengers movie premiered to critical acclaim worldwide",
    "Parliament debates new immigration legislation this week",
    "Liverpool FC signs promising young striker from Barcelona",
    "Microsoft unveils new AI-powered operating system features"
]

true_categories = ['business', 'entertainment', 'politics', 'sport', 'tech']


In [None]:
for prompt_num, prompt in enumerate([prompt1, prompt2, prompt3], 1):
    print(f"\n{'='*50}")
    print(f"TESTING PROMPT {prompt_num}")
    print(f"{'='*50}")
    
    for i, (article, true_cat) in enumerate(zip(sample_articles, true_categories), 1):
        result = classify_news_with_gemini(prompt, article)
        status = "✅" if result.lower() == true_cat.lower() else "❌"
        print(f"{status} Sample {i} (True: {true_cat}): {result}")
        
        import time
        time.sleep(1)


TESTING PROMPT 1
✅ Sample 1 (True: business): Business
✅ Sample 2 (True: entertainment): Entertainment
✅ Sample 3 (True: politics): Politics
✅ Sample 4 (True: sport): Sport
✅ Sample 5 (True: tech): Tech

TESTING PROMPT 2
✅ Sample 1 (True: business): Business
✅ Sample 2 (True: entertainment): Entertainment
✅ Sample 3 (True: politics): Politics
✅ Sample 4 (True: sport): Sport
✅ Sample 5 (True: tech): Tech

TESTING PROMPT 3
❌ Sample 1 (True: business): Category: **business**
❌ Sample 2 (True: entertainment): Category: **entertainment**
✅ Sample 3 (True: politics): Politics
✅ Sample 4 (True: sport): Sport
✅ Sample 5 (True: tech): Tech


In [13]:
df = pd.read_pickle("processed_df.pkl")

In [14]:
real_samples = df.sample(3, random_state=42)
for i, (idx, row) in enumerate(real_samples.iterrows(), 1):
    snippet = row['cleaned_text'][:200] + "..."  # First 200 chars
    true_category = row['category']
    
    result = classify_news_with_gemini(prompt3, snippet)
    status = "✅" if result.lower() == true_category.lower() else "❌"
    
    print(f"\nSample {i}:")
    print(f"True category: {true_category}")
    print(f"Gemini result: {result}")
    print(f"Snippet: {snippet}")
    print(f"Match: {status}")
    
    time.sleep(1)


Sample 1:
True category: business
Gemini result: business
Snippet: house price dip november house price dipped slightly november office deputy prime minister odpm said. average house price fell marginally october. recent evidence suggested housing market slowing inte...
Match: ✅

Sample 2:
True category: business
Gemini result: Business
Snippet: lse set date takeover deal london stock exchange lse planning announce preferred takeover end month newspaper report claim. sunday telegraph said lse plan evidence want retain tight control destiny. d...
Match: ✅

Sample 3:
True category: sport
Gemini result: Sport
Snippet: harinordoquy suffers france axe number eight imanol harinordoquy dropped france squad six nation match ireland dublin march. harinordoquy second half replacement last saturday defeat wales. bourgoin l...
Match: ✅
