<a href="https://colab.research.google.com/github/Noshi26/Blessed_Hands/blob/main/WorldWatch_AI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [16]:
import pandas as pd
import numpy as np

# --- 1. Load your dataset ---
# Make sure 'News_2025.csv' is uploaded to your Colab session files.
df = pd.read_csv('News_2025.csv')
# Convert the 'publishedAt' column to a proper datetime format
df['publishedAt'] = pd.to_datetime(df['publishedAt'])

# --- 2. Define the Knowledge Base ---
# This dictionary is the core "brain" of our AI.
# Format: { 'Category': { 'keyword': risk_score, ... } }
CATEGORY_KEYWORDS = {
    'War': {
        'war': 10, 'attack': 9, 'conflict': 8, 'strike': 9, 'military': 7,
        'invasion': 10, 'casualties': 8, 'airstrike': 9, 'troops': 6, 'battle': 8
    },
    'Peace': {
        'peace': -5, 'treaty': -7, 'agreement': -6, 'ceasefire': -8, 'truce': -8,
        'negotiation': -4, 'diplomacy': -5, 'reconciliation': -6
    },
    'Economic': {
        'economy': 3, 'inflation': 6, 'recession': 7, 'market': 4, 'growth': -2,
        'gdp': 3, 'stock': 4, 'interest rate': 6, 'unemployment': 7
    },
    'Trade': {
        'trade': 2, 'tariff': 5, 'sanction': 6, 'export': 1, 'import': 1,
        'supply chain': 4, 'deal': -3, 'commerce': 2
    },
    'Tech': {
        'technology': 1, 'ai': 2, 'cybersecurity': 6, 'innovation': -1,
        'space': 1, 'semiconductor': 4, 'data': 3
    }
}

print("✅ Phase 1 Complete: Data loaded and knowledge base defined.")

✅ Phase 1 Complete: Data loaded and knowledge base defined.


In [17]:
def get_category_scores(headline, keyword_db):
    """Calculates category scores based on keywords in a headline."""
    headline_lower = headline.lower()
    scores = {category: 0 for category in keyword_db}

    for category, keywords in keyword_db.items():
        for keyword, score in keywords.items():
            if keyword in headline_lower:
                scores[category] += score
    return scores

def classify_and_score(headline, keyword_db):
    """Classifies a headline and assigns a final risk score."""
    scores = get_category_scores(headline, keyword_db)

    # If no keywords were found, classify as 'Neutral'
    if all(score == 0 for score in scores.values()):
        return 'Neutral', 0

    # The primary category is the one with the highest absolute score
    primary_category = max(scores, key=lambda cat: abs(scores[cat]))

    # The final risk score is the sum of all keyword scores found
    total_risk_score = sum(scores.values())

    return primary_category, total_risk_score

print("✅ Phase 2 Complete: Scoring and classification functions are ready.")

✅ Phase 2 Complete: Scoring and classification functions are ready.


In [18]:
# Apply the function to each title in the DataFrame using our initial keyword list
results = df['title'].apply(lambda headline: classify_and_score(headline, CATEGORY_KEYWORDS))
df[['category', 'risk_score']] = pd.DataFrame(results.tolist(), index=df.index)

# Display the results
print("Initial Classification and Scoring Complete. Here are some examples:")
print(df[['title', 'category', 'risk_score']].head(10))

Initial Classification and Scoring Complete. Here are some examples:
                                               title category  risk_score
0  At Chile’s Vera Rubin Observatory, Earth’s Lar...  Neutral           0
1  Israel vows to intensify attacks after Iranian...      War           9
2  Who is Karen Read? Boston woman acquitted of m...  Neutral           0
3  1930s 'Dragon Man' Finally Gives Elusive Ancie...  Neutral           0
4  When Earth iced over, early life may have shel...  Neutral           0
5  Rwanda and DR Congo agree draft peace deal to ...      War           0
6  Nothing Phone (3) has ‘Glyph Matrix’ lights in...  Neutral           0
7  How migrating Australian moths find caves hund...  Neutral           0
8  2 Chinese spacecraft just met up 22,000 miles ...     Tech           1
9  What are the risks of bombing Iran's nuclear s...  Neutral           0


In [19]:
# Install the library for the Genetic Algorithm
!pip install pygad

# Create a small 'ground truth' dataset to judge performance.
# IMPORTANT: You should manually check and correct these to be as accurate as possible!
ground_truth = {
    0: 'Tech',       # "At Chile’s Vera Rubin Observatory, Earth’s Largest..."
    1: 'War',        # "Israel vows to intensify attacks after Iranian..."
    2: 'Neutral',    # "Who is Karen Read? Boston woman acquitted of m..."
    3: 'Tech',       # "1930s 'Dragon Man' Finally Gives Elusive Ancie..."
    4: 'Tech',       # "When Earth iced over, early life may have shel..."
    5: 'Economic',   # Add title for row 5 to help labeling
    6: 'War',        # Add title for row 6
    7: 'Peace',      # Add title for row 7
    8: 'Trade',      # Add title for row 8
    9: 'Economic',   # Add title for row 9
    10: 'War',
    11: 'War',
    12: 'Tech',
    13: 'Economic',
    14: 'Peace'
}

# Create a small sample DataFrame for the GA to use
sample_df = df.iloc[0:15].copy()
sample_df['true_category'] = sample_df.index.map(ground_truth)

print("✅ Phase 3a Complete: Ground truth created and pygad installed.")
print("\nSample data for Genetic Algorithm:")
print(sample_df[['title', 'true_category']])

Collecting pygad
  Downloading pygad-3.4.0-py3-none-any.whl.metadata (23 kB)
Downloading pygad-3.4.0-py3-none-any.whl (86 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/86.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.8/86.8 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pygad
Successfully installed pygad-3.4.0
✅ Phase 3a Complete: Ground truth created and pygad installed.

Sample data for Genetic Algorithm:
                                                title true_category
0   At Chile’s Vera Rubin Observatory, Earth’s Lar...          Tech
1   Israel vows to intensify attacks after Iranian...           War
2   Who is Karen Read? Boston woman acquitted of m...       Neutral
3   1930s 'Dragon Man' Finally Gives Elusive Ancie...          Tech
4   When Earth iced over, early life may have shel...          Tech
5   Rwanda and DR Congo agree draft peace deal to ...      

In [20]:
import pygad

# Flatten our keyword dictionary into a single list of scores for the GA to optimize
initial_weights = [score for cat_kw in CATEGORY_KEYWORDS.values() for score in cat_kw.values()]

# 1. Define the Fitness Function: This is how we score each potential solution (set of weights)
def fitness_func(ga_instance, solution, solution_idx):
    # Rebuild the keyword DB with the new weights from the GA "solution"
    temp_keywords = {}
    i = 0
    for category, keywords in CATEGORY_KEYWORDS.items():
        temp_keywords[category] = {}
        for keyword in keywords:
            temp_keywords[category][keyword] = solution[i]
            i += 1

    # Calculate accuracy based on this new set of weights
    predictions = sample_df['title'].apply(lambda h: classify_and_score(h, temp_keywords)[0])
    correct_predictions = (predictions == sample_df['true_category']).sum()

    accuracy = correct_predictions / len(sample_df)
    return accuracy

# 2. Configure and Run the Genetic Algorithm
ga_instance = pygad.GA(num_generations=100,
                       num_parents_mating=7,
                       fitness_func=fitness_func,
                       sol_per_pop=20,
                       num_genes=len(initial_weights),
                       init_range_low=-15.0,
                       init_range_high=15.0,
                       parent_selection_type="sss",
                       keep_parents=2,
                       crossover_type="single_point",
                       mutation_type="random",
                       mutation_percent_genes=10)

print("\nRunning Genetic Algorithm to optimize keyword weights...")
ga_instance.run()
print("GA Finished.")

# 3. Get the Best Solution and Update our Knowledge Base
best_solution, best_solution_fitness, best_solution_idx = ga_instance.best_solution()
print(f"✅ Phase 3b Complete: Best accuracy achieved on sample data: {best_solution_fitness:.2%}")

# Update our main keyword dictionary with the GA's winning weights
i = 0
for category in CATEGORY_KEYWORDS:
    for keyword in CATEGORY_KEYWORDS[category]:
        CATEGORY_KEYWORDS[category][keyword] = best_solution[i]
        i += 1

print("\nKnowledge base has been updated with optimized weights.")


Running Genetic Algorithm to optimize keyword weights...
GA Finished.
✅ Phase 3b Complete: Best accuracy achieved on sample data: 13.33%

Knowledge base has been updated with optimized weights.


In [None]:
import time
from IPython.display import display, clear_output

# Re-run the classification on the whole dataset with our NEW, OPTIMIZED weights
print("Re-classifying all articles with optimized knowledge base...")
optimized_results = df['title'].apply(lambda h: classify_and_score(h, CATEGORY_KEYWORDS))
df[['category', 'risk_score']] = pd.DataFrame(optimized_results.tolist(), index=df.index)
print("Classification complete.")
time.sleep(2)


# --- Simulation Loop ---
print("\n--- WorldWatch AI: Real-Time Geopolitical Feed Simulation ---")
time.sleep(3)

# Loop through each article in the dataset
for index, article in df.iterrows():
    clear_output(wait=True) # Clears the output for a clean, updating display

    # Determine color based on category and risk for display
    risk_level = "NEUTRAL"
    if article['category'] == 'War': risk_level = "CRITICAL"
    elif article['category'] == 'Peace': risk_level = "STABLE"
    elif article['risk_score'] > 5: risk_level = "HIGH"
    elif article['risk_score'] > 0: risk_level = "MODERATE"

    # Display the analysis for the "new" article
    print("--- WorldWatch AI: Real-Time Geopolitical Feed Simulation ---")
    print(f"INCOMING [Article {index + 1}/{len(df)}]")
    print("----------------------------------------------------------")
    print(f"HEADLINE: {article['title']}")
    print(f"SOURCE: {article['source']}")
    print("----------------------------------------------------------")
    print(f"ANALYSIS -> Category: {article['category'].upper()} | Risk Level: {risk_level} | Score: {article['risk_score']:.1f}")
    print("----------------------------------------------------------\n")

    # Pause to simulate a real-time feed
    time.sleep(1.5)

print("--- End of Simulation ---")

--- WorldWatch AI: Real-Time Geopolitical Feed Simulation ---
INCOMING [Article 150/195]
----------------------------------------------------------
HEADLINE: Google’s AI Mode can now have back-and-forth voice conversations
SOURCE: TechCrunch
----------------------------------------------------------
ANALYSIS -> Category: TECH | Risk Level: NEUTRAL | Score: -0.1
----------------------------------------------------------

