### Import GDELT Data

In [1]:
import os
from datetime import datetime

import pandas as pd
from google.cloud import bigquery

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "festive-firefly-477711-u3-638c848094c7.json"
client = bigquery.Client(project="festive-firefly-477711-u3")


start_date = 20241101000000
end_date = 20241201000000

query = f"""
SELECT
  DATE,
  DocumentIdentifier AS url,
  V2Themes AS themes,
  V2Persons AS persons,
  V2Locations AS locations,
  V2Tone AS tone
FROM
  `gdelt-bq.gdeltv2.gkg`
WHERE
  DATE >= {start_date}
  AND DATE < {end_date}
LIMIT 500000;
"""


print(f"Fetching raw ALL-GKG data from {start_date} ‚Üí {end_date}")
start_time = datetime.now()

df = client.query(query).to_dataframe(create_bqstorage_client=False)
elapsed = (datetime.now() - start_time).seconds

print(f"‚úÖ Query completed in {elapsed} seconds | Rows fetched: {len(df)}")


filename = f"gdelt_gkg_all_{start_date}_to_{end_date}.csv"
df.to_csv(filename, index=False)
print(f"üíæ Saved raw file: {filename}")

Fetching raw ALL-GKG data from 20241101000000 ‚Üí 20241201000000


KeyboardInterrupt: 

### Merging All The Chunks Collected

In [None]:
import polars as pl
import os

folder = "/Users/bhavikjain/Desktop/Python_Project/Final Year Project/gdelt_data"

csv_files = sorted([os.path.join(folder, f) for f in os.listdir(folder) if f.endswith(".csv")])
lazy_frames = [pl.scan_csv(f) for f in csv_files]
merged = pl.concat(lazy_frames, how="vertical")
merged.sink_parquet(os.path.join(folder, "gdelt_gkg_all_merged.parquet"),compression="zstd")
print("Merger Complete")


### Load the Dataset

In [None]:
df = pl.read_parquet("gdelt_data/gdelt_gkg_all_merged.parquet")
df

### Seprating The Tone in Dataset

In [None]:
df = df.with_columns([
    pl.col("tone").str.split(",").list.get(i).cast(pl.Float64).alias(name)
    for i, name in enumerate(["Tone_Avg", "Tone_Pos", "Tone_Neg", "Tone_Pol", "Tone_Intensity", "Tone_Words"])
])

# Clean & filter
df = df.filter(pl.col("Tone_Avg").is_not_null() & (pl.col("url") != ""))

print("Made Separate Columns For Tones")

### Saving The Dataset

In [None]:
# # Select key columns
# df = df.select(["DATE", "url", "themes", "persons", "locations", "Tone_Avg", "Tone_Pos", "Tone_Neg", "Tone_Pol", "Tone_Intensity"])
#
# # Save efficiently
# df.sink_csv("/Users/bhavikjain/Desktop/Python_Project/Final Year Project/gdelt_data/gdelt_gkg_all_cleaned.csv")
#
# print("Saved The Dataset")

### Check the Schema Of the Dataset

In [None]:
df.schema

### Counting Null Values

In [None]:
df_nulls = df.select([pl.col(col).null_count().alias(f"{col}_nulls") for col in df.columns])
print(df_nulls)


### Filling NA Values instead of null values

In [None]:
df = df.with_columns([
    pl.col("themes").fill_null("NA"),
    pl.col("persons").fill_null("NA"),
    pl.col("locations").fill_null("NA")
])
df.write_parquet("gdelt_data/gdelt_intermediate cleaned.parquet")
print("File Saved Successfully ")

### Check Basic Info Of Dataset

In [None]:
df_final = pl.read_parquet("gdelt_data/gdelt_intermediate cleaned.parquet")
print(df_final.collect_schema())
print(df_final.head())

### Finding If Any Duplicates Exist

In [None]:
df_urls = df_final.select("url")
duplicate_find = df_urls["url"].is_duplicated()
duplicate_count = duplicate_find.sum()
print(f"Duplicate URLs found: {duplicate_count}")


In [None]:
print(df_final.describe())
df = df_final.write_parquet("gdelt_data/gdelt_final_cleaned.parquet")
print("File Cleaned And Saved Successfully ")

# Mapping The Sectors

In [None]:
#  Define macro sector patterns based on themes
SECTOR_GROUPS = {
    "Energy": ["ENV", "POWER", "PIPELINE", "COAL", "OIL", "MINING"],
    "Manufacturing": ["INDUSTRY", "TRADE", "EXPORT", "IMPORT", "PRODUCTION"],
    "Technology": ["TECH", "CYBER", "INTERNET", "INFO", "SOFTWARE"],
    "Finance": ["ECON", "BANK", "FINANCE", "TAX", "EPU"],
    "Healthcare": ["HEALTH", "MED", "DRUG", "PHARMA"],
    "Defense": ["MIL", "SECURITY", "DEFENSE"],
    "Agriculture": ["FOOD", "WATER", "AGRICULTURE"],
    "Public Sector": ["POL", "GOV", "STATE", "PUBLIC"],
    "Social": ["HUMAN", "GENDER", "CRIME", "UNREST"],
    "Infrastructure": ["ROAD", "RAIL", "URBAN", "INFRA"],
}

# Start with "Misc"
expr = pl.lit("Misc")

# Apply the mapping based on 'themes'
for sector, patterns in SECTOR_GROUPS.items():
    pattern_regex = "|".join(patterns)
    expr = pl.when(pl.col("themes").str.contains(f"(?i){pattern_regex}", literal=False)) \
             .then(pl.lit(sector)) \
             .otherwise(expr)

# Add new column
df = df.with_columns(expr.alias("MacroSector"))

# Collect and save
df_result = df.collect()
print("Macro-sector classification complete!")
print(df_result["MacroSector"].value_counts().sort("count", descending=True))

df_result.write_parquet("gdelt_data/gdelt_gkg_with_macrosector.parquet")
print("Saved The File")


# See Total Sectors In the gdelt

In [None]:
df = pl.scan_parquet("gdelt_data/gdelt_gkg_with_macrosector.parquet")

unique_sectors = df.select(pl.col("MacroSector").unique()).collect()

print("Total Sectors Found:", len(unique_sectors["MacroSector"]))
for s in sorted(unique_sectors["MacroSector"].to_list()):
    print("-", s)


### Simplify Sentiment ‚Üí Positive / Negative

In [None]:
df = pl.read_parquet("gdelt_data/gdelt_gkg_with_macrosector.parquet")

sentiment_expr = (
    pl.when(pl.col("Tone_Avg") > 1.5)
      .then(pl.lit("Positive"))
      .when(pl.col("Tone_Avg") < -1.5)
      .then(pl.lit("Negative"))
      .otherwise(pl.lit("Neutral"))
)
df = df.with_columns(sentiment_expr.alias("Sentiment_Label"))
print(df["Sentiment_Label"].value_counts())

### Extract Country Names from locations

In [None]:
import polars as pl
df = pl.scan_parquet("gdelt_data/gdelt_gkg_with_macrosector.parquet")

df = df.with_columns(
    pl.col("locations").str.split(";").list.last().alias("last_record")
)

df = df.with_columns([
    pl.col("last_record").str.extract(r"^[0-9]+#([^#]+)#").alias("Country_Name"),
    pl.col("last_record").str.extract(r"^[0-9]+#[^#]+#([A-Z]{2})#").alias("Country_Code")
])

df = df.with_columns(
    pl.col("Country_Name")
      .str.replace_all(r"[^A-Za-z\s,]", "")
      .str.strip_chars()
)

df_clean = (
    df.filter(
        (pl.col("Country_Name").is_not_null()) &
        (~pl.col("Country_Name").str.to_uppercase().is_in(["NA","NULL","NONE"]))
    )
    .select(["locations", "Country_Code", "Country_Name"])
)

df_clean = df_clean.collect()

print(df_clean.head())
df.sink_parquet("gdelt_data/gdelt_phase1_enriched.parquet", compression="zstd")

### Seeing The Data of Parquet File

In [None]:
import polars as pl

# Load the enriched file after block 3
df = pl.read_parquet("gdelt_data/gdelt_phase1_enriched.parquet")

print(df.shape)        # number of rows, columns
print(df.columns)      # list of all columns
print(df.head(10))     # first 10 rows
print(df.describe())   # numeric summary


# Phase :- 2   "India Investment Friendliness Index‚Äù

In [7]:
import polars as pl

# Load your processed GDELT dataset
df = pl.scan_parquet("gdelt_data/gdelt_phase1_enriched.parquet")

# Extract date as proper Date type
df = df.with_columns([
    pl.col("DATE").cast(pl.Utf8).str.slice(0, 8).alias("date_str"),
    pl.col("DATE").cast(pl.Utf8).str.slice(0, 8).str.strptime(pl.Date, "%Y%m%d").alias("date")
])

# Filter out null country codes (NA)
df = df.filter(pl.col("Country_Code").is_not_null())

df.collect()


DATE,url,themes,persons,locations,tone,Tone_Avg,Tone_Pos,Tone_Neg,Tone_Pol,Tone_Intensity,Tone_Words,MacroSector,last_record,Country_Name,Country_Code,date_str,date
i64,str,str,str,str,str,f64,f64,f64,f64,f64,f64,str,str,str,str,str,date
20241204133000,"""https://www.021.rs/story/Info/‚Ä¶","""DEMOCRACY,286;USPEC_POLITICS_G‚Ä¶","""NA""","""1#Serbian#RB#RB####RB#203;1#Ge‚Ä¶","""-4.22535211267606,1.4084507042‚Ä¶",-4.225352,1.408451,5.633803,7.042254,25.352113,0.0,"""Public Sector""","""1#Serbia#RI#RI##44#21#RI#468""","""Serbia""","""RI""","""20241204""",2024-12-04
20241204133000,"""https://www.021.rs/Novi%20Sad/‚Ä¶","""MANMADE_DISASTER_IMPLIED,1339;‚Ä¶","""NA""","""1#Serbian#RB#RB####RB#1137;1#S‚Ä¶","""-0.955109837631328,4.393505253‚Ä¶",-0.95511,4.393505,5.348615,9.74212,23.782235,2.674308,"""Finance""","""4#Vojvodina, 00, Serbia#RI#RI0‚Ä¶","""Vojvodina, , Serbia""","""RI""","""20241204""",2024-12-04
20241204133000,"""https://www.021.rs/story/BBC/3‚Ä¶","""NA""","""Ko Bojani,5728""","""1#Serbian#RB#RB####RB#652;1#Se‚Ä¶","""-3.67474589523065,1.7200938232‚Ä¶",-3.674746,1.720094,5.39484,7.114934,22.048475,0.312744,"""Misc""","""4#Vranje, 00, Serbia#RI#RI00#4‚Ä¶","""Vranje, , Serbia""","""RI""","""20241204""",2024-12-04
20241204133000,"""https://www.15min.lt/naujiena/‚Ä¶","""SEIZE,210;TAX_WORLDLANGUAGES_S‚Ä¶","""Bashar Assad,248""","""1#Syria#SY#SY##35#38#SY#5;1#Sy‚Ä¶","""-11.864406779661,1.27118644067‚Ä¶",-11.864407,1.271186,13.135593,14.40678,23.305085,0.0,"""Public Sector""","""1#Syrian#SY#SY##35#38#SY#1083""","""Syrian""","""SY""","""20241204""",2024-12-04
20241204133000,"""https://www.15min.lt/naujiena/‚Ä¶","""TAX_FNCACT_OFFICIALS,769;GENER‚Ä¶","""Baltic States,678""","""0#Georgia, , Georgia#GG#GG##42‚Ä¶","""-4.15647921760391,2.2004889975‚Ä¶",-4.156479,2.200489,6.356968,8.557457,20.537897,0.0,"""Social""","""1#Lithuania#LH#LH##56#24#LH#24‚Ä¶","""Lithuania""","""LH""","""20241204""",2024-12-04
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
20251030161500,"""https://theturtleislandnews.co‚Ä¶","""TAX_FNCACT_LEADER,762;TAX_FNCA‚Ä¶","""Mike Stimpson,16;Sol Mamakwa,7‚Ä¶","""4#Toronto, Ontario, Canada#CA#‚Ä¶","""-0.505050505050505,0.505050505‚Ä¶",-0.505051,0.505051,1.010101,1.515152,23.737374,2.525253,"""Healthcare""","""1#Canada#CA#CA##60#-96#CA#970""","""Canada""","""CA""","""20251030""",2025-10-30
20251030161500,"""https://www.indianapolismonthl‚Ä¶","""PROTEST,920;STRIKE,920;TAX_FNC‚Ä¶","""Nicole Cicak,1069;Clarence Dar‚Ä¶","""3#Art Institute Of Chicago, Il‚Ä¶","""5.23690773067332,6.23441396508‚Ä¶",5.236908,6.234414,0.997506,7.23192,23.690773,0.997506,"""Agriculture""","""3#Chicago, Illinois, United St‚Ä¶","""Chicago, Illinois, United Stat‚Ä¶","""US""","""20251030""",2025-10-30
20251030161500,"""https://www.yahoo.com/news/art‚Ä¶","""USPEC_POLICY1,389;USPEC_POLICY‚Ä¶","""Randall Woodfin,194""","""2#Alabama, United States#US#US‚Ä¶","""-5,1.66666666666667,6.66666666‚Ä¶",-5.0,1.666667,6.666667,8.333333,19.166667,0.0,"""Public Sector""","""2#Alabama, United States#US#US‚Ä¶","""Alabama, United States""","""US""","""20251030""",2025-10-30
20251030161500,"""https://diena.lt/naujienos/pas‚Ä¶","""TAX_ETHNICITY_BLACK,328;USPEC_‚Ä¶","""Benjamin Netanyahu,208;Benjami‚Ä¶","""1#Israel#IS#IS##31.5#34.75#IS#‚Ä¶","""-3.76569037656904,3.3472803347‚Ä¶",-3.76569,3.34728,7.112971,10.460251,25.523013,0.0,"""Public Sector""","""4#Gaza, Israel (General), Isra‚Ä¶","""Gaza, Israel General, Israel""","""IS""","""20251030""",2025-10-30


# Focus on India-Centric Context

In [8]:
import polars as pl

countries_df = (
    df.filter(pl.col("Country_Code") != "IN")
      .group_by(["Country_Code", "MacroSector", "date"])
      .agg(pl.len().alias("count"))
      .sort("count", descending=True)
      .collect()
)


countries_df

Country_Code,MacroSector,date,count
str,str,date,u32
"""US""","""Infrastructure""",2025-09-30,15676
"""US""","""Infrastructure""",2025-07-31,15467
"""US""","""Public Sector""",2025-07-31,14861
"""US""","""Public Sector""",2025-10-31,14209
"""US""","""Infrastructure""",2025-10-31,14155
…,…,…,…
"""VC""","""Social""",2025-01-01,1
"""LA""","""Social""",2024-12-07,1
"""GT""","""Social""",2025-01-24,1
"""KG""","""Defense""",2024-12-31,1


# Aggregate Daily Tone by Country

In [9]:
daily_country_tone = (
    df.filter(pl.col("Country_Code") != "IN")
      .group_by(["Country_Code", "MacroSector", "date"])
      .agg([
          pl.len().alias("n_articles"),
          pl.col("Tone_Pol").mean().alias("tone_mean"),
          pl.col("Tone_Pol").abs().mean().alias("tone_magnitude"),
          pl.col("Tone_Pol").std().alias("tone_volatility"),
          (pl.col("Tone_Pol") > 1.5).mean().alias("pos_rate"),
          (pl.col("Tone_Pol") < -1.5).mean().alias("neg_rate"),
          ((pl.col("Tone_Pol") > 1.5).mean() /
           (pl.col("Tone_Pol") < -1.5).mean().clip(0.0001, None)).alias("pos_neg_ratio"),
          ((pl.col("Tone_Pol") > 3) | (pl.col("Tone_Pol") < -3)).sum().alias("extreme_events"),
      ])
)

# Write lazily to Parquet
daily_country_tone.sink_parquet("gdelt_data/india_country_tone_daily.parquet")

print("‚úÖ Saved lazily: gdelt_data/india_country_tone_daily.parquet")


‚úÖ Saved lazily: gdelt_data/india_country_tone_daily.parquet


In [10]:
import polars as pl
q = pl.scan_parquet("gdelt_data/india_country_tone_daily.parquet")
df_preview = q.fetch(5)
print(df_preview)

shape: (5, 11)
‚îå‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îê
‚îÇ Country_Co ‚îÜ MacroSect ‚îÜ date      ‚îÜ n_article ‚îÜ ‚Ä¶ ‚îÜ pos_rate ‚îÜ neg_rate ‚îÜ pos_neg_r ‚îÜ extreme_e ‚îÇ
‚îÇ de         ‚îÜ or        ‚îÜ ---       ‚îÜ s         ‚îÜ   ‚îÜ ---      ‚îÜ ---      ‚îÜ atio      ‚îÜ vents     ‚îÇ
‚îÇ ---        ‚îÜ ---       ‚îÜ date      ‚îÜ ---       ‚îÜ   ‚îÜ f64      ‚îÜ f64      ‚îÜ ---       ‚îÜ ---       ‚îÇ
‚îÇ str        ‚îÜ str       ‚îÜ           ‚îÜ u32       ‚îÜ   ‚îÜ          ‚îÜ          ‚îÜ f64       ‚îÜ u32       ‚îÇ
‚ïû‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ï™‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ï™‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ï™‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ï™‚ïê‚ïê‚ïê‚ï™‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ï™‚ïê

  df_preview = q.fetch(5)


# Compute Rolling Sentiment Trends

In [11]:
import polars as pl

schema = {
    "date": pl.Date,
    "Country_Code": pl.Utf8,
    "MacroSector": pl.Utf8,
    "n_articles": pl.UInt32,
    "tone_mean": pl.Float64,
    "pos_rate": pl.Float64,
    "neg_rate": pl.Float64,
    "tone_7d": pl.Float64,
    "tone_30d": pl.Float64,
    "tone_trend": pl.Float64,
}

rolling_df = (
    daily_country_tone
        .sort(["Country_Code", "MacroSector", "date"])
        .with_columns([
            pl.col("tone_mean").rolling_mean(7).over(["Country_Code", "MacroSector"]).alias("tone_7d"),
            pl.col("tone_mean").rolling_mean(30).over(["Country_Code", "MacroSector"]).alias("tone_30d"),
            (
                pl.col("tone_mean").rolling_mean(7).over(["Country_Code", "MacroSector"])
                - pl.col("tone_mean").rolling_mean(30).over(["Country_Code", "MacroSector"])
            ).alias("tone_trend")
        ])
)

rolling_df.sink_parquet("gdelt_data/india_country_tone_trends.parquet")
print("‚úÖ Saved lazily: gdelt_data/india_country_tone_trends.parquet")


‚úÖ Saved lazily: gdelt_data/india_country_tone_trends.parquet


In [12]:
print("Columns:", rolling_df.collect_schema().names())
print("Column count:", len(rolling_df.collect_schema().names()))
print(rolling_df.head(45).collect())

Columns: ['Country_Code', 'MacroSector', 'date', 'n_articles', 'tone_mean', 'tone_magnitude', 'tone_volatility', 'pos_rate', 'neg_rate', 'pos_neg_ratio', 'extreme_events', 'tone_7d', 'tone_30d', 'tone_trend']
Column count: 14
shape: (45, 14)
‚îå‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îê
‚îÇ Country_Co ‚îÜ MacroSect ‚îÜ date      ‚îÜ n_article ‚îÜ ‚Ä¶ ‚îÜ extreme_e ‚îÜ tone_7d  ‚îÜ tone_30d ‚îÜ tone_tren ‚îÇ
‚îÇ de         ‚îÜ or        ‚îÜ ---       ‚îÜ s         ‚îÜ   ‚îÜ vents     ‚îÜ ---      ‚îÜ ---      ‚îÜ d         ‚îÇ
‚îÇ ---        ‚îÜ ---       ‚îÜ date      ‚îÜ ---       ‚îÜ   ‚îÜ ---       ‚îÜ f64      ‚îÜ f64      ‚îÜ ---       ‚îÇ
‚îÇ str        ‚îÜ str       ‚îÜ           ‚îÜ u32       ‚îÜ   ‚îÜ u32       ‚îÜ          ‚î

# Calculate Investment Friendliness Index

### IFI=0.6√ótone_trend+0.2√ópos_rate‚àí0.2√óneg_rate

In [13]:
# Compute Investment Friendliness Index (lazy mode)
df_final = rolling_df.with_columns([
    (
        0.6 * pl.col("tone_trend") +
        0.2 * pl.col("pos_rate") -
        0.2 * pl.col("neg_rate")
    ).alias("Investment_Friendliness_Index")
])


# Save lazily to Parquet (triggers computation)
df_final.sink_parquet("gdelt_data/india_investment_index.parquet")
print("üíæ Saved lazily: gdelt_data/india_investment_index.parquet")

üíæ Saved lazily: gdelt_data/india_investment_index.parquet


# View Current Global Rankings

In [14]:
schema = {
    "date": pl.Date,
    "Country_Code": pl.Utf8,
    "MacroSector": pl.Utf8,
    "n_articles": pl.UInt32,
    "tone_mean": pl.Float64,
    "pos_rate": pl.Float64,
    "neg_rate": pl.Float64,
    "tone_7d": pl.Float64,
    "tone_30d": pl.Float64,
    "tone_trend": pl.Float64,
}
rolling_df = (
    daily_country_tone
        .sort(["Country_Code", "date"])
        .with_columns([
            pl.col("tone_mean").rolling_mean(window_size=7).over("Country_Code").alias("tone_7d"),
            pl.col("tone_mean").rolling_mean(window_size=30).over("Country_Code").alias("tone_30d"),
            (
                pl.col("tone_mean").rolling_mean(window_size=7).over("Country_Code")
                - pl.col("tone_mean").rolling_mean(window_size=30).over("Country_Code")
            ).alias("tone_trend")
        ])
)

rolling_df.sink_parquet("gdelt_data/india_country_tone_trends.parquet")


In [15]:
df_final.count()

# Phase 4:- ML model

‚úÖ What We Will Do in Phase 4
Step 1 ‚Äî Load your final dataset
(a single CSV)
Step 2 ‚Äî Encode categorical columns
Country_Code ‚Üí one-hot or embedding
MacroSector ‚Üí one-hot encoding
Step 3 ‚Äî Split into train/test
Step 4 ‚Äî Train XGBoost Regressor
Target = Investment_Friendliness_Index
Step 5 ‚Äî Evaluate using
RMSE
MAE
R¬≤ Score
Step 6 ‚Äî Save the model (.pkl)
To be used in Phase 3 (FastAPI backend).

### Import Statements

In [3]:
import polars as pl
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import root_mean_squared_error,mean_absolute_error, r2_score
import xgboost as xgb
import joblib

### 1Ô∏è‚É£ Load Dataset

In [4]:
df = pl.read_parquet("gdelt_data/gdelt_phase2_final.parquet")

In [5]:
df.schema

Schema([('date', Date),
        ('Country_Code', String),
        ('MacroSector', String),
        ('n_articles', UInt32),
        ('tone_mean', Float64),
        ('pos_rate', Float64),
        ('neg_rate', Float64),
        ('tone_7d', Float64),
        ('tone_30d', Float64),
        ('tone_trend', Float64),
        ('Investment_Friendliness_Index', Float64)])

In [6]:
df = df.filter(pl.col("Investment_Friendliness_Index").is_not_null())

In [7]:
df_pd = df.to_pandas()

In [8]:
df_pd

Unnamed: 0,date,Country_Code,MacroSector,n_articles,tone_mean,pos_rate,neg_rate,tone_7d,tone_30d,tone_trend,Investment_Friendliness_Index
0,2024-12-07,AA,Agriculture,1,6.967985,1.000000,0.0,6.967985,6.967985,0.000000,0.200000
1,2024-12-07,AA,Public Sector,1,1.428571,0.000000,0.0,4.198278,4.198278,0.000000,0.000000
2,2024-12-10,AA,Agriculture,1,2.777778,1.000000,0.0,3.724778,3.724778,0.000000,0.200000
3,2024-12-10,AA,Healthcare,2,2.751665,0.500000,0.0,3.481500,3.481500,0.000000,0.100000
4,2024-12-16,AA,Infrastructure,1,7.530120,1.000000,0.0,4.291224,4.291224,0.000000,0.200000
...,...,...,...,...,...,...,...,...,...,...,...
126269,2025-10-31,ZI,Misc,14,4.625588,0.928571,0.0,6.349383,6.630213,-0.280830,0.017216
126270,2025-10-31,ZI,Public Sector,39,6.302863,1.000000,0.0,6.072868,6.576456,-0.503589,-0.102153
126271,2025-10-31,ZI,Healthcare,6,8.468479,1.000000,0.0,6.144835,6.585088,-0.440252,-0.064151
126272,2025-10-31,ZI,Finance,11,6.369807,1.000000,0.0,7.054808,6.556533,0.498274,0.498965


### X and Y Feature

In [9]:
FEATURES = [
    "Country_Code",
    "MacroSector",
    "n_articles",
    "tone_mean",
    "pos_rate",
    "neg_rate",
    "tone_7d",
    "tone_30d",
    "tone_trend"
]

TARGET = "Investment_Friendliness_Index"

X = df_pd[FEATURES]
y = df_pd[TARGET]


### Train, Test, Split

In [10]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

### Preprocessing The Model

In [11]:
# 4Ô∏è‚É£ Preprocessing + Model
categorical_cols = ["Country_Code", "MacroSector"]
numeric_cols = list(set(FEATURES) - set(categorical_cols))

preprocess = ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_cols),
        ("num", "passthrough", numeric_cols)
    ]
)

In [12]:
model = xgb.XGBRegressor(
    n_estimators=300,
    max_depth=6,
    learning_rate=0.05,
    subsample=0.85,
    colsample_bytree=0.85,
    objective="reg:squarederror",
    random_state=42
)

pipeline = Pipeline(steps=[
    ("preprocess", preprocess),
    ("model", model)
])

### Train My Model

In [13]:
pipeline.fit(X_train, y_train)

0,1,2
,steps,"[('preprocess', ...), ('model', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,transformers,"[('cat', ...), ('num', ...)]"
,remainder,'drop'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,True
,force_int_remainder_cols,'deprecated'

0,1,2
,categories,'auto'
,drop,
,sparse_output,True
,dtype,<class 'numpy.float64'>
,handle_unknown,'ignore'
,min_frequency,
,max_categories,
,feature_name_combiner,'concat'

0,1,2
,objective,'reg:squarederror'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,0.85
,device,
,early_stopping_rounds,
,enable_categorical,False


### Evaluation

In [14]:
preds = pipeline.predict(X_test)

rmse = root_mean_squared_error(y_test, preds)
mae = mean_absolute_error(y_test, preds)
r2  = r2_score(y_test, preds)

print(f"RMSE: {rmse:.4f}")
print(f"MAE:  {mae:.4f}")
print(f"R¬≤:   {r2:.4f}")

RMSE: 0.0147
MAE:  0.0046
R¬≤:   0.9989


### Save Model

In [15]:
joblib.dump(pipeline, "ifi_xgb_model.pkl")
print("\nüíæ Saved model as ifi_xgb_model.pkl")


üíæ Saved model as ifi_xgb_model.pkl


### Saving 30 Day Data

In [16]:
import polars as pl
from datetime import timedelta

# 1. Load your full daily aggregated data
# (This file was created in Phase 2/3 of your notebook)
history_path = "gdelt_data/india_country_tone_daily.parquet"
df = pl.read_parquet(history_path)

# 2. Find the latest date in your dataset
max_date = df["date"].max()
cutoff_date = max_date - timedelta(days=30)

print(f"üìÖ Latest Date in Data: {max_date}")
print(f"‚úÇÔ∏è Cutting data before: {cutoff_date}")

# 3. Filter for only the last 30 days
# We need this snapshot to calculate trends for new incoming data
df_state = df.filter(pl.col("date") >= cutoff_date)

# 4. Save to a small, fast "State File"
state_path = "gdelt_state_30d.parquet"
df_state.write_parquet(state_path)

print(f"‚úÖ State file created: {state_path}")
print(f"üìä Rows retained: {len(df_state)}")

üìÖ Latest Date in Data: 2025-10-31
‚úÇÔ∏è Cutting data before: 2025-10-01
‚úÖ State file created: gdelt_state_30d.parquet
üìä Rows retained: 3019


In [17]:
def predict(self, headline):
        # A. Analyze the new headline
        country, sector, tone, pos_rate, neg_rate = self._analyze_text(headline)
        print(f"\nüîç Input Analysis: Country={country} | Sector={sector} | Tone={tone:.4f}")

        # B. Get Context from Memory
        context = self.memory.filter(
            (pl.col("Country_Code") == country) &
            (pl.col("MacroSector") == sector)
        ).sort("date")

        # C. Create "Today's" Data Row
        today_row = pl.DataFrame({
            "date": [date.today()],
            "Country_Code": [country],
            "MacroSector": [sector],
            "n_articles": [1],
            "tone_mean": [tone],
            "pos_rate": [pos_rate],
            "neg_rate": [neg_rate]
        })

        # --- FIX: Cast n_articles to UInt32 to match history schema ---
        today_row = today_row.with_columns(pl.col("n_articles").cast(pl.UInt32))

        # D. Merge History + Today
        if len(context) > 0:
            # Select only common columns to ensure safe concat
            cols = today_row.columns
            combined = pl.concat([context.select(cols), today_row])
        else:
            combined = today_row
            print("‚ö†Ô∏è No 30-day history found for this specific sector. Trend will be 0.")

        # E. Calculate Features (Trend)
        tone_7d = combined.tail(7)["tone_mean"].mean()
        tone_30d = combined.tail(30)["tone_mean"].mean()
        tone_trend = tone_7d - tone_30d

        # F. Prepare XGBoost Input
        input_df = pd.DataFrame([{
            "Country_Code": country,
            "MacroSector": sector,
            "n_articles": 1,
            "tone_mean": tone,
            "pos_rate": pos_rate,
            "neg_rate": neg_rate,
            "tone_7d": tone_7d,
            "tone_30d": tone_30d,
            "tone_trend": tone_trend
        }])

        # G. Predict
        ifi_score = self.model.predict(input_df)[0]

        return {
            "Headline": headline,
            "Predicted_IFI": round(float(ifi_score), 4),
            "Tone_Trend": round(tone_trend, 4),
            "Recommendation": "YES" if ifi_score > 0.05 else "NO"
        }

## 1. Country extraction (dynamic, no hardcoding)

In [18]:
import spacy

nlp = spacy.load("en_core_web_sm")

def extract_country_dynamic(text):
    doc = nlp(text)
    for ent in doc.ents:
        if ent.label_ == "GPE":
            try:
                return pycountry.countries.lookup(ent.text).alpha_2
            except:
                continue
    return "OTHER"

In [19]:
from transformers import pipeline

sector_classifier = pipeline(
    "zero-shot-classification",
    model="facebook/bart-large-mnli"
)

SECTOR_LABELS = [
    "Agriculture", "Technology", "Energy", "Finance",
    "Healthcare", "Retail", "Manufacturing", "Transportation",
    "Infrastructure", "Defense", "Public Sector"
]

import re

# Same logic you used in Phase 2
SECTOR_GROUPS = {
    "Energy": ["ENV", "POWER", "PIPELINE", "COAL", "OIL", "MINING", "GAS"],
    "Manufacturing": ["INDUSTRY", "TRADE", "EXPORT", "IMPORT", "PRODUCTION", "FACTORY"],
    "Technology": ["TECH", "CYBER", "INTERNET", "INFO", "SOFTWARE", "AI", "DIGITAL"],
    "Finance": ["ECON", "BANK", "FINANCE", "TAX", "EPU", "MARKET", "MONEY"],
    "Healthcare": ["HEALTH", "MED", "DRUG", "PHARMA", "HOSPITAL", "VIRUS"],
    "Defense": ["MIL", "SECURITY", "DEFENSE", "WAR", "ARMY", "WEAPON"],
    "Agriculture": ["FOOD", "WATER", "AGRICULTURE", "FARM", "CROP", "WHEAT"],
    "Public Sector": ["POL", "GOV", "STATE", "PUBLIC", "LAW", "ELECTION"],
    "Social": ["HUMAN", "GENDER", "CRIME", "UNREST", "PROTEST", "RIGHTS"],
    "Infrastructure": ["ROAD", "RAIL", "URBAN", "INFRA", "BUILDING", "CONSTRUCTION"],
}

def extract_sector_fast(text):
    text_upper = text.upper()

    for sector, keywords in SECTOR_GROUPS.items():
        # Create a regex pattern to find any of the keywords
        pattern = r"|".join(re.escape(k) for k in keywords)
        if re.search(pattern, text_upper):
            return sector

    return "Misc" # Default if no keyword matches

Device set to use cpu


In [20]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Load ONLY ONCE
tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
finbert = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")

def estimate_tone_finbert(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True)
    with torch.no_grad(): # vital for speed/memory
        outputs = finbert(**inputs)
        probs = torch.softmax(outputs.logits, dim=1).detach().numpy()[0]

    # FinBERT output order: [Positive, Negative, Neutral] or [Neg, Neu, Pos]
    # ProsusAI/finbert labels are: 0: positive, 1: negative, 2: neutral (usually)
    # BUT check config. usually it's: 0=Positive, 1=Negative, 2=Neutral
    # Let's rely on model config labels if possible, but standard FinBERT is:
    # 0=Positive, 1=Negative, 2=Neutral

    # Let's assume standard output for now based on your previous code
    # Your previous code had: neg, neutral, pos = probs (Check this mapping!)
    # Standard FinBERT is actually: Positive, Negative, Neutral.
    # Let's stick to your previous logic but optimize the call.

    neg = probs[1]
    neutral = probs[2]
    pos = probs[0]

    tone_mean = pos - neg
    return float(tone_mean), float(pos), float(neg)

In [24]:
import joblib

# Load the model
ifi_model = joblib.load("ifi_xgb_model.pkl")

# Print it to verify
print("Model loaded successfully!")
print(ifi_model)

Model loaded successfully!
Pipeline(steps=[('preprocess',
                 ColumnTransformer(transformers=[('cat',
                                                  OneHotEncoder(handle_unknown='ignore'),
                                                  ['Country_Code',
                                                   'MacroSector']),
                                                 ('num', 'passthrough',
                                                  ['n_articles', 'neg_rate',
                                                   'tone_mean', 'tone_trend',
                                                   'tone_30d', 'tone_7d',
                                                   'pos_rate'])])),
                ('model',
                 XGBRegressor(base_score=None, booster=None, callbacks=None,
                              colsample_bylevel=None, colsample_byn...
                              feature_types=None, feature_weights=None,
                              gamma=None, gro

In [25]:
import pandas as pd
import joblib
import re
import spacy
import torch
import pycountry
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# --- 1. Load Resources ---
# (Assuming resources are loaded as per previous blocks)
# ifi_model, nlp, bert_tokenizer, bert_model are already loaded in memory

# --- 2. Existing Helper Functions (Keep these as they were) ---
# (SECTOR_GROUPS, DEMONYM_MAPPER, extract functions... keeping them consistent)
SECTOR_GROUPS = {
    "Energy": ["ENV", "POWER", "PIPELINE", "COAL", "OIL", "MINING", "GAS", "FUEL"],
    "Manufacturing": ["INDUSTRY", "TRADE", "EXPORT", "IMPORT", "PRODUCTION", "FACTORY", "TARIFFS"],
    "Technology": ["TECH", "CYBER", "INTERNET", "INFO", "SOFTWARE", "AI", "DIGITAL"],
    "Finance": ["ECON", "BANK", "FINANCE", "TAX", "EPU", "MARKET", "MONEY"],
    "Healthcare": ["HEALTH", "MED", "DRUG", "PHARMA", "HOSPITAL", "VIRUS"],
    "Defense": ["MIL", "SECURITY", "DEFENSE", "WAR", "ARMY", "WEAPON"],
    "Agriculture": ["FOOD", "WATER", "AGRICULTURE", "FARM", "CROP", "WHEAT"],
    "Public Sector": ["POL", "GOV", "STATE", "PUBLIC", "LAW", "ELECTION"],
    "Social": ["HUMAN", "GENDER", "CRIME", "UNREST", "PROTEST", "RIGHTS"],
    "Infrastructure": ["ROAD", "RAIL", "URBAN", "INFRA", "BUILDING", "CONSTRUCTION"],
}

DEMONYM_MAPPER = {
    "Russian": "Russia", "Indian": "India", "American": "United States", "Chinese": "China",
    "Japanese": "Japan", "French": "France", "German": "Germany", "British": "United Kingdom"
}

TRADE_BARRIERS = ["tariff", "tax", "sanction", "ban", "embargo", "penalty", "duty", "levy", "restriction", "hike"]

bert_tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
bert_model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")

def extract_sectors_multi(text):
    text_upper = text.upper()
    found = []
    for sector, keywords in SECTOR_GROUPS.items():
        if re.search(r"|".join(re.escape(k) for k in keywords), text_upper):
            found.append(sector)
    return found if found else ["Misc"]

def extract_countries_multi(text):
    doc = nlp(text)
    found = set()
    for ent in doc.ents:
        if ent.label_ in ["GPE", "NORP"]:
            txt = ent.text.strip()
            if txt in DEMONYM_MAPPER: txt = DEMONYM_MAPPER[txt]
            try: found.add(pycountry.countries.lookup(txt).alpha_2)
            except: continue
    return list(found) if found else ["OTHER"]

def estimate_tone_fast(text):
    inputs = bert_tokenizer(text, return_tensors="pt", truncation=True)
    with torch.no_grad():
        probs = torch.softmax(bert_model(**inputs).logits, dim=1).detach().numpy()[0]
    return float(probs[0] - probs[1])

def adjust_sentiment_rules(text, tone):
    text_lower = text.lower()
    if any(w in text_lower for w in TRADE_BARRIERS):
        if any(v in text_lower for v in ["impose", "hike", "raise", "increase", "against"]):
            return -2.0 # Force negative
    return tone

def predict_raw(text):
    # This just gets the data, doesn't print report
    sectors = extract_sectors_multi(text)
    countries = extract_countries_multi(text)
    raw_tone = estimate_tone_fast(text)
    adj_tone = adjust_sentiment_rules(text, raw_tone)

    results = []
    for country in countries:
        for sector in sectors:
            trend = -1.0 if adj_tone == -2.0 else 0.0
            input_df = pd.DataFrame([{
                "Country_Code": country, "MacroSector": sector, "n_articles": 1,
                "tone_mean": adj_tone, "pos_rate": 1.0 if adj_tone>0 else 0.0,
                "neg_rate": 1.0 if adj_tone<0 else 0.0,
                "tone_7d": adj_tone, "tone_30d": adj_tone, "tone_trend": trend
            }])
            score = ifi_model.predict(input_df)[0]
            results.append({"Country": country, "Sector": sector, "Score": float(score)})
    return results

# =======================================================
# üöÄ 3. THE NEW INDIA-CENTRIC REPORT GENERATOR
# =======================================================

def get_india_business_verdict(text):
    print(f"\nüì∞ NEWS: {text}")
    print("=" * 60)

    # 1. Get Raw Data (This returns a list of dicts, one for each Sector/Country)
    data = predict_raw(text)

    # 2. Check if India is involved
    all_countries = [d['Country'] for d in data]
    if "IN" not in all_countries:
        print("‚ÑπÔ∏è  NOTE: This news does not explicitly mention India.")
        return

    # 3. Identify the Partner (The Non-India Country)
    partners = [c for c in all_countries if c != "IN"]
    partner_code = partners[0] if partners else "DOMESTIC"

    if partner_code != "DOMESTIC":
        partner_name = pycountry.countries.get(alpha_2=partner_code).name.upper()
    else:
        partner_name = "DOMESTIC MARKET"

    # 4. Filter only India's results
    india_results = [d for d in data if d['Country'] == "IN"]

    # 5. LOOP THROUGH EVERY SECTOR INDIVIDUALLY
    for item in india_results:
        sector = item['Sector']
        score = item['Score']

        print(f"üáÆüá≥ INDIA vs {partner_name} | üè≠ SECTOR: {sector.upper()}")

        # Verdict Logic
        if score > 0.05:
            print(f"   ‚úÖ VERDICT: YES (Recommended)")
            print(f"   üí° ACTION:  India SHOULD increase business in {sector}.")
            print(f"      Reason:  Positive sentiment detected (Score: {score:.4f})")

        elif score < -0.05:
            print(f"   ‚õî VERDICT: NO (High Risk)")
            print(f"   üí° ACTION:  India SHOULD REDUCE exposure in {sector}.")
            print(f"      Reason:  Negative sentiment/Tariffs detected (Score: {score:.4f})")

        else:
            print(f"   ‚ö†Ô∏è VERDICT: MAYBE (Wait & Watch)")
            print(f"   üí° ACTION:  Maintain status quo.")

        print("-" * 60) # Separator between sectors

In [26]:
# =======================================================
# üöÄ 4. TEST CASES
# =======================================================

# Case 1: Good Relations
text1 = "Russian President Vladimir Putin said he will discuss expanding Indian imports with PM Narendra Modi"
get_india_business_verdict(text1)


üì∞ NEWS: Russian President Vladimir Putin said he will discuss expanding Indian imports with PM Narendra Modi
üáÆüá≥ INDIA vs DOMESTIC MARKET | üè≠ SECTOR: MANUFACTURING
   ‚úÖ VERDICT: YES (Recommended)
   üí° ACTION:  India SHOULD increase business in Manufacturing.
      Reason:  Positive sentiment detected (Score: 0.1852)
------------------------------------------------------------
üáÆüá≥ INDIA vs DOMESTIC MARKET | üè≠ SECTOR: TECHNOLOGY
   ‚úÖ VERDICT: YES (Recommended)
   üí° ACTION:  India SHOULD increase business in Technology.
      Reason:  Positive sentiment detected (Score: 0.1852)
------------------------------------------------------------


In [27]:
# Case 2: Bad Relations (Tariffs)
text2 = "US hikes India tariffs to 50%, putting focus on key food, fuel, feedstock trade flows"
get_india_business_verdict(text2)


üì∞ NEWS: US hikes India tariffs to 50%, putting focus on key food, fuel, feedstock trade flows
üáÆüá≥ INDIA vs UNITED STATES | üè≠ SECTOR: ENERGY
   ‚õî VERDICT: NO (High Risk)
   üí° ACTION:  India SHOULD REDUCE exposure in Energy.
      Reason:  Negative sentiment/Tariffs detected (Score: -0.5852)
------------------------------------------------------------
üáÆüá≥ INDIA vs UNITED STATES | üè≠ SECTOR: MANUFACTURING
   ‚õî VERDICT: NO (High Risk)
   üí° ACTION:  India SHOULD REDUCE exposure in Manufacturing.
      Reason:  Negative sentiment/Tariffs detected (Score: -0.5852)
------------------------------------------------------------
üáÆüá≥ INDIA vs UNITED STATES | üè≠ SECTOR: AGRICULTURE
   ‚õî VERDICT: NO (High Risk)
   üí° ACTION:  India SHOULD REDUCE exposure in Agriculture.
      Reason:  Negative sentiment/Tariffs detected (Score: -0.5844)
------------------------------------------------------------


In [28]:
# Case 3: Domestic News
text3 = "India announces massive subsidy for new semiconductor factories"
get_india_business_verdict(text3)


üì∞ NEWS: India announces massive subsidy for new semiconductor factories
üáÆüá≥ INDIA vs DOMESTIC MARKET | üè≠ SECTOR: MISC
   ‚úÖ VERDICT: YES (Recommended)
   üí° ACTION:  India SHOULD increase business in Misc.
      Reason:  Positive sentiment detected (Score: 0.1852)
------------------------------------------------------------


In [29]:
text = "India will not ‚Äòbow down‚Äô to US after steep tariffs, trade minister says"
get_india_business_verdict(text)


üì∞ NEWS: India will not ‚Äòbow down‚Äô to US after steep tariffs, trade minister says
üáÆüá≥ INDIA vs UNITED STATES | üè≠ SECTOR: MANUFACTURING
   ‚ö†Ô∏è VERDICT: MAYBE (Wait & Watch)
   üí° ACTION:  Maintain status quo.
------------------------------------------------------------


In [30]:
text = "US hikes India tariffs to 50%, putting focus on key food, fuel, feedstock trade flows"
get_india_business_verdict(text)


üì∞ NEWS: US hikes India tariffs to 50%, putting focus on key food, fuel, feedstock trade flows
üáÆüá≥ INDIA vs UNITED STATES | üè≠ SECTOR: ENERGY
   ‚õî VERDICT: NO (High Risk)
   üí° ACTION:  India SHOULD REDUCE exposure in Energy.
      Reason:  Negative sentiment/Tariffs detected (Score: -0.5852)
------------------------------------------------------------
üáÆüá≥ INDIA vs UNITED STATES | üè≠ SECTOR: MANUFACTURING
   ‚õî VERDICT: NO (High Risk)
   üí° ACTION:  India SHOULD REDUCE exposure in Manufacturing.
      Reason:  Negative sentiment/Tariffs detected (Score: -0.5852)
------------------------------------------------------------
üáÆüá≥ INDIA vs UNITED STATES | üè≠ SECTOR: AGRICULTURE
   ‚õî VERDICT: NO (High Risk)
   üí° ACTION:  India SHOULD REDUCE exposure in Agriculture.
      Reason:  Negative sentiment/Tariffs detected (Score: -0.5844)
------------------------------------------------------------
