# In [None]:
def load_data(file_path):
  """
  Read a CSV file into a pandas DataFrame.

  Args:
      file_path (str): Location of the CSV file to read. The value is
          handed to ``pandas.read_csv`` unchanged.

  Returns:
      pd.DataFrame: The parsed file contents, using the default
      ``read_csv`` options.
  """
  # Imported locally so the function is self-contained within its cell.
  from pandas import read_csv

  return read_csv(file_path)

# In [None]:
def create_event_text(row):
  """
  Render one GDELT event row as a single descriptive string.

  Args:
      row: Mapping-like object (e.g. a DataFrame row) providing the keys
          ``Actor1Name``, ``EventCode``, ``Actor2Name``, ``GoldsteinScale``
          and ``AvgTone``.

  Returns:
      str: Text of the form
      ``"<actor1> (event code <code>) towards <actor2>. Goldstein scale: <g>, Tone: <t>"``.
  """
  # Build the sentence from left to right, one fragment per source field group.
  source_part = f"{row['Actor1Name']} (event code {row['EventCode']}) "
  target_part = f"towards {row['Actor2Name']}. "
  goldstein_part = f"Goldstein scale: {row['GoldsteinScale']}, "
  tone_part = f"Tone: {row['AvgTone']}"
  return "".join((source_part, target_part, goldstein_part, tone_part))

# In [None]:
def predict_intensity(df, text_column="event_text"):
  """
  Predict event intensity (0-4) using zero-shot classification.

  Every text in ``text_column`` is scored against five candidate labels
  by the ``facebook/bart-large-mnli`` model (a BART model, not BERT).
  The top-ranked label is kept, and its position in the label list is
  the intensity:

      0: Stable/Cooperative
      1: Verbal Tension
      2: Diplomatic Crisis
      3: Military Posturing
      4: Armed Conflict

  The model runs on CUDA device 0 when available, otherwise on the CPU.

  Args:
      df (pd.DataFrame): Frame holding the texts to classify. It is not
          modified; the result is a copy.
      text_column (str): Name of the column containing the input texts.

  Returns:
      pd.DataFrame: A copy of ``df`` with three added columns:
      ``predicted_label`` (top label), ``predicted_intensity`` (index of
      that label, 0-4) and ``predicted_score`` (float score of that label).

  Raises:
      KeyError: If ``text_column`` is not a column of ``df``.
  """
  labels = [
      "Stable/Cooperative",
      "Verbal Tension",
      "Diplomatic Crisis",
      "Military Posturing",
      "Armed Conflict"
  ]
  # Label -> intensity lookup, built once instead of list.index() per row.
  intensity_of = {label: level for level, label in enumerate(labels)}

  texts = df[text_column].tolist()

  results = []
  if texts:
    # Only pay for the heavy imports and the model load when there is
    # something to classify; an empty frame falls straight through.
    import torch
    from transformers import pipeline

    classifier = pipeline(
        "zero-shot-classification",
        model="facebook/bart-large-mnli",
        device=0 if torch.cuda.is_available() else -1
    )

    # One call for all texts rather than one pipeline call per row.
    results = classifier(texts, candidate_labels=labels, multi_label=False)
    # Some transformers versions unwrap a single-item batch into a bare dict.
    if isinstance(results, dict):
      results = [results]

  # For each result, the first entry of "labels"/"scores" is the top prediction.
  top_labels = [result["labels"][0] for result in results]
  top_scores = [float(result["scores"][0]) for result in results]

  scored = df.copy()
  scored["predicted_label"] = top_labels
  scored["predicted_intensity"] = [intensity_of[label] for label in top_labels]
  scored["predicted_score"] = top_scores
  return scored

# Load data and run a small demo
DATA_PATH = "../../data/raw/irn_isr_gdelt.csv"
# Number of leading rows to score; kept small because every row goes
# through the classification model.
DEMO_ROW_COUNT = 100

# .copy() gives an independent frame, so adding the "event_text" column
# below does not write into a slice of the loaded data (which can raise
# pandas' SettingWithCopyWarning).
df_raw = load_data(DATA_PATH).head(DEMO_ROW_COUNT).copy()
df_raw["event_text"] = df_raw.apply(create_event_text, axis=1)
scored_df = predict_intensity(df_raw, text_column="event_text")

print("Sample predictions (first 5 rows):")
# head(), not tail(): the message above promises the first five rows.
print(scored_df[["event_text", "predicted_label", "predicted_intensity", "predicted_score"]].head())