### Import libraries

In [4]:
import pandas as pd
import torch
import numpy as np
import json
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score
from vllm import LLM, SamplingParams
from transformers import AutoTokenizer

  from .autonotebook import tqdm as notebook_tqdm


INFO 12-05 13:27:10 [importing.py:44] Triton is installed but 0 active driver(s) found (expected 1). Disabling Triton to prevent runtime errors.
INFO 12-05 13:27:10 [importing.py:68] Triton not installed or not compatible; certain GPU-related functions will not be available.


### Load dataset

In [None]:
# Load the raw comments. The text to classify is read from the 'comment' column.
# NOTE(review): '.csv' looks like a placeholder/redacted path - confirm the real file name.
df = pd.read_csv('.csv')
print(f"Total comments: {len(df)}")

# Cleaning, step 1: keep only the first occurrence of each distinct comment.
unique_comments = df.drop_duplicates(subset=['comment']).reset_index(drop=True)
# Cleaning, step 2: keep comments whose whitespace-stripped length exceeds 10
# characters (missing comments compare as False and are dropped as well).
is_long_enough = unique_comments['comment'].str.strip().str.len().gt(10)
df = unique_comments[is_long_enough].reset_index(drop=True)
print(f"After cleaning: {len(df)}")

# Draw a reproducible random sample of at most 500 cleaned comments.
sample_size = min(500, len(df))
df_sample = df.sample(n=sample_size, random_state=42)
print(f"\nUsing {sample_size} samples for manual validation")

### Setup Qwen

In [None]:
model_name = "Qwen/Qwen2.5-7B-Instruct"

# Engine settings gathered in one mapping and handed unchanged to the vLLM
# constructor, so they can be reviewed in a single place.
engine_options = dict(
    model=model_name,
    gpu_memory_utilization=0.75,
    max_model_len=2048,
    tensor_parallel_size=1,
    trust_remote_code=True,
)
llm = LLM(**engine_options)

# The tokenizer is loaded separately because it is needed to render the chat
# template when prompts are built.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
print("Ready")

### Create prompts

In [None]:
def create_prompt(comment):
    """Build the chat-formatted stance-classification prompt for one comment.

    The comment is embedded in a fixed few-shot instruction, wrapped as a
    single user message, and rendered to text with the module-level
    ``tokenizer``'s chat template (generation prompt appended).

    Args:
        comment: The comment text to classify.

    Returns:
        The value returned by ``tokenizer.apply_chat_template`` with
        ``tokenize=False``.
    """
    instruction = f"""Classify the stance of this comment about Ukraine-Russia conflict.

Return ONLY one word: prorussian, neutral, or proukrainian

Guidelines:
- prorussian: supports Russia, criticizes Ukraine/Zelenskyy, justifies invasion
- proukrainian: supports Ukraine, criticizes Russia/Putin, condemns invasion
- neutral: balanced view, no clear stance, or discusses both sides equally

Consider sarcasm and irony when present.

Examples:
"Slava Ukraini!" → proukrainian
"Putin is a great leader" → prorussian
"'Great strategist' Putin lost again lol" → proukrainian
"War hurts everyone" → neutral
"Zelenskyy sells Ukraine to NATO" → prorussian

Comment: "{comment}"

Classification:"""

    # Single-turn conversation: the whole instruction is the user message.
    conversation = [{"role": "user", "content": instruction}]
    rendered = tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True,
    )
    return rendered

### Run inference on validation sample

In [None]:

def parse_stance(raw_text):
    """Map one raw model completion to 'prorussian', 'proukrainian' or 'neutral'.

    The full label that appears first in the completion wins, so trailing
    explanation text (e.g. "prorussian (anti-ukrainian") cannot flip the
    result. Hyphenated or spaced spellings such as "pro-russian" are accepted.
    Completions that contain no full label fall back to the looser
    "ukrainian" / "russian" substring check, and finally to 'neutral'.

    Kept local to this cell so the cell does not depend on helpers defined
    in other cells.
    """
    text = raw_text.strip().lower()
    # "pro-russian" / "pro russian" -> "prorussian"
    squashed = text.replace('-', '').replace(' ', '')
    first_seen = {
        label: squashed.find(label)
        for label in ('prorussian', 'proukrainian', 'neutral')
        if label in squashed
    }
    if first_seen:
        return min(first_seen, key=first_seen.get)
    # Loose fallback for completions that never spell out a full label.
    if 'ukrainian' in text:
        return 'proukrainian'
    if 'russian' in text:
        return 'prorussian'
    return 'neutral'


# One chat-formatted prompt per sampled comment.
prompts = [create_prompt(c) for c in df_sample['comment'].tolist()]
# Short completions only: generation stops at the first newline, period or comma.
sampling_params = SamplingParams(temperature=0.2, max_tokens=20, stop=["\n", ".", ","])

outputs = llm.generate(prompts, sampling_params, use_tqdm=True)

# Keep the normalised raw completion next to the parsed label so that
# questionable parses can be inspected afterwards.
raw_outputs = [output.outputs[0].text.strip().lower() for output in outputs]
labels = [parse_stance(text) for text in raw_outputs]

df_sample['stance_label'] = labels
df_sample['raw_output'] = raw_outputs

### Distribution analysis

In [None]:
# Label frequencies in the validation sample (most frequent first).
counts = df_sample['stance_label'].value_counts()
print(counts)
print(f"\nPercentages:")
for label, count in counts.items():
    print(f"  {label}: {count/len(df_sample)*100:.1f}%")

# value_counts() orders labels by frequency, so a positional colour list would
# attach colours to whichever label happens to be most common. Look colours up
# per label instead so each stance keeps the same colour in every plot.
# NOTE(review): mapping presumed to be red = prorussian, grey = neutral,
# green = proukrainian - confirm.
stance_colors = {
    'prorussian': '#d62728',
    'neutral': '#7f7f7f',
    'proukrainian': '#2ca02c',
}
plot_colors = [stance_colors[label] for label in counts.index]

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Left panel: absolute counts per stance.
counts.plot(kind='bar', ax=axes[0], color=plot_colors)
axes[0].set_title('Stance Distribution', fontsize=14, fontweight='bold')
axes[0].set_xlabel('Stance')
axes[0].set_ylabel('Count')
axes[0].tick_params(axis='x', rotation=45)

# Right panel: share of each stance.
counts.plot(kind='pie', ax=axes[1], autopct='%1.1f%%', colors=plot_colors)
axes[1].set_title('Stance Proportion', fontsize=14, fontweight='bold')
axes[1].set_ylabel('')

plt.tight_layout()
plt.show()

### Split into train/test for metrics

In [None]:
def parse_stance(raw_text):
    """Map one raw model completion to 'prorussian', 'proukrainian' or 'neutral'.

    The full label that appears first in the completion wins, so trailing
    explanation text (e.g. "prorussian (anti-ukrainian") cannot flip the
    result. Hyphenated or spaced spellings such as "pro-russian" are accepted.
    Completions that contain no full label fall back to the looser
    "ukrainian" / "russian" substring check, and finally to 'neutral'.

    Kept local to this cell so the cell does not depend on helpers defined
    in other cells.
    """
    text = raw_text.strip().lower()
    # "pro-russian" / "pro russian" -> "prorussian"
    squashed = text.replace('-', '').replace(' ', '')
    first_seen = {
        label: squashed.find(label)
        for label in ('prorussian', 'proukrainian', 'neutral')
        if label in squashed
    }
    if first_seen:
        return min(first_seen, key=first_seen.get)
    # Loose fallback for completions that never spell out a full label.
    if 'ukrainian' in text:
        return 'proukrainian'
    if 'russian' in text:
        return 'prorussian'
    return 'neutral'


# Stratified 80/20 split of the validation sample on the existing stance label.
train_df, test_df = train_test_split(df_sample, test_size=0.2, random_state=42, stratify=df_sample['stance_label'])
# Work on an explicit copy: a column is added below, and assigning to the
# object returned by the split can trigger pandas' SettingWithCopyWarning.
test_df = test_df.copy()

print(f"\nTrain: {len(train_df)}, Test: {len(test_df)}")

test_comments = test_df['comment'].tolist()
test_prompts = [create_prompt(c) for c in test_comments]

print(f"Running inference on test set ({len(test_df)} samples)...")
test_outputs = llm.generate(test_prompts, sampling_params, use_tqdm=True)

# Parse each completion into one of the three stance labels.
test_predictions = [parse_stance(output.outputs[0].text) for output in test_outputs]

test_df['predicted_stance'] = test_predictions

In [None]:
# Reference labels vs. the labels predicted on the held-out split.
# NOTE(review): 'stance_label' was itself filled from model output earlier in
# this notebook, so these scores measure agreement between two model runs
# rather than accuracy against human annotations - confirm this is intended.
y_true = test_df['stance_label'].values
y_pred = test_df['predicted_stance'].values

accuracy = accuracy_score(y_true, y_pred)
f1_macro, f1_weighted = (
    f1_score(y_true, y_pred, average=averaging)
    for averaging in ('macro', 'weighted')
)

# Headline scores, one per line, preceded by a blank line.
headline_scores = (
    ("Accuracy", accuracy),
    ("F1 (macro)", f1_macro),
    ("F1 (weighted)", f1_weighted),
)
print("\n" + "\n".join(f"{name}: {score:.3f}" for name, score in headline_scores))

# Per-class precision / recall / F1 table.
print("\n" + classification_report(y_true, y_pred))

### Confusion matrix

In [None]:
# Fixed label order shared by the matrix rows/columns, tick labels and the
# per-class report below.
stance_order = ['prorussian', 'neutral', 'proukrainian']
cm = confusion_matrix(y_true, y_pred, labels=stance_order)

fig, ax = plt.subplots(figsize=(8, 6))
# Draw on the axes created above explicitly instead of relying on the
# "current axes" global state.
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax,
            xticklabels=stance_order,
            yticklabels=stance_order,
            cbar_kws={'label': 'Count'})
ax.set_title('Confusion Matrix - Test Set', fontsize=14, fontweight='bold')
ax.set_ylabel('True Label')
ax.set_xlabel('Predicted Label')
plt.tight_layout()
plt.show()

# Share of each true class that was predicted correctly (diagonal / row sum).
# A class with no true samples has a zero row sum; leave it as NaN instead of
# dividing by zero and emitting a RuntimeWarning.
row_totals = cm.sum(axis=1)
per_class_accuracy = np.divide(
    cm.diagonal(),
    row_totals,
    out=np.full(len(stance_order), np.nan),
    where=row_totals > 0,
)
print("\nPer-class accuracy:")
for label, class_accuracy in zip(stance_order, per_class_accuracy):
    print(f"  {label}: {class_accuracy:.3f}")

### Error analysis

In [None]:
# Rows of the test split where the two labels disagree.
is_mismatch = test_df['stance_label'] != test_df['predicted_stance']
errors = test_df[is_mismatch].copy()

n_errors = len(errors)
n_tested = len(test_df)
banner = "=" * 60
print("\n" + banner)
print(f"ERROR ANALYSIS - {n_errors} errors out of {n_tested} ({n_errors/n_tested*100:.1f}%)")
print(banner)

if n_errors > 0:
    # Count every (true, predicted) combination, most frequent first.
    print("\nError breakdown:")
    error_pairs = errors.groupby(['stance_label', 'predicted_stance']).size().sort_values(ascending=False)
    for (true_label, pred_label), count in error_pairs.items():
        print(f"  {true_label} → {pred_label}: {count}")

    # Show the first ten mismatches, truncating long comments to 80 characters.
    print("\nExample errors (showing up to 10):")
    shown = errors.head(10)
    for text, truth, guess in zip(shown['comment'], shown['stance_label'], shown['predicted_stance']):
        preview = text[:80] + '...' if len(text) > 80 else text
        print(f"\nComment: {preview}")
        print(f"True: {truth} | Predicted: {guess}")
        print("-" * 60)

In [None]:
from pathlib import Path


def parse_stance(raw_text):
    """Map one raw model completion to 'prorussian', 'proukrainian' or 'neutral'.

    The full label that appears first in the completion wins, so trailing
    explanation text (e.g. "prorussian (anti-ukrainian") cannot flip the
    result. Hyphenated or spaced spellings such as "pro-russian" are accepted.
    Completions that contain no full label fall back to the looser
    "ukrainian" / "russian" substring check, and finally to 'neutral'.

    Kept local to this cell so the cell does not depend on helpers defined
    in other cells.
    """
    text = raw_text.strip().lower()
    # "pro-russian" / "pro russian" -> "prorussian"
    squashed = text.replace('-', '').replace(' ', '')
    first_seen = {
        label: squashed.find(label)
        for label in ('prorussian', 'proukrainian', 'neutral')
        if label in squashed
    }
    if first_seen:
        return min(first_seen, key=first_seen.get)
    # Loose fallback for completions that never spell out a full label.
    if 'ukrainian' in text:
        return 'proukrainian'
    if 'russian' in text:
        return 'prorussian'
    return 'neutral'


# Label every cleaned comment in the full dataset.
all_prompts = [create_prompt(c) for c in df['comment'].tolist()]
all_outputs = llm.generate(all_prompts, sampling_params, use_tqdm=True)

all_labels = [parse_stance(output.outputs[0].text) for output in all_outputs]

df['stance_label'] = all_labels

# Create the output directory first so a missing 'data/' folder does not
# discard the results of the full inference run.
output_path = Path('data/final_dataset_with_stance.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)

### Final statistics and visualization

In [None]:
# Label frequencies over the full dataset (most frequent first).
final_counts = df['stance_label'].value_counts()
print(f"\n{final_counts}")
print(f"\nPercentages:")
for label, count in final_counts.items():
    print(f"  {label}: {count/len(df)*100:.1f}%")

# value_counts() orders labels by frequency, so a positional colour list would
# attach colours to whichever label happens to be most common. Look colours up
# per label instead so each stance keeps the same colour in every plot.
# NOTE(review): mapping presumed to be red = prorussian, grey = neutral,
# green = proukrainian - confirm.
stance_colors = {
    'prorussian': '#d62728',
    'neutral': '#7f7f7f',
    'proukrainian': '#2ca02c',
}
final_colors = [stance_colors[label] for label in final_counts.index]
stance_order = ['prorussian', 'neutral', 'proukrainian']

fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# Top-left: absolute counts per stance.
final_counts.plot(kind='bar', ax=axes[0, 0], color=final_colors)
axes[0, 0].set_title('Final Dataset - Stance Distribution', fontsize=12, fontweight='bold')
axes[0, 0].set_xlabel('Stance')
axes[0, 0].set_ylabel('Count')
axes[0, 0].tick_params(axis='x', rotation=45)

# Top-right: share of each stance.
final_counts.plot(kind='pie', ax=axes[0, 1], autopct='%1.1f%%', colors=final_colors)
axes[0, 1].set_title('Final Dataset - Stance Proportion', fontsize=12, fontweight='bold')
axes[0, 1].set_ylabel('')

# Bottom-left: validation-sample counts next to full-dataset counts; a stance
# missing from either side is shown as 0.
comparison_data = pd.DataFrame({
    'Validation Sample': df_sample['stance_label'].value_counts(),
    'Full Dataset': final_counts
}).fillna(0)

comparison_data.plot(kind='bar', ax=axes[1, 0], color=['#1f77b4', '#ff7f0e'])
axes[1, 0].set_title('Validation vs Full Dataset', fontsize=12, fontweight='bold')
axes[1, 0].set_xlabel('Stance')
axes[1, 0].set_ylabel('Count')
axes[1, 0].legend()
axes[1, 0].tick_params(axis='x', rotation=45)

# Bottom-right: confusion matrix of the test split, in a fixed label order.
cm = confusion_matrix(y_true, y_pred, labels=stance_order)
sns.heatmap(cm, annot=True, fmt='d', cmap='Greens', ax=axes[1, 1],
            xticklabels=stance_order,
            yticklabels=stance_order,
            cbar_kws={'label': 'Count'})
axes[1, 1].set_title('Confusion Matrix (Test Set)', fontsize=12, fontweight='bold')
axes[1, 1].set_ylabel('True')
axes[1, 1].set_xlabel('Predicted')

plt.tight_layout()
plt.show()