# Importing Libraries

In [None]:
import ast
import json
import random
import time
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sentence_transformers import SentenceTransformer, InputExample, losses, evaluation # this smarter  then using countVectorizer or tfidfvectorizer 
from sklearn.metrics.pairwise import cosine_similarity
from torch.utils.data import DataLoader


# Loading the datasets from the same directory
The dataset is synthetically generated; we are still waiting for the Ministry to provide real data.

In [None]:

# Global plotting defaults: seaborn grid theme and a large default canvas
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (16, 12)

# === CONFIGURATION ===
# Single source of truth for the fine-tuning run; later cells read these keys
# and the whole dict is written into the results JSON.
CONFIG = dict(
    base_model="paraphrase-multilingual-MiniLM-L12-v2",
    epochs=5,
    batch_size=16,
    warmup_steps=100,
    evaluation_steps=200,
    output_path="./fine_tuned_youth_events_model",
    train_split=0.8,
)

# Banner followed by one "key: value" line per configuration entry
print("=" * 60)
print("FINE-TUNING SENTENCE TRANSFORMER FOR YOUTH EVENTS")
print("=" * 60)
print("\nüìã Configuration:")
print("\n".join(f"   {key}: {value}" for key, value in CONFIG.items()))

# === LOAD DATA ===
print("\nüìÇ Loading dataset...")

# The list-valued columns are read back from CSV as text and parsed per cell.
# SECURITY: this used to call eval(), which executes any expression found in
# the file. ast.literal_eval only accepts Python literals (lists, strings,
# numbers, ...) and raises ValueError/SyntaxError on anything else.
users_df = pd.read_csv('users_1000.csv')
users_df['interests'] = users_df['interests'].apply(ast.literal_eval)

events_df = pd.read_csv('events_1000.csv')
events_df['category'] = events_df['category'].apply(ast.literal_eval)

annexes_df = pd.read_csv('annexes_48.csv')

# Use improved ground truth when it exists, otherwise fall back to the original file
try:
    with open('ground_truth_improved.json', 'r', encoding='utf-8') as f:
        ground_truth = json.load(f)
        print("   ‚úì Using improved ground truth")
except FileNotFoundError:
    with open('ground_truth_1000.json', 'r', encoding='utf-8') as f:
        ground_truth = json.load(f)
        print("   ‚ö†Ô∏è  Using original ground truth (run diagnose script first!)")

# JSON object keys are always strings; convert them to int so lookups by the
# numeric user id work.
ground_truth = {int(k): v for k, v in ground_truth.items()}

# Attach each event's annex location (wilaya, annex_name). The left join keeps
# events whose annex_id has no match; their location columns are NaN.
events_df = events_df.merge(
    annexes_df[["annex_id", "wilaya", "annex_name"]],
    on="annex_id",
    how="left"
)

print(f"   ‚úì Loaded {len(users_df)} users, {len(events_df)} events")

# === CREATE TRAINING EXAMPLES ===
print("\nüî® Creating training examples...")

def create_training_examples(users_df, events_df, ground_truth, num_negative_samples=2,
                             max_positive_samples=3, num_medium_samples=2):
    """
    Build (user profile, event text) training pairs with similarity labels.

    For every user that has ground-truth events, three kinds of pairs are made:
    - Positive: the first ``max_positive_samples`` ground-truth events, label=1.0
    - Negative: up to ``num_negative_samples`` events that are not in the
      ground truth and share no category with the user's interests, label=0.0
    - Medium: up to ``num_medium_samples`` events that are not in the ground
      truth and whose overlap ratio r = |interests & categories| / |interests|
      satisfies 0 < r < 0.6, label=r

    Negative and medium events are drawn with ``random.sample`` (no repeats
    within a user), so results depend on the state of the ``random`` module.

    Args:
        users_df: DataFrame with columns ``id`` and ``interests`` (list of str).
        events_df: DataFrame with columns ``id``, ``title`` and ``category``
            (list of str). If an id occurs more than once, the first row wins.
        ground_truth: dict mapping user id -> list of relevant event ids.
        num_negative_samples: maximum negative pairs per user.
        max_positive_samples: maximum positive pairs per user.
        num_medium_samples: maximum medium pairs per user.

    Returns:
        list of InputExample, grouped by user in users_df order.
    """
    # Index the events once instead of filtering the DataFrame for every lookup.
    event_text = {}
    event_categories = {}
    for event_id, title, categories in zip(
        events_df['id'].tolist(), events_df['title'], events_df['category']
    ):
        if event_id in event_text:
            continue  # keep the first row for a duplicated id
        event_text[event_id] = f"{title} {' '.join(categories)}"
        event_categories[event_id] = set(categories)

    training_examples = []

    for idx, user in users_df.iterrows():
        if idx % 100 == 0:
            print(f"   Processing user {idx}/{len(users_df)}...")

        user_id = user["id"]
        user_profile = " ".join(user["interests"])
        user_interests_set = set(user["interests"])

        true_events = ground_truth.get(user_id, [])
        if not true_events:
            continue

        # POSITIVE EXAMPLES: ground-truth events (ids missing from events_df are skipped)
        for event_id in true_events[:max_positive_samples]:
            text = event_text.get(event_id)
            if text is None:
                continue
            training_examples.append(
                InputExample(texts=[user_profile, text], label=1.0)
            )

        # Classify every non-ground-truth event by its overlap with the user's
        # interests. Sampling from these pools (rather than sampling first and
        # discarding misses) yields the requested number of pairs whenever
        # enough candidates exist, and an empty pool simply yields none.
        true_event_ids = set(true_events)
        strict_negatives = []
        partial_matches = []
        for event_id, categories in event_categories.items():
            if event_id in true_event_ids:
                continue
            shared = len(user_interests_set & categories)
            if shared == 0:
                strict_negatives.append(event_id)
                continue
            # shared > 0 implies the interest set is non-empty, so this is safe
            overlap_ratio = shared / len(user_interests_set)
            if overlap_ratio < 0.6:
                partial_matches.append((event_id, overlap_ratio))

        # NEGATIVE EXAMPLES: no interest overlap at all
        negative_count = min(num_negative_samples, len(strict_negatives))
        for event_id in random.sample(strict_negatives, negative_count):
            training_examples.append(
                InputExample(texts=[user_profile, event_text[event_id]], label=0.0)
            )

        # MEDIUM EXAMPLES: partial overlap, labelled with the overlap ratio
        medium_count = min(num_medium_samples, len(partial_matches))
        for event_id, overlap_ratio in random.sample(partial_matches, medium_count):
            training_examples.append(
                InputExample(texts=[user_profile, event_text[event_id]], label=float(overlap_ratio))
            )

    return training_examples

# Build every (user profile, event text) pair from the loaded frames
training_examples = create_training_examples(users_df, events_df, ground_truth)
print(f"   ‚úì Created {len(training_examples)} training examples")

# Shuffle in place, then cut at the CONFIG['train_split'] fraction:
# everything before the cut trains, everything after validates.
random.shuffle(training_examples)
split_idx = int(len(training_examples) * CONFIG['train_split'])
train_examples, val_examples = training_examples[:split_idx], training_examples[split_idx:]

print(
    f"   ‚úì Training set: {len(train_examples)}\n"
    f"   ‚úì Validation set: {len(val_examples)}"
)

# === EVALUATION FUNCTIONS ===
def evaluate_recommendations(model_path, test_users, events_df, ground_truth, model_name="Model",
                             top_k=5, weights=(0.5, 0.2, 0.3)):
    """
    Score a sentence-transformer model as a top-k event recommender.

    Each event gets a blended score per user:
        w_semantic * cosine(user embedding, event embedding)
      + w_location * (1 if the event's wilaya equals the user's, else 0)
      + w_overlap  * |interests & categories| / |interests|
    The ``top_k`` highest-scoring events are the user's recommendations and
    are compared with ``ground_truth``.

    Args:
        model_path: model name or directory passed to SentenceTransformer.
        test_users: DataFrame with columns ``id``, ``interests`` (list of str)
            and ``wilaya``.
        events_df: DataFrame with columns ``id``, ``title``, ``category``
            (list of str) and ``wilaya``.
        ground_truth: dict mapping user id -> list of relevant event ids.
        model_name: not used by the computation; kept so existing callers
            that pass a label keep working.
        top_k: number of events recommended per user (must be >= 1).
        weights: (w_semantic, w_location, w_overlap).

    Returns:
        dict with ``precision``, ``recall`` and ``f1`` (macro averages over the
        users that have ground truth; ``f1`` is computed from those two
        averages) plus the per-user lists ``precision_scores`` and
        ``recall_scores``. All three averages are 0.0 when no evaluated user
        has ground truth.

    Raises:
        ValueError: if ``top_k`` is smaller than 1.
    """
    if top_k < 1:
        # A slice of [-0:] would silently return every event instead of none.
        raise ValueError("top_k must be at least 1")
    w_semantic, w_location, w_overlap = weights

    model = SentenceTransformer(model_path)

    # Event text is the title followed by the category words written twice.
    event_descriptions = (
        events_df["title"] + " " +
        events_df["category"].apply(lambda cats: " ".join(cats) + " " + " ".join(cats))
    ).str.lower().tolist()
    event_embeddings = model.encode(event_descriptions, show_progress_bar=False)

    # Loop-invariant event data, computed once instead of once per user.
    event_ids = events_df["id"].to_numpy()
    event_category_sets = [set(cats) for cats in events_df["category"]]

    recommendations = {}

    for _, user in test_users.iterrows():
        # The user profile repeats the interest words twice, like the event text.
        user_profile = " ".join(user["interests"]) + " " + " ".join(user["interests"])
        user_embedding = model.encode([user_profile.lower()])

        semantic_sim = cosine_similarity(user_embedding, event_embeddings).flatten()

        # 1.0 where the event takes place in the user's wilaya, else 0.0
        location_score = (events_df["wilaya"] == user["wilaya"]).to_numpy(dtype=float)

        user_interests = set(user["interests"])
        if user_interests:
            overlap_scores = np.array(
                [len(cats & user_interests) / len(user_interests) for cats in event_category_sets],
                dtype=float,
            )
        else:
            # No interests means nothing to overlap with; avoids dividing by zero.
            overlap_scores = np.zeros(len(event_category_sets))

        final_score = (
            w_semantic * semantic_sim
            + w_location * location_score
            + w_overlap * overlap_scores
        )

        # argsort is ascending: take the last top_k positions, best first
        top_indices = final_score.argsort()[-top_k:][::-1]
        recommendations[user["id"]] = event_ids[top_indices].tolist()

    # Calculate metrics over users that have at least one ground-truth event
    precisions = []
    recalls = []

    for user_id, pred in recommendations.items():
        true = ground_truth.get(user_id, [])
        if not true:
            continue

        correct = len(set(pred) & set(true))
        precisions.append(correct / len(pred) if pred else 0)
        recalls.append(correct / len(true))

    # Zero fallbacks stay NumPy scalars so callers that divide by these values
    # see the same numeric type as in the normal case.
    avg_precision = np.mean(precisions) if precisions else np.float64(0.0)
    avg_recall = np.mean(recalls) if recalls else np.float64(0.0)
    if (avg_precision + avg_recall) > 0:
        f1 = 2 * (avg_precision * avg_recall) / (avg_precision + avg_recall)
    else:
        f1 = np.float64(0.0)

    return {
        "precision": avg_precision,
        "recall": avg_recall,
        "f1": f1,
        "precision_scores": precisions,
        "recall_scores": recalls
    }

# === BASELINE EVALUATION ===
print("\nüìä Evaluating BASELINE model (before training)...")
# Fixed seed: the same users are scored again after fine-tuning.
# The sample size is capped because DataFrame.sample raises ValueError when
# asked for more rows than the frame has (sampling without replacement).
# NOTE(review): test_users comes from the same users_df that is passed to
# create_training_examples, so these users are not held out from training.
test_users = users_df.sample(min(200, len(users_df)), random_state=42)
baseline_metrics = evaluate_recommendations(
    CONFIG['base_model'], 
    test_users, 
    events_df, 
    ground_truth,
    "Baseline"
)


print(f"   Precision: {baseline_metrics['precision']:.3f}")
print(f"   Recall:    {baseline_metrics['recall']:.3f}")
print(f"   F1-Score:  {baseline_metrics['f1']:.3f}")


# Model Training

In [None]:

# === LOAD MODEL AND PREPARE TRAINING ===
print("\nü§ñ Loading base model...")
model = SentenceTransformer(CONFIG['base_model'])

# Cosine-similarity regression against the pair labels, fed in shuffled batches
train_loss = losses.CosineSimilarityLoss(model)
train_dataloader = DataLoader(
    train_examples,
    batch_size=CONFIG['batch_size'],
    shuffle=True,
)

# Validation evaluator built from at most the first 200 validation pairs
val_sentences1, val_sentences2, val_scores = [], [], []
for example in val_examples[:200]:
    val_sentences1.append(example.texts[0])
    val_sentences2.append(example.texts[1])
    val_scores.append(example.label)
evaluator = evaluation.EmbeddingSimilarityEvaluator(
    val_sentences1, val_sentences2, val_scores
)

# === TRAINING ===
print("\nüöÄ Starting training...")
print("=" * 60)

# Wall-clock timing around the whole fit() call
training_start = time.time()

model.fit(
    train_objectives=[(train_dataloader, train_loss)],
    epochs=CONFIG['epochs'],
    warmup_steps=CONFIG['warmup_steps'],
    evaluator=evaluator,
    evaluation_steps=CONFIG['evaluation_steps'],
    output_path=CONFIG['output_path'],
    save_best_model=True,
    show_progress_bar=True,
)

training_time = time.time() - training_start
print(f"\n‚úÖ Training complete in {training_time:.2f} seconds!")
print(f"‚úÖ Model saved to: {CONFIG['output_path']}")

# === POST-TRAINING EVALUATION ===
print("\nüìä Evaluating FINE-TUNED model (after training)...")
# Same users and events as the baseline run; only the model path differs.
finetuned_metrics = evaluate_recommendations(
    model_path=CONFIG['output_path'],
    test_users=test_users,
    events_df=events_df,
    ground_truth=ground_truth,
    model_name="Fine-tuned",
)

print(
    f"   Precision: {finetuned_metrics['precision']:.3f}\n"
    f"   Recall:    {finetuned_metrics['recall']:.3f}\n"
    f"   F1-Score:  {finetuned_metrics['f1']:.3f}"
)

# === CALCULATE IMPROVEMENTS ===
def _percent_change(after, before):
    """Return the relative change from *before* to *after*, in percent.

    A zero baseline has no finite relative change, so instead of dividing by
    zero this returns 0.0 when nothing changed and a signed infinity otherwise.
    """
    if before == 0:
        if after == before:
            return 0.0
        return float("inf") if after > before else float("-inf")
    return (after - before) / before * 100


precision_improvement = _percent_change(finetuned_metrics['precision'], baseline_metrics['precision'])
recall_improvement = _percent_change(finetuned_metrics['recall'], baseline_metrics['recall'])
f1_improvement = _percent_change(finetuned_metrics['f1'], baseline_metrics['f1'])

print("\nüìà IMPROVEMENT:")
print(f"   Precision: {precision_improvement:+.1f}%")
print(f"   Recall:    {recall_improvement:+.1f}%")
print(f"   F1-Score:  {f1_improvement:+.1f}%")


# Visualization

In [None]:

# One 16x12 figure laid out on a 3x3 grid: plots 1-3 on the first row,
# plots 4-6 on the second row, plot 7 across the whole third row.
fig = plt.figure(figsize=(16, 12))
gs = fig.add_gridspec(3, 3, hspace=0.3, wspace=0.3)

# Plot 1: grouped bars of precision / recall / F1, baseline vs fine-tuned
ax1 = fig.add_subplot(gs[0, 0])
metrics = ['Precision', 'Recall', 'F1-Score']
baseline_vals = [baseline_metrics['precision'], baseline_metrics['recall'], baseline_metrics['f1']]
finetuned_vals = [finetuned_metrics['precision'], finetuned_metrics['recall'], finetuned_metrics['f1']]

# Bar groups are centred on x; each bar is shifted half a width left or right
x = np.arange(len(metrics))
width = 0.35

bars1 = ax1.bar(x - width/2, baseline_vals, width, label='Before Training', color='#e74c3c', alpha=0.8)
bars2 = ax1.bar(x + width/2, finetuned_vals, width, label='After Training', color='#2ecc71', alpha=0.8)

ax1.set_ylabel('Score', fontweight='bold', fontsize=11)
ax1.set_title('Model Performance: Before vs After Training', fontweight='bold', fontsize=12)
ax1.set_xticks(x)
ax1.set_xticklabels(metrics)
ax1.legend()
ax1.set_ylim(0, 1)
ax1.grid(axis='y', alpha=0.3)

# Write each bar's value just above its top edge
for bars in [bars1, bars2]:
    for bar in bars:
        height = bar.get_height()
        ax1.text(bar.get_x() + bar.get_width()/2., height,
                f'{height:.3f}',
                ha='center', va='bottom', fontsize=9)

# Plot 2: relative improvement per metric as horizontal bars
# (green when the value is positive, red otherwise)
ax2 = fig.add_subplot(gs[0, 1])
improvements = [precision_improvement, recall_improvement, f1_improvement]
colors = ['#2ecc71' if imp > 0 else '#e74c3c' for imp in improvements]

bars = ax2.barh(metrics, improvements, color=colors, alpha=0.8)
ax2.set_xlabel('Improvement (%)', fontweight='bold', fontsize=11)
ax2.set_title('Performance Improvement After Training', fontweight='bold', fontsize=12)
ax2.axvline(0, color='black', linewidth=0.8)
ax2.grid(axis='x', alpha=0.3)

# Label each bar one data unit to the right of its value.
# NOTE(review): for negative values this puts the label over the bar itself.
for i, (bar, val) in enumerate(zip(bars, improvements)):
    ax2.text(val + 1, i, f'{val:+.1f}%', va='center', fontsize=10)

# Plot 3: histogram of per-user precision before training, mean as dashed line
ax3 = fig.add_subplot(gs[0, 2])
ax3.hist(baseline_metrics['precision_scores'], bins=20, color='#e74c3c', alpha=0.7, edgecolor='black')
ax3.axvline(baseline_metrics['precision'], color='darkred', linestyle='--', linewidth=2, 
            label=f'Mean: {baseline_metrics["precision"]:.3f}')
ax3.set_xlabel('Precision@5', fontweight='bold', fontsize=11)
ax3.set_ylabel('Number of Users', fontweight='bold', fontsize=11)
ax3.set_title('Precision Distribution (Before Training)', fontweight='bold', fontsize=12)
ax3.legend()
ax3.grid(alpha=0.3)

# Plot 4: histogram of per-user precision after training, mean as dashed line
ax4 = fig.add_subplot(gs[1, 0])
ax4.hist(finetuned_metrics['precision_scores'], bins=20, color='#2ecc71', alpha=0.7, edgecolor='black')
ax4.axvline(finetuned_metrics['precision'], color='darkgreen', linestyle='--', linewidth=2,
            label=f'Mean: {finetuned_metrics["precision"]:.3f}')
ax4.set_xlabel('Precision@5', fontweight='bold', fontsize=11)
ax4.set_ylabel('Number of Users', fontweight='bold', fontsize=11)
ax4.set_title('Precision Distribution (After Training)', fontweight='bold', fontsize=12)
ax4.legend()
ax4.grid(alpha=0.3)

# Plot 5: per-user recall before and after, overlaid as semi-transparent
# histograms; the legend entries carry the two mean values
ax5 = fig.add_subplot(gs[1, 1])
ax5.hist(baseline_metrics['recall_scores'], bins=20, color='#e74c3c', alpha=0.5, 
         label=f'Before: {baseline_metrics["recall"]:.3f}', edgecolor='black')
ax5.hist(finetuned_metrics['recall_scores'], bins=20, color='#2ecc71', alpha=0.5,
         label=f'After: {finetuned_metrics["recall"]:.3f}', edgecolor='black')
ax5.set_xlabel('Recall@5', fontweight='bold', fontsize=11)
ax5.set_ylabel('Number of Users', fontweight='bold', fontsize=11)
ax5.set_title('Recall Distribution Comparison', fontweight='bold', fontsize=12)
ax5.legend()
ax5.grid(alpha=0.3)

# Plot 6: text-only panel (axes hidden) summarising dataset sizes, training
# settings and before/after metrics; the status line compares precision only
ax6 = fig.add_subplot(gs[1, 2])
ax6.axis('off')
summary_text = f"""
TRAINING SUMMARY
{'='*40}

Dataset:
  ‚Ä¢ Users: {len(users_df):,}
  ‚Ä¢ Events: {len(events_df):,}
  ‚Ä¢ Training examples: {len(train_examples):,}
  ‚Ä¢ Validation examples: {len(val_examples):,}

Training Configuration:
  ‚Ä¢ Epochs: {CONFIG['epochs']}
  ‚Ä¢ Batch size: {CONFIG['batch_size']}
  ‚Ä¢ Training time: {training_time:.1f}s

Results:
  ‚Ä¢ Precision: {baseline_metrics['precision']:.3f} ‚Üí {finetuned_metrics['precision']:.3f}
  ‚Ä¢ Recall: {baseline_metrics['recall']:.3f} ‚Üí {finetuned_metrics['recall']:.3f}
  ‚Ä¢ F1-Score: {baseline_metrics['f1']:.3f} ‚Üí {finetuned_metrics['f1']:.3f}

Status: {'‚úÖ SUCCESS' if finetuned_metrics['precision'] > baseline_metrics['precision'] else '‚ö†Ô∏è  CHECK NEEDED'}
"""
ax6.text(0.1, 0.5, summary_text, fontsize=10, family='monospace',
         verticalalignment='center', bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.3))

# Plot 7: per-epoch curve across the full bottom row.
# NOTE: these points are NOT measured during training. They are generated as
# baseline + (final - baseline) * (1 - exp(-2 * epoch / epochs)), using only the
# two evaluated end points. At the last epoch the factor is 1 - exp(-2) (about
# 0.86), so the curve stops short of the measured fine-tuned value.
ax7 = fig.add_subplot(gs[2, :])
epochs_range = range(1, CONFIG['epochs'] + 1)
# Simulate training curve
base_prec = baseline_metrics['precision']
final_prec = finetuned_metrics['precision']
simulated_precision = [base_prec + (final_prec - base_prec) * (1 - np.exp(-2*e/CONFIG['epochs'])) 
                       for e in epochs_range]
simulated_recall = [baseline_metrics['recall'] + 
                    (finetuned_metrics['recall'] - baseline_metrics['recall']) * 
                    (1 - np.exp(-2*e/CONFIG['epochs'])) for e in epochs_range]

ax7.plot(epochs_range, simulated_precision, marker='o', linewidth=2, markersize=8, 
         label='Precision', color='#3498db')
ax7.plot(epochs_range, simulated_recall, marker='s', linewidth=2, markersize=8,
         label='Recall', color='#e74c3c')
# Fixed reference line at 0.85, drawn as the target score
ax7.axhline(y=0.85, color='green', linestyle='--', alpha=0.5, label='Target (0.85)')
ax7.fill_between(epochs_range, simulated_precision, alpha=0.2, color='#3498db')
ax7.set_xlabel('Epoch', fontweight='bold', fontsize=11)
ax7.set_ylabel('Score', fontweight='bold', fontsize=11)
ax7.set_title('Training Progress (Simulated Curve)', fontweight='bold', fontsize=12)
ax7.legend()
ax7.grid(alpha=0.3)
ax7.set_ylim(0, 1)

plt.suptitle('Sentence Transformer Fine-tuning Results - Algerian Youth Events', 
             fontsize=16, fontweight='bold', y=0.995)

# Write the figure to disk at 300 dpi with a tight bounding box
plt.savefig('training_results.png', dpi=300, bbox_inches='tight')
print("‚úÖ Visualization saved to: training_results.png")

# === SAVE RESULTS ===
# Every metric goes through float() so NumPy scalars are stored as plain
# JSON numbers; the three headline metrics are copied for both runs.
results = {
    "timestamp": datetime.now().isoformat(),
    "config": CONFIG,
    "baseline": {
        metric: float(baseline_metrics[metric])
        for metric in ("precision", "recall", "f1")
    },
    "finetuned": {
        metric: float(finetuned_metrics[metric])
        for metric in ("precision", "recall", "f1")
    },
    "improvement": dict(
        precision_pct=float(precision_improvement),
        recall_pct=float(recall_improvement),
        f1_pct=float(f1_improvement),
    ),
    "training_time_seconds": float(training_time),
}

# Serialise first, then write the text in one call
with open('training_results.json', 'w') as f:
    f.write(json.dumps(results, indent=2))

print("‚úÖ Results saved to: training_results.json")

# === FINAL SUMMARY ===
# Console recap: where the model was written, whether both precision and
# recall went up, how long training took, and how to load the saved model.
print("\n TRAINING COMPLETE!")
print("="*60)
print(f"\nüéØ Fine-tuned model location: {CONFIG['output_path']}")
# True only when both the precision and the recall improvement are positive
print(f"\n üìä Metrics improved: {precision_improvement > 0 and recall_improvement > 0}")
print(f"\n ‚è±Ô∏è  Training time: {training_time:.1f}s")
print(f"\nüí° To use the fine-tuned model:")
print(f" \n   model = SentenceTransformer('{CONFIG['output_path']}')")

# Display the figure; it was already written to training_results.png earlier
plt.show()