**Goal**: run multiple experiments, each driven by its own YAML config, and save every run's metrics as JSON so the runs can be compared.

In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell runs and edits to project code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

Cell 1 – imports & helpers

In [2]:
import sys
from pathlib import Path
import json

import yaml
import numpy as np

# Project root = parent of the current working directory.
# NOTE(review): this assumes the kernel is started from a folder directly
# below the project root (e.g. a notebooks/ directory) -- confirm.
PROJECT_ROOT = Path.cwd().resolve().parents[0]

# Put the root on sys.path (only once) so the `src.*` imports below resolve.
if not any(entry == str(PROJECT_ROOT) for entry in sys.path):
    sys.path.append(str(PROJECT_ROOT))

from src.data.load import load_all_sources, add_broad_category, load_yaml
from src.models.classifier import train_text_classifier
from src.models.evaluation import evaluate_classifier

# Folder that receives one metrics JSON per experiment; created (with any
# missing parents) the first time this cell runs.
EXPERIMENTS_DIR = PROJECT_ROOT.joinpath('experiments', 'results')
EXPERIMENTS_DIR.mkdir(exist_ok=True, parents=True)


Cell 2 – experiment function (loads a YAML config, trains, evaluates, and saves the metrics)

In [5]:
def run_experiment(config_path: Path, experiment_name: str) -> dict:
    """Train and evaluate one text classifier from a YAML config and save its metrics.

    Steps, in order:
      1. Read the config with ``load_yaml``.
      2. Build the dataset with ``load_all_sources`` and ``add_broad_category``.
      3. Optionally drop categories with fewer than
         ``categories.min_samples_per_class`` rows (skipped when that setting
         is missing, null, or not positive).
      4. Fit a model with ``train_text_classifier`` and predict on the
         ``X_test`` split it returns.
      5. Score the predictions with ``evaluate_classifier``.
      6. Write the metrics to ``EXPERIMENTS_DIR / f'{experiment_name}.json'``.

    Args:
        config_path: Path to the experiment's YAML config file.
        experiment_name: Label for the run; also used as the stem of the
            JSON file written under ``EXPERIMENTS_DIR``.

    Returns:
        A dict with the keys ``experiment_name``, ``config_path``,
        ``accuracy``, ``macro_f1``, ``labels`` and ``classification_report``
        -- the same payload that is written to disk.
    """
    cfg = load_yaml(config_path)
    print(f'Running experiment: {experiment_name}')
    print(f'Using config: {config_path}')

    df = load_all_sources(cfg, root=PROJECT_ROOT)
    df = add_broad_category(df, cfg, root=PROJECT_ROOT)

    # (optional) filter to categories with enough samples.
    # Both the `categories` section and the threshold itself may be absent or
    # null in the YAML; treat either case as "no filtering" instead of raising.
    # NOTE(review): assumes load_yaml returns a plain dict -- confirm.
    category_cfg = cfg.get('categories') or {}
    min_samples = category_cfg.get('min_samples_per_class') or 0
    if min_samples > 0:
        counts = df['broad_category'].value_counts()
        keep_cats = counts[counts >= min_samples].index.tolist()
        df = df[df['broad_category'].isin(keep_cats)]
        print('Kept categories:', keep_cats)

    # Only the fitted model and the test split are needed here; the training
    # split and the sixth return value are intentionally ignored.
    model, _X_train, X_test, _y_train, y_test, _ = train_text_classifier(df, cfg)
    y_pred = model.predict(X_test)

    metrics = evaluate_classifier(y_test, y_pred)
    print('Accuracy:', metrics['accuracy'])
    print('Macro F1:', metrics['macro_f1'])

    # Payload that is both persisted and returned to the caller.
    result = {
        'experiment_name': experiment_name,
        'config_path': str(config_path),
        'accuracy': metrics['accuracy'],
        'macro_f1': metrics['macro_f1'],
        'labels': metrics['labels'],
        'classification_report': metrics['classification_report'],
    }

    out_path = EXPERIMENTS_DIR / f'{experiment_name}.json'
    # Guard against the results folder having been removed since the setup
    # cell ran, or an experiment name that contains a sub-directory.
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with out_path.open('w', encoding='utf-8') as f:
        json.dump(result, f, indent=2)

    return result


Cell 3 – run a few experiments

In [6]:
# (config file, experiment name) pairs, executed in the order listed.
experiment_specs = [
    (PROJECT_ROOT / 'configs' / 'base.yaml', 'exp_tfidf_logreg_bart'),
    # Requires configs/model_svm.yaml (a copy of base.yaml with an SVM model):
    (PROJECT_ROOT / 'configs' / 'model_svm.yaml', 'exp_tfidf_svm_bart'),
    # Enable once configs/model_nb.yaml exists:
    # (PROJECT_ROOT / 'configs' / 'model_nb.yaml', 'exp_tfidf_nb_bart'),
]

# Collect one result dict per experiment; later cells read this list.
results = []
for spec_path, spec_name in experiment_specs:
    results.append(run_experiment(spec_path, experiment_name=spec_name))

results

INFO:src.data.load:Loading source pakistan_today from /home/spark/NUST/Semester 5/Data Mining/Project/data/raw/pakistan_today(full-data).csv (encoding=utf-8)


Running experiment: exp_tfidf_logreg_bart
Using config: /home/spark/NUST/Semester 5/Data Mining/Project/configs/base.yaml


INFO:src.data.load:Loading source tribune from /home/spark/NUST/Semester 5/Data Mining/Project/data/raw/tribune(full-data).csv (encoding=latin1)
INFO:src.data.load:Loading source dawn from /home/spark/NUST/Semester 5/Data Mining/Project/data/raw/dawn (full-data).csv (encoding=latin1)
INFO:src.data.load:Loading source daily_times from /home/spark/NUST/Semester 5/Data Mining/Project/data/raw/daily_times(full-data).csv (encoding=utf-8)
INFO:src.data.load:Loading preprocessed business_reorder from /home/spark/NUST/Semester 5/Data Mining/Project/data/interim/business_reorder_clean.parquet
INFO:src.data.load:Filtered invalid sources: (625905, 7) -> (624642, 7)
INFO:src.data.load:Combined dataset shape: (624642, 7)
INFO:src.data.load:Sampling up to 10000 rows per source (__file__ column).
  .apply(lambda g: g.sample(min(len(g), per_source), random_state=config['project']['random_seed']))


Kept categories: ['Pakistan', 'World', 'Business', 'Sports', 'Lifestyle', 'Opinion']


INFO:src.data.load:Loading source pakistan_today from /home/spark/NUST/Semester 5/Data Mining/Project/data/raw/pakistan_today(full-data).csv (encoding=utf-8)


Accuracy: 0.8586858685868587
Macro F1: 0.8462895743083197
Running experiment: exp_tfidf_svm_bart
Using config: /home/spark/NUST/Semester 5/Data Mining/Project/configs/model_svm.yaml


INFO:src.data.load:Loading source tribune from /home/spark/NUST/Semester 5/Data Mining/Project/data/raw/tribune(full-data).csv (encoding=latin1)
INFO:src.data.load:Loading source dawn from /home/spark/NUST/Semester 5/Data Mining/Project/data/raw/dawn (full-data).csv (encoding=latin1)
INFO:src.data.load:Loading source daily_times from /home/spark/NUST/Semester 5/Data Mining/Project/data/raw/daily_times(full-data).csv (encoding=utf-8)
INFO:src.data.load:Loading preprocessed business_reorder from /home/spark/NUST/Semester 5/Data Mining/Project/data/interim/business_reorder_clean.parquet
INFO:src.data.load:Filtered invalid sources: (625905, 7) -> (624642, 7)
INFO:src.data.load:Combined dataset shape: (624642, 7)
INFO:src.data.load:Sampling up to 10000 rows per source (__file__ column).
  .apply(lambda g: g.sample(min(len(g), per_source), random_state=config['project']['random_seed']))


Kept categories: ['Pakistan', 'World', 'Business', 'Sports', 'Lifestyle', 'Opinion']
Accuracy: 0.8687154429728687
Macro F1: 0.8519912210134223


[{'experiment_name': 'exp_tfidf_logreg_bart',
  'config_path': '/home/spark/NUST/Semester 5/Data Mining/Project/configs/base.yaml',
  'accuracy': 0.8586858685868587,
  'macro_f1': 0.8462895743083197,
  'labels': ['Business',
   'Lifestyle',
   'Opinion',
   'Pakistan',
   'Sports',
   'World'],
  'classification_report': {'Business': {'precision': 0.7580645161290323,
    'recall': 0.8627622377622378,
    'f1-score': 0.8070318887980377,
    'support': 1144.0},
   'Lifestyle': {'precision': 0.7617689015691869,
    'recall': 0.8855721393034826,
    'f1-score': 0.8190184049079755,
    'support': 603.0},
   'Opinion': {'precision': 0.706,
    'recall': 0.8506024096385543,
    'f1-score': 0.7715846994535519,
    'support': 415.0},
   'Pakistan': {'precision': 0.9313625552157663,
    'recall': 0.8248570568763166,
    'f1-score': 0.8748803064155761,
    'support': 3323.0},
   'Sports': {'precision': 0.9537037037037037,
    'recall': 0.9727177334732424,
    'f1-score': 0.9631168831168831,
    '

Cell 4 – compare experiments

In [7]:
import pandas as pd

# Headline metrics only: one row per experiment, in run order. The nested
# per-class report stays in `results` / the saved JSON files.
summary_columns = ('experiment_name', 'accuracy', 'macro_f1')
summary_rows = []
for run in results:
    summary_rows.append({column: run[column] for column in summary_columns})

df_results = pd.DataFrame(summary_rows)
df_results


Unnamed: 0,experiment_name,accuracy,macro_f1
0,exp_tfidf_logreg_bart,0.858686,0.84629
1,exp_tfidf_svm_bart,0.868715,0.851991
