In [None]:
# 1) Clone repo (useful on fresh Kaggle session) and install deps
!git clone https://github.com/ShMazumder/Benchmarking-MoR-on-fine-tuned-SLM.git || true
%cd Benchmarking-MoR-on-fine-tuned-SLM/code
# Install requirements (Kaggle may already have torch; this will install others)
!pip install -r requirements.txt

In [None]:
# 2) (Optional) Reduce epochs in config.py for quicker testing
from pathlib import Path

# (full-run assignment, quick-test assignment) pairs, matched as literal text in config.py.
EPOCH_OVERRIDES = [
    ('epochs_baseline = 30', 'epochs_baseline = 3'),
    ('epochs_mor_exp1 = 30', 'epochs_mor_exp1 = 3'),
    ('epochs_mor_exp2 = 50', 'epochs_mor_exp2 = 5'),
]


def reduce_epochs(cfg_text, overrides=EPOCH_OVERRIDES):
    """Apply the quick-test epoch overrides to the text of config.py.

    Args:
        cfg_text: Full text of the config file.
        overrides: Iterable of ``(old, new)`` literal substrings to replace.

    Returns:
        ``(new_text, applied)`` where ``applied`` lists the replacement strings
        whose ``old`` pattern was actually present in ``cfg_text``. An empty
        list means the text was returned unchanged.
    """
    applied = []
    for old, new in overrides:
        if old in cfg_text:
            cfg_text = cfg_text.replace(old, new)
            applied.append(new)
    return cfg_text, applied


cfg_path = Path('config.py')
if cfg_path.exists():
    cfg = cfg_path.read_text()
    cfg, applied = reduce_epochs(cfg)
    if applied:
        # Only touch the file (and claim success) when something really changed.
        cfg_path.write_text(cfg)
        print('Updated config.py to smaller epoch counts for quick tests')
        print('Applied:', ', '.join(applied))
    else:
        print('config.py found, but none of the expected epoch settings matched; file left unchanged')
else:
    print('config.py not found; skipping epoch reduction')

## 3) Run Tiny Shakespeare experiments
The commands below mirror `run_all_experiments.sh` for the Tiny Shakespeare dataset. Run cells one at a time so you can monitor logs and abort if necessary.

In [None]:
# 3.1 Training Baseline N=12
# Runs train.py with --dataset shakespeare and --experiment baseline_12.
# train.py is resolved relative to the working directory, which the setup cell
# changes to the repo's code/ folder via %cd.
# The echo prints a banner so this run is easy to locate in the cell output.
!echo '--- Training Baseline N=12 (shakespeare) ---'
!python train.py --dataset shakespeare --experiment baseline_12

In [None]:
# 3.2 Training MoR Exp1 (N=12)
# Runs train.py with --dataset shakespeare and --experiment mor_exp1.
# The echo prints a banner so this run is easy to locate in the cell output.
!echo '--- Training MoR Exp1 (shakespeare) ---'
!python train.py --dataset shakespeare --experiment mor_exp1

In [None]:
# 3.3 Training Baseline N=6
# Runs train.py with --dataset shakespeare and --experiment baseline_6.
# The echo prints a banner so this run is easy to locate in the cell output.
!echo '--- Training Baseline N=6 (shakespeare) ---'
!python train.py --dataset shakespeare --experiment baseline_6

In [None]:
# 3.4 Training MoR Exp2 (N=12, E≈6)
# Runs train.py with --dataset shakespeare and --experiment mor_exp2.
# The echo prints a banner so this run is easy to locate in the cell output.
!echo '--- Training MoR Exp2 (shakespeare) ---'
!python train.py --dataset shakespeare --experiment mor_exp2

## 4) Run WikiText-2 experiments
Proceed similarly for the wikitext dataset.

In [None]:
# 4.1 Training Baseline N=12 (wikitext)
# Runs train.py with --dataset wikitext and --experiment baseline_12.
# The echo prints a banner so this run is easy to locate in the cell output.
!echo '--- Training Baseline N=12 (wikitext) ---'
!python train.py --dataset wikitext --experiment baseline_12

In [None]:
# 4.2 Training MoR Exp1 (wikitext)
# Runs train.py with --dataset wikitext and --experiment mor_exp1.
# The echo prints a banner so this run is easy to locate in the cell output.
!echo '--- Training MoR Exp1 (wikitext) ---'
!python train.py --dataset wikitext --experiment mor_exp1

In [None]:
# 4.3 Training Baseline N=6 (wikitext)
# Runs train.py with --dataset wikitext and --experiment baseline_6.
# The echo prints a banner so this run is easy to locate in the cell output.
!echo '--- Training Baseline N=6 (wikitext) ---'
!python train.py --dataset wikitext --experiment baseline_6

In [None]:
# 4.4 Training MoR Exp2 (wikitext)
# Runs train.py with --dataset wikitext and --experiment mor_exp2.
# The echo prints a banner so this run is easy to locate in the cell output.
!echo '--- Training MoR Exp2 (wikitext) ---'
!python train.py --dataset wikitext --experiment mor_exp2

## 5) Aggregate results and plot summaries
This cell searches the `results/` directory for result JSONs and history files, prints a summary table, and (if history files exist) plots training loss and accuracy for each history file found.

In [None]:
import json
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt

# File-name suffix of per-epoch training logs. These are plotted, not tabulated.
HISTORY_SUFFIX = '_history.json'


def read_json(path):
    """Parse one JSON file and return its content, closing the file handle on exit."""
    with open(path) as fh:
        return json.load(fh)


def collect_result_rows(results_dir):
    """Build one summary row per result JSON found directly in ``results_dir``.

    Files ending in ``_history.json`` are skipped: the plotting code iterates them
    as a list of per-epoch records, so they have no ``.get`` and would only
    produce a spurious "Could not read" message here.

    Args:
        results_dir: Directory (str or Path) containing the result JSON files.

    Returns:
        List of dicts with keys ``file``, ``experiment``, ``model_type``,
        ``accuracy`` and ``test_accuracy`` (missing keys become ``None``),
        ordered by file name. Unreadable files are reported and skipped.
    """
    rows = []
    for j in sorted(Path(results_dir).glob('*.json')):
        if j.name.endswith(HISTORY_SUFFIX):
            continue
        try:
            data = read_json(j)
            rows.append({
                'file': j.name,
                'experiment': data.get('experiment'),
                'model_type': data.get('model_type'),
                'accuracy': data.get('accuracy'),
                'test_accuracy': data.get('test_accuracy')
            })
        except Exception as e:
            # Best effort: one bad file should not hide the remaining results.
            print('Could not read', j, e)
    return rows


def plot_history(hf):
    """Plot train loss (left axis) and train accuracy (right axis) from one history file.

    Args:
        hf: Path to a history JSON holding a list of per-epoch dicts. Each entry
            must have ``epoch``; ``loss`` and ``acc`` are optional.

    Raises:
        Exception: Whatever reading or plotting raised; the partially built
            figure is closed first so it is not left open.
    """
    hist = read_json(hf)
    epochs = [h['epoch'] for h in hist]
    loss = [h.get('loss') for h in hist]
    acc = [h.get('acc') for h in hist]
    fig, ax1 = plt.subplots()
    try:
        ax1.set_xlabel('epoch')
        if any(v is not None for v in loss):
            ax1.plot(epochs, loss, '-o', color='tab:red', label='train loss')
            ax1.set_ylabel('loss', color='tab:red')
        ax2 = ax1.twinx()
        if any(v is not None for v in acc):
            ax2.plot(epochs, acc, '-s', color='tab:blue', label='train acc')
            ax2.set_ylabel('accuracy (%)', color='tab:blue')
        plt.title(f'Training metrics from {hf.name}')
        fig.tight_layout()
        plt.show()
    except Exception:
        # Do not leave a half-built figure open when plotting fails.
        plt.close(fig)
        raise


p = Path('results')
if not p.exists():
    print('No results/ directory found. Run the experiment cells first.')
else:
    rows = collect_result_rows(p)
    if rows:
        df = pd.DataFrame(rows)
        display(df.sort_values(['model_type','experiment']))
    else:
        print('No result JSON files parsed.')

    # Plot each history file (if present), in a stable name order.
    hist_files = sorted(p.glob('*' + HISTORY_SUFFIX))
    if not hist_files:
        print('No history JSON files found (*_history.json).')
    else:
        for hf in hist_files:
            try:
                plot_history(hf)
            except Exception as e:
                print('Could not plot', hf, e)

---
Notes:
- Running all these experiments sequentially can be time-consuming; consider running a subset or using the optional epoch reduction cell above for quick verification.
- To run AMP variants, replace `train.py` calls with `train_amp.py --amp` if you have `train_amp.py` available and want mixed-precision.
- The setup cell assumes a fresh Kaggle-style session (it clones the repo and installs requirements); if you run this notebook on another machine or with a specific runtime configuration, adapt that cell to your environment first.