<a href="https://colab.research.google.com/github/Sagargupta16/LeetCode_Rating_Predictor/blob/main/LC_Contest_Rating_Predictor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import json
import numpy as np
import tensorflow as tf
import joblib
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# Seed NumPy and TensorFlow with the same fixed value.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Project root: the parent directory when the data file is found there,
# otherwise the current working directory.
if Path("../data/data.json").exists():
    ROOT = Path("..")
else:
    ROOT = Path(".")
DATA_PATH = ROOT / "data" / "data.json"
MODEL_PATH = ROOT / "model.keras"
SCALER_PATH = ROOT / "scaler.save"
NUM_FEATURES = 15  # number of f1..fN columns read from each record

print(f"TensorFlow {tf.__version__}")
gpus = tf.config.list_physical_devices("GPU")
if not gpus:
    print("No GPU — training on CPU (use WSL2 for GPU)")
else:
    print(f"GPU: {gpus[0].name}")
    # Enable memory growth on every visible GPU.
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
print(f"Data: {DATA_PATH.resolve()}")

## Data Collection

**Skip this section if you already have `data/data.json`.**

To refresh training data, run from the project root:
```bash
python scripts/update_data.py
```
The REST contest API is now blocked (403), so data collection uses GraphQL via `scripts/update_data.py`.

*(Data collection cells removed -- use `python scripts/update_data.py` instead)*

## Load and Prepare Data — 15 Features

| # | Key | Feature | Correlation |
|---|-----|---------|-------------|
| 1 | f1 | current_rating | -0.124 |
| 2 | f2 | rank | **-0.339** |
| 3 | f3 | total_participants | -0.159 |
| 4 | f4 | rank_percentage | **-0.374** |
| 5 | f5 | attended_contests | -0.140 |
| 6 | f6 | avg_solve_rate (all-time) | -0.055 |
| 7 | f7 | avg_finish_time (all-time) | +0.091 |
| 8 | f8 | recent_solve_rate (last 5) | -0.086 |
| 9 | f9 | recent_finish_time (last 5) | +0.066 |
| 10 | f10 | rating_trend (last 5 changes) | +0.031 |
| 11 | f11 | max_rating | -0.118 |
| 12 | f12 | log(1 + rank) | **-0.432** |
| 13 | f13 | rating * percentile | **-0.432** |
| 14 | f14 | avg_solve_rate * rating | -0.094 |
| 15 | f15 | time_efficiency (avg_ft / 5400) | +0.091 |

In [None]:
# Load data (15 features: f1-f15 + output).
# The file is read as JSON Lines: one JSON object per line.
records = []
with open(DATA_PATH, encoding="utf-8") as f:
    for line in f:
        # Skip blank lines (e.g. a trailing newline) instead of failing in json.loads.
        if line.strip():
            records.append(json.loads(line))
print(f"Loaded {len(records)} records")
if not records:
    # Fail with a clear message rather than an IndexError on X.shape[1] below.
    raise ValueError(f"No records found in {DATA_PATH}")

# Build feature matrix — order must match production main.py
X = np.array([[r[f"f{i}"] for i in range(1, NUM_FEATURES + 1)] for r in records])
y = np.array([r["output"] for r in records])

assert X.shape[1] == NUM_FEATURES, f"Expected {NUM_FEATURES} features, got {X.shape[1]}"
print(f"Features: {X.shape}, Output: mean={y.mean():.2f}, std={y.std():.2f}")

# Split BEFORE scaling so the scaler's min/max are learned from training rows
# only; fitting on the full matrix would leak test-set statistics into the
# preprocessing and make the test metrics optimistic.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=SEED
)

# Scale: fit on the training rows, then apply the same transform everywhere.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
X_scaled = scaler.transform(X)  # full scaled matrix, kept for cells that still reference it
joblib.dump(scaler, SCALER_PATH)
print(f"Scaler saved ({NUM_FEATURES} features)")

print(f"Train: {X_train.shape[0]:,}, Test: {X_test.shape[0]:,}")

In [None]:
# Model: Dense(128)->Dropout(0.3)->Dense(64)->Dropout(0.2)->Dense(32)->Dense(1)
# The input shape is declared with an explicit Input object as the first entry:
# passing `input_shape=` to a layer is deprecated in Keras 3 and emits a warning.
model = Sequential([
    tf.keras.Input(shape=(NUM_FEATURES,)),
    Dense(128, activation="relu"),
    Dropout(0.3),
    Dense(64, activation="relu"),
    Dropout(0.2),
    Dense(32, activation="relu"),
    Dense(1),  # single linear output: the regression target `y`
])

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
    loss="mse",
    metrics=["mae"],
)
model.summary()

# Stop once val_loss has not improved for 15 epochs and roll back to the
# best weights seen so far.
early_stop = EarlyStopping(
    monitor="val_loss", patience=15, restore_best_weights=True, verbose=1
)

# 10% of the training rows are held out by Keras as the validation set.
history = model.fit(
    X_train, y_train,
    validation_split=0.1,
    epochs=300,
    batch_size=128,
    callbacks=[early_stop],
    verbose=2,
)

# evaluate() returns [loss, *metrics]; here that is (MSE, MAE).
test_loss, test_mae = model.evaluate(X_test, y_test, verbose=0)
print(f"\nTest MSE:  {test_loss:.2f}")
print(f"Test RMSE: {np.sqrt(test_loss):.2f}")
print(f"Test MAE:  {test_mae:.2f}")

# Save main model + versioned backup
model.save(MODEL_PATH)
print(f"Model saved to {MODEL_PATH}")

from datetime import datetime

# Timestamp (local time) used as the version tag shared by all backup files.
version = datetime.now().strftime("%Y%m%d_%H%M%S")
versioned_dir = ROOT / "models" / "history"
versioned_dir.mkdir(parents=True, exist_ok=True)
model.save(versioned_dir / f"model_{version}.keras")
joblib.dump(scaler, versioned_dir / f"scaler_{version}.save")

# Build the metrics payload before opening the file so that a failure here
# cannot leave a truncated, empty metrics file behind.
rmse = float(np.sqrt(test_loss))
metrics = {
    "mse": float(test_loss),
    "rmse": rmse,
    "mae": float(test_mae),
    "records": len(records),
    "features": NUM_FEATURES,
    "params": model.count_params(),
}
with open(versioned_dir / f"metrics_{version}.json", "w", encoding="utf-8") as mf:
    json.dump(metrics, mf, indent=2)
print(f"Versioned backup: models/history/*_{version}.*")

In [None]:
# Sanity check: run the trained model on one hand-built example row.
import math

rating, rank, participants, attended = 1957, 1869, 21165, 87
pct = (rank * 100) / participants

# Example user history; the two all-time averages are reused for f14 and f15.
avg_solve_rate, avg_finish_time = 0.58, 3100

features = [
    rating, rank, participants, pct, attended,   # f1-f5
    avg_solve_rate, avg_finish_time,             # f6-f7
    0.45, 2200, 4.0, 2007,                       # f8-f11
    math.log1p(rank),                            # f12
    rating * (rank / participants),              # f13
    avg_solve_rate * rating,                     # f14
    avg_finish_time / 5400,                      # f15
]
sample = np.array([features])  # shape (1, 15): a single row

# Apply the same fitted scaler as the training data before predicting.
pred = model.predict(scaler.transform(sample), verbose=0)
print(f"Rating: {rating}, Rank: {rank}/{participants}")
print(f"Predicted change: {pred[0][0]:+.2f}")
print(f"Predicted new rating: {rating + pred[0][0]:.2f}")

# Training curve
# The backend is left to matplotlib's own selection. Forcing the
# non-interactive "Agg" backend made plt.show() a no-op, so the curves were
# drawn but never displayed or saved.
try:
    import matplotlib.pyplot as plt
except ImportError:
    print("matplotlib not installed, skipping plot")
else:
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))

    # Left panel: loss (MSE) per epoch, training vs. validation.
    ax1.plot(history.history["loss"], label="Train")
    ax1.plot(history.history["val_loss"], label="Val")
    ax1.set_xlabel("Epoch")
    ax1.set_ylabel("MSE")
    ax1.set_title("Loss")
    ax1.legend()

    # Right panel: MAE per epoch, training vs. validation.
    ax2.plot(history.history["mae"], label="Train")
    ax2.plot(history.history["val_mae"], label="Val")
    ax2.set_xlabel("Epoch")
    ax2.set_ylabel("MAE")
    ax2.set_title("MAE")
    ax2.legend()

    plt.tight_layout()
    plt.show()