# Statistical Validation
This notebook demonstrates an 80/20 train/test split of `public_cases.json`, computes MAE, MAPE, WAPE and RMSE, runs t-tests/ANOVA on interview heuristics and performs a simple grid search for bonus and penalty values.

In [None]:
import json, pandas as pd, numpy as np
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
from scipy import stats

In [None]:
# Load data
# JSON text is UTF-8; pass the encoding explicitly so the read does not depend
# on the platform's default locale encoding.
with open('public_cases.json', encoding='utf-8') as f:
    cases = json.load(f)
# Flatten each case: the fields under 'input' become columns and the case's
# 'expected_output' is added as one more column.
records = [
    {**case['input'], 'expected_output': case['expected_output']}
    for case in cases
]
df = pd.DataFrame(records)
df.head()

In [None]:
# Train/test split
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split reproducible across runs.
train_df, test_df = train_test_split(df, random_state=42, test_size=0.2)
(train_df.shape[0], test_df.shape[0])

In [None]:
# Simple heuristic-based predictor
def predict_row(row, five_day_bonus=50, low_receipt_penalty=0.8):
    """Predict the output for one case using hand-written heuristics.

    Parameters
    ----------
    row : mapping-like
        Must provide 'trip_duration_days', 'miles_traveled' and
        'total_receipts_amount' (a DataFrame row or a plain dict both work).
    five_day_bonus : number, default 50
        Flat amount added when the trip lasts exactly 5 days.
    low_receipt_penalty : number, default 0.8
        Multiplier applied to receipts when they total less than 50.

    Returns
    -------
    number
        Sum of the per-day base, the mileage component and the receipts
        component.
    """
    days = row['trip_duration_days']
    miles = row['miles_traveled']
    receipts = row['total_receipts_amount']

    # Per-day base: 100 per day, plus the bonus for exactly five days.
    base = days * 100
    if days == 5:
        base += five_day_bonus

    # Tiered mileage: 0.6 per mile for the first 100 miles, 0.4 beyond that.
    if miles <= 100:
        mileage = miles * 0.6
    else:
        mileage = 100 * 0.6 + (miles - 100) * 0.4
    # Efficiency bonus for 180-220 miles per day. The `days > 0` guard keeps
    # zero-day rows from raising ZeroDivisionError (or producing inf).
    if days > 0 and 180 <= miles / days <= 220:
        mileage += 30

    # Tiered receipts: penalty rate under 50, 0.8 up to 800, 0.5 above 800.
    if receipts < 50:
        receipts_component = receipts * low_receipt_penalty
    elif receipts <= 800:
        receipts_component = receipts * 0.8
    else:
        receipts_component = 800 * 0.8 + (receipts - 800) * 0.5

    # Bonus when the cents part is .49 or .99. Formatting to two decimals
    # avoids depending on float repr, and matching '.49' rather than '0.49'
    # means amounts such as 12.49 qualify, not only those like 10.49.
    if f"{receipts:.2f}".endswith(('.49', '.99')):
        receipts_component += 10

    return base + mileage + receipts_component


In [None]:
def apply_predict(df, **params):
    """Run `predict_row` on every row of `df`.

    Extra keyword arguments are forwarded through `DataFrame.apply`, so they
    reach `predict_row` (e.g. `five_day_bonus`, `low_receipt_penalty`).
    Returns whatever `df.apply(..., axis=1)` produces: one value per row.
    """
    predictions = df.apply(predict_row, axis=1, **params)
    return predictions

# Baseline predictions with the default heuristic parameters, for both splits.
train_pred, test_pred = (
    apply_predict(split) for split in (train_df, test_df)
)

In [None]:
def mape(y_true, y_pred):
    """Mean absolute percentage error, expressed in percent.

    Each error is divided by its own true value, so zeros in `y_true`
    produce non-finite terms.
    """
    relative_errors = (y_true - y_pred) / y_true
    mean_relative_error = np.mean(np.abs(relative_errors))
    return mean_relative_error * 100

def wape(y_true, y_pred):
    """Weighted absolute percentage error, expressed in percent.

    Total absolute error divided by the total absolute true value.
    """
    total_abs_error = np.sum(np.abs(y_true - y_pred))
    total_abs_truth = np.sum(np.abs(y_true))
    return total_abs_error / total_abs_truth * 100

def rmse(y_true, y_pred):
    """Root mean squared error between two equally shaped sequences.

    Computed directly with numpy rather than
    `mean_squared_error(..., squared=False)`: that keyword was deprecated in
    scikit-learn 1.4 and later removed, so the call fails on current releases.

    Values are compared positionally (inputs go through `np.asarray`, so a
    pandas index is ignored).

    Raises
    ------
    ValueError
        If the two inputs do not have the same shape.
    """
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)
    # Refuse mismatched shapes instead of letting numpy broadcast silently.
    if actual.shape != predicted.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape, got {actual.shape} and {predicted.shape}"
        )
    return float(np.sqrt(np.mean((actual - predicted) ** 2)))

def evaluate(df, pred):
    """Score predictions against the 'expected_output' column of `df`.

    Returns a dict with the keys 'MAE', 'MAPE', 'WAPE' and 'RMSE', in that
    order.
    """
    actual = df['expected_output']
    scorers = (
        ('MAE', mean_absolute_error),
        ('MAPE', mape),
        ('WAPE', wape),
        ('RMSE', rmse),
    )
    return {label: scorer(actual, pred) for label, scorer in scorers}

# Score the baseline predictions on both splits and display them side by side.
train_metrics, test_metrics = (
    evaluate(split, split_pred)
    for split, split_pred in ((train_df, train_pred), (test_df, test_pred))
)
(train_metrics, test_metrics)

## T-tests and ANOVA
We now test some interview-based heuristics.

In [None]:
# Five-day bonus
# Compare the per-day expected output of 5-day trips with that of the
# neighbouring 4- and 6-day trips, using Welch's t-test (unequal variances).
days = train_df['trip_duration_days']
per_diem = train_df['expected_output'] / days
five_day = per_diem[days == 5]
other = per_diem[(days == 4) | (days == 6)]
welch = stats.ttest_ind(five_day, other, equal_var=False)
t_stat, p = welch.statistic, welch.pvalue
print('Five-day bonus t-test p-value:', p)

In [None]:
# Efficiency bonus
# Build a derived frame instead of assigning a column onto `train_df`: the
# training frame is a slice returned by train_test_split, so in-place column
# assignment can trigger SettingWithCopyWarning and would change `train_df`
# for every later cell.
train_eff = train_df.assign(
    miles_per_day=train_df['miles_traveled'] / train_df['trip_duration_days']
)
in_band = train_eff['miles_per_day'].between(180, 220)  # inclusive on both ends
out_of_band = (train_eff['miles_per_day'] < 180) | (train_eff['miles_per_day'] > 220)
band = train_eff[in_band]
non_band = train_eff[out_of_band]
# Welch's t-test on the expected output inside vs. outside the 180-220 band.
# NOTE(review): this compares total expected output, not a per-day figure, so
# trip length may confound the result; confirm this is intended.
stat, p = stats.ttest_ind(band['expected_output'], non_band['expected_output'], equal_var=False)
print('Efficiency bonus t-test p-value:', p)

In [None]:
# Receipt buckets ANOVA
# Bucket receipts into [0, 50], (50, 800] and (800, inf):
#  - include_lowest=True keeps rows with receipts of exactly 0;
#  - the open-ended top bin keeps rows above 3000.
# Both kinds of row would otherwise fall outside every bin and be dropped.
# The buckets live in a standalone Series so `train_df` is not modified
# (a categorical column there would make later row-wise `apply` calls build
# object-dtype rows).
receipt_bucket = pd.cut(
    train_df['total_receipts_amount'],
    bins=[0, 50, 800, np.inf],
    include_lowest=True,
)
# observed=True skips buckets with no rows; an empty group would make the
# ANOVA return NaN.
groups = [
    group
    for _, group in train_df['expected_output'].groupby(receipt_bucket, observed=True)
]
if len(groups) >= 2:
    stat, p = stats.f_oneway(*groups)
else:
    # f_oneway needs at least two groups; report NaN rather than raising.
    stat, p = np.nan, np.nan
print('Receipt bucket ANOVA p-value:', p)

## Grid search
We search over a few values for the five-day bonus and low-receipt penalty.

In [None]:
# Exhaustive search over a small grid, keeping the combination with the
# lowest MAE on the training split (the first one found wins ties).
bonus_grid = range(0, 151, 25)
penalty_grid = np.linspace(0.7, 1.0, 4)
train_target = train_df['expected_output']

best = None
for bonus in bonus_grid:
    for penalty in penalty_grid:
        pred = apply_predict(train_df, low_receipt_penalty=penalty, five_day_bonus=bonus)
        mae = mean_absolute_error(train_target, pred)
        is_improvement = best is None or mae < best['mae']
        if is_improvement:
            best = dict(bonus=bonus, penalty=penalty, mae=mae)
print('Best params', best)