In [1]:
# Cell purpose: point the kernel at the project root, make `src` importable,
# and sanity-check that the raw CSV named by src.config.RAW_DATA is readable.
import os
import sys

# Imported unconditionally so `pd` is defined for later cells even when the
# raw file is missing (previously it was only imported inside the `if` below).
import pandas as pd

# Defaults to the original author's checkout; set CELL2CELL_PROJECT_ROOT to
# run the notebook from a different machine or location.
project_root = os.environ.get(
    "CELL2CELL_PROJECT_ROOT",
    r'c:\Users\anuda\Desktop\cell2cell_churn_drift',
)
os.chdir(project_root)
# Guarded so re-running the cell does not stack duplicate sys.path entries.
if project_root not in sys.path:
    sys.path.insert(0, project_root)
from src.config import RAW_DATA

# Evaluate once so the printed answer and the branch below cannot disagree.
raw_data_exists = os.path.exists(RAW_DATA)

print("Project Root:", os.getcwd())
print("Looking for:", RAW_DATA)
print("File Exists?", raw_data_exists)

# Quick peek if it exists
if raw_data_exists:
    df = pd.read_csv(RAW_DATA)
    print("Success! Shape:", df.shape)
    print("Columns:", df.columns.tolist()[:10])
else:
    print("File not found — double-check download/path!")

    

Project Root: c:\Users\anuda\Desktop\cell2cell_churn_drift
Looking for: data/raw/cell2cellholdout.csv
File Exists? True
Success! Shape: (51047, 58)
Columns: ['CustomerID', 'Churn', 'MonthlyRevenue', 'MonthlyMinutes', 'TotalRecurringCharge', 'DirectorAssistedCalls', 'OverageMinutes', 'RoamingCalls', 'PercChangeMinutes', 'PercChangeRevenues']


In [2]:

# Cell purpose: run the project's preprocessing, split the result into
# batches, and write each batch to data/batches/batch_<i>.csv.
import os
import sys

# Defaults to the original author's checkout; set CELL2CELL_PROJECT_ROOT to
# run the notebook from a different machine or location.
project_root = os.environ.get(
    "CELL2CELL_PROJECT_ROOT",
    r'c:\Users\anuda\Desktop\cell2cell_churn_drift',
)
os.chdir(project_root)
# Guarded so re-running the cell does not stack duplicate sys.path entries.
if project_root not in sys.path:
    sys.path.insert(0, project_root)
from src.config import TARGET
from src.data.preprocessing import load_and_preprocess, generate_batches

# load_and_preprocess() returns three values; only the frame is used in this cell.
# NOTE(review): `scaler` / `le` are presumably the fitted scaler and label encoder — confirm in src.data.preprocessing.
df_processed, scaler, le = load_and_preprocess()
print(f"Processed shape: {df_processed.shape}")

# to_csv does not create missing parent directories, so make sure the
# output folder exists before the loop writes into it.
BATCH_DIR = "data/batches"
os.makedirs(BATCH_DIR, exist_ok=True)

batches = generate_batches(df_processed, n_batches=5)
for i, batch in enumerate(batches):
    batch.to_csv(f"{BATCH_DIR}/batch_{i}.csv", index=False)
    print(f" Batch {i} shape: {batch.shape} | Churn rate: {batch[TARGET].mean():.2%}")

Raw shape: (51047, 58)
Raw Churn sample: ['Yes', 'Yes', 'No', 'No', 'Yes']
Processed Churn distribution:
Churn
0    0.711815
1    0.288185
Name: proportion, dtype: float64
Using 9 numerics
MonthlyRevenue dtype after fix: float64
MonthlyMinutes dtype after fix: float64
TotalRecurringCharge dtype after fix: float64
DirectorAssistedCalls dtype after fix: float64
OverageMinutes dtype after fix: float64
RoamingCalls dtype after fix: float64
PercChangeMinutes dtype after fix: float64
PercChangeRevenues dtype after fix: float64
HandsetPrice dtype after fix: float64
Processed saved: data/processed/churn_processed.csv
Processed head (Churn + first few):
   Churn  MonthlyRevenue  MonthlyMinutes  TotalRecurringCharge
0      1       -0.783096       -0.578622             -1.042504
1      1       -0.940828       -0.973610             -1.252478
2      0       -0.468083       -0.977390             -0.370584
3      0        0.528260        1.487037              1.183229
4      1       -0.937453       -

In [None]:
# Cell purpose: simulate drift by inflating 'OverageMinutes' in batch 2 by 20%,
# save the result as batch_2_driftsim.csv, then compare it against batch 0
# with the project's drift detector.
# The relative paths below rely on the working directory being the project
# root (set with os.chdir in the earlier cells).
import pandas as pd  # explicit import: this cell must not rely on an earlier cell's conditional import

from src.model.drift_detector import detect_drift

batch2 = pd.read_csv("data/batches/batch_2.csv")
print("batch",batch2.head(5))

# Build a new frame instead of mutating `batch2`, so re-running the cell
# applies the 20% bump exactly once.
# NOTE(review): the batch values appear to be standardised (see the z-score-like
# output above); if 'OverageMinutes' is too, this scaling widens the spread
# around zero rather than raising usage uniformly — confirm this is the
# intended kind of drift.
batch2_drifted = batch2.assign(
    OverageMinutes=batch2['OverageMinutes'] + batch2['OverageMinutes'] * 0.2  # Fake shift
)
batch2_drifted.to_csv("data/batches/batch_2_driftsim.csv", index=False)
print("✅ Drift simulated in batch_2_driftsim.csv — now test detection!")

# Quick Test: Detect drift between baseline (batch_0) and drifted batch_2.
# The drifted batch is read back from disk so the detector sees exactly what was saved.
baseline = pd.read_csv("data/batches/batch_0.csv")
drifted = pd.read_csv("data/batches/batch_2_driftsim.csv")

# detect_drift returns a pair, unpacked here as (drifts, has_drift).
drifts, has_drift = detect_drift(baseline, drifted)
print("Drift Results:", drifts)
print(f"Overall Drift Detected: {has_drift}")

batch     Churn  MonthlyRevenue  MonthlyMinutes  TotalRecurringCharge  \
0       0       -0.940828       -0.984950             -1.252478   
1       0        0.275574        0.307741              0.553305   
2       0       -0.312151        0.224585             -0.076620   
3       0       -0.110767       -0.916914              0.763280   
4       0        0.442306        3.214404              1.687169   
5       1       -0.232723       -0.300807             -0.076620   
6       1        0.001062        0.156549              0.553305   
7       1       -0.982680       -0.348054             -1.546443   
8       0       -0.085116       -0.070239              0.343330   
9       0       -0.548861        0.262383             -0.664549   
10      0       -0.176020       -0.750602              0.133355   
11      1       -0.484059       -0.414201             -0.622554   
12      0       -0.790072       -0.610750             -1.000509   
13      1        0.738419        0.458932              1