# Model Prediction Example
This notebook demonstrates how to load the saved test data, run predictions with the `predict` function from `src/predict.py`, and compare the predictions against the ground-truth labels.

In [1]:
import sys
from pathlib import Path
import pandas as pd

# Resolve the directory one level above the working directory and put it
# first on the import path (unless it is already listed) so that the
# project's `src` package can be imported.
project_root = Path("..").resolve()
if sys.path.count(str(project_root)) == 0:
    sys.path.insert(0, str(project_root))

from src.predict import predict

In [2]:
# Read the test features from data/train_test/X_test.csv under the project root.
data_dir = project_root.joinpath("data", "train_test")
X_test = pd.read_csv(data_dir.joinpath("X_test.csv"))

# Report the dimensions, then preview the first rows as the cell output.
print("Test data shape:", X_test.shape)
X_test.head()

Test data shape: (875, 49)


Unnamed: 0,Frequency,TotalQuantity,MaxQuantity,CustomerTenureDays,FirstPurchaseDaysAgo,PreferredDayOfWeek,PreferredMonth,WeekendPurchaseRatio,AvgDaysBetweenPurchases,UniqueDescriptions,...,ProductDiversity_Modéré,ProductDiversity_Spécialisé,Gender_Unknown,AccountStatus_Closed,AccountStatus_Suspended,GeoIP,RegistrationMonth,RegistrationDay,RegistrationDayOfWeek,DaysSinceRegistration
0,-0.228042,-0.033789,-0.06031,0.994,0.937226,4,0.396247,-0.4708,-0.19104,1.476855,...,False,False,False,False,False,1.477219,-1.346434,-1.092599,5,1.977663
1,0.219845,-0.188636,-0.06245,1.407654,0.819051,3,0.971933,-0.4708,-0.017658,0.336044,...,False,False,True,False,False,0.056453,0.055817,-1.664749,3,1.117665
2,-0.228042,-0.160866,-0.061023,0.572825,0.548937,1,-1.618653,-0.4708,0.390425,-0.444512,...,False,False,False,False,False,-1.104829,-0.224633,-1.435889,4,-0.881088
3,-0.004098,-0.117767,-0.009669,0.896227,0.312587,2,0.108404,-0.4708,0.154596,-0.168315,...,False,False,False,False,False,1.477219,-1.346434,-0.520449,3,1.948008
4,-0.451986,-0.205743,-0.026787,-1.006582,-0.894485,3,0.108404,-0.4708,-0.306782,-0.672674,...,True,False,False,False,False,1.477219,0.055817,-0.520449,0,-1.106466


In [3]:
# Pass the test features to the project's `predict` function.
results = predict(X_test)

# Report the dimensions, then preview the two prediction columns
# for the first ten rows as the cell output.
print("Prediction results shape:", results.shape)
results.head(10)[["Churn_Prediction", "Churn_Probability"]]

Prediction results shape: (875, 51)


Unnamed: 0,Churn_Prediction,Churn_Probability
0,0,0.010056
1,0,0.000127
2,0,0.012711
3,0,0.0
4,1,1.0
5,0,0.000129
6,1,0.98
7,0,0.000129
8,0,0.01
9,1,0.989167


## Combining with Ground Truth (y_test)

In [4]:
# Load the ground-truth labels stored in the same directory as the test features.
y_test = pd.read_csv(data_dir / "y_test.csv")

# The labels are attached to the predictions purely by row position, so fail
# early (with a readable message) if the file does not hold exactly one label
# column or if its row count differs from the prediction frame.
assert y_test.shape[1] == 1, (
    f"expected exactly one label column in y_test.csv, got {list(y_test.columns)}"
)
assert len(y_test) == len(results), (
    f"y_test has {len(y_test)} rows but results has {len(results)}"
)

# Assign a 1-D array rather than the 2-D `y_test.values`; positional assignment
# deliberately ignores y_test's index.
# NOTE: this adds a column to `results` in place; re-running the cell simply
# overwrites the same column.
results["Actual_Churn"] = y_test.iloc[:, 0].to_numpy()

# Show cases where the model predicted churn (1), selecting rows and columns
# in a single `.loc` call instead of chained indexing.
predicted_churn = results["Churn_Prediction"] == 1
results.loc[predicted_churn, ["Actual_Churn", "Churn_Prediction", "Churn_Probability"]].head(10)

Unnamed: 0,Actual_Churn,Churn_Prediction,Churn_Probability
4,1,1,1.0
6,1,1,0.98
9,1,1,0.989167
13,1,1,0.99996
15,1,1,0.99996
16,1,1,0.985588
17,1,1,1.0
18,1,1,1.0
20,1,1,0.989653
21,1,1,0.997651
