In [None]:
import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import make_column_transformer
from sklearn.pipeline import make_pipeline

from sklearn.neighbors import KNeighborsClassifier
from sklearn.dummy import DummyClassifier
from sklearn.tree import DecisionTreeClassifier

from sklearn.model_selection import train_test_split

from sklearn.metrics import (
    roc_auc_score,
    classification_report,
    confusion_matrix,
    accuracy_score,
    precision_score,
    recall_score,
    f1_score
)


In [None]:
# Read the cleaned training and test tables from the processed-data folder.
train, test = (
    pd.read_csv("../data/processed/train_clean.csv"),
    pd.read_csv("../data/processed/test_clean.csv"),
)

In [1]:
# Columns handed to the numeric branch of the preprocessing step.
numeric_cols = [
    "Age",
    "Tenure",
    "Usage Frequency",
    "Support Calls",
    "Payment Delay",
    "Total Spend",
    "Last Interaction",
]

# Columns handed to the categorical (one-hot) branch of the preprocessing step.
categorical_cols = [
    "Gender",
    "Subscription Type",
    "Contract Length",
    "Customer Status",
    "Last Due Date",
    "Last Payment Date",
]

# Small feature subset used to build the baseline feature frame.
baseline_features = [
    "Customer Status",
    "Contract Length",
    "Total Spend",
]

In [None]:
# Full feature frame: every column except the target ("Churn") and
# "CustomerID" (also dropped from the test frame before predicting).
# The split and fit cells further down reference `X`, so it has to be defined
# here; leaving it commented out makes a fresh-kernel run fail with NameError.
X = train.drop(columns=["Churn", "CustomerID"])

# Reduced feature frame restricted to the baseline columns.
X_base = train[baseline_features]

# Target labels.
y = train["Churn"]

In [None]:

# Hold out 20% of the rows for validation. Stratifying on the target keeps the
# class proportions aligned across both parts, and the fixed seed makes the
# split repeatable between runs.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=1234,
)

In [None]:
# Preview the training portion of the features produced by train_test_split.
X_train

In [None]:
# Preview the training portion of the target produced by train_test_split.
y_train

In [None]:
# One-hot encode the categorical columns; categories not seen during fit are
# ignored at transform time instead of raising.
categorical_step = (OneHotEncoder(handle_unknown='ignore'), categorical_cols)

# Standardise the numeric columns.
numeric_step = (StandardScaler(), numeric_cols)

# Any column not listed in either group is dropped from the model input.
preprocessor = make_column_transformer(
    categorical_step,
    numeric_step,
    remainder='drop',
)


In [None]:
# Chain the preprocessing step with a 7-nearest-neighbours classifier so the
# encoder and scaler are fitted only on the data passed to .fit().
knn_model = KNeighborsClassifier(n_neighbors=7)
knn_pipe = make_pipeline(preprocessor, knn_model)

# Fit on the training portion only; the validation rows stay unseen.
knn_pipe.fit(X_train, y_train)


In [None]:
# Score the held-out rows: take the second predict_proba column as the
# predicted score and compute ROC AUC against the validation labels.
val_proba = knn_pipe.predict_proba(X_val)
val_pred = val_proba[:, 1]
auc = roc_auc_score(y_val, val_pred)
auc

In [None]:
# Refit the same pipeline object on all labelled rows (training + validation)
# before predicting on the test set.
# NOTE: this mutates `knn_pipe` in place, so re-running the validation-AUC cell
# after this one would score rows the model has already been fitted on.
knn_pipe.fit(X, y)

In [None]:
# Predict on the test rows with "CustomerID" removed, keeping the second
# predict_proba column as the churn score.
test_features = test.drop("CustomerID", axis=1)
test_pred = knn_pipe.predict_proba(test_features)[:, 1]


In [None]:
# Pair each test CustomerID with its predicted churn score.
submission_columns = {
    "CustomerID": test["CustomerID"],
    "Churn": test_pred,
}
submission_kn = pd.DataFrame(submission_columns)

# Write the submission file without the index column, then show the first rows.
submission_kn.to_csv("knn_submission.csv", index=False)
submission_kn.head()
