In [1]:
import pandas as pd
import joblib
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

In [2]:
# Load the churn CSV, coerce TotalCharges to a numeric dtype (entries that
# cannot be parsed become NaN), then discard every row that holds a NaN.
dataset = (
    pd.read_csv("PreTelco-Customer-Churn.csv")
    .assign(
        TotalCharges=lambda frame: pd.to_numeric(frame["TotalCharges"], errors="coerce")
    )
    .dropna()
)

In [3]:
# Binary target: encode the "No"/"Yes" labels as 0/1.
# The numeric-dtype guard keeps this cell safe to re-run: mapping an
# already-encoded column a second time would turn every value into NaN.
churn_codes = {"No": 0, "Yes": 1}
if not pd.api.types.is_numeric_dtype(dataset["Churn"]):
    dataset["Churn"] = dataset["Churn"].map(churn_codes)

# Series.map yields NaN for any label missing from the mapping; fail here
# with a clear message rather than later at model-fit time.
if dataset["Churn"].isna().any():
    raise ValueError("Churn has missing or unrecognised labels (expected 'No'/'Yes')")

# One‑hot encode everything else (numeric columns, including the encoded
# target, are passed through unchanged by get_dummies)
dataset = pd.get_dummies(dataset, drop_first=True)

In [4]:
# The five columns used as model inputs; their order here is the order the
# fitted pipeline sees them in.
feature_cols = [
    "tenure",
    "InternetService_Fiber optic",
    "StreamingMovies_No internet service",
    "Contract_Two year",
    "PaymentMethod_Electronic check",
]

# Feature matrix and binary target, both sliced from the encoded frame
X = dataset.loc[:, feature_cols]
y = dataset.loc[:, "Churn"]

In [5]:
# Hold out one third of the rows for testing. Stratifying on y keeps the
# class proportions alike in both partitions; the fixed seed makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=1/3,
    random_state=0,
    stratify=y,
)

In [6]:
# BUILD PIPELINE  (Scaler → ANOVA selector → Linear SVM)
# Each stage is created on its own line, then chained under the step names
# "scaler", "selector" and "svm".
scale_step = StandardScaler()

# k equals the number of input columns, so the selector keeps all five; it
# stays in the chain for consistency.
select_step = SelectKBest(score_func=f_classif, k=5)

# probability=True enables predict_proba; class_weight="balanced" reweights
# the classes by their frequency in the training labels.
svm_step = SVC(
    kernel="linear",
    probability=True,
    class_weight="balanced",
    random_state=42,
)

pipeline = Pipeline(steps=[
    ("scaler", scale_step),
    ("selector", select_step),
    ("svm", svm_step),
])

# Fit every stage on the training partition only
pipeline.fit(X_train, y_train)

In [7]:
# Evaluate on the held-out partition
pred_test = pipeline.predict(X_test)

# Confusion-matrix rows are true classes, columns are predicted classes
test_confusion = confusion_matrix(y_test, pred_test)
test_report = classification_report(y_test, pred_test)

print("Confusion matrix:\n", test_confusion)
print("\nClassification report:\n", test_report)

Confusion matrix:
 [[1208  517]
 [ 123  500]]

Classification report:
               precision    recall  f1-score   support

           0       0.91      0.70      0.79      1725
           1       0.49      0.80      0.61       623

    accuracy                           0.73      2348
   macro avg       0.70      0.75      0.70      2348
weighted avg       0.80      0.73      0.74      2348



In [8]:
# SAVE ENTIRE PIPELINE IN ONE FILE
# The scaler, selector and SVM are serialised together, so loading this one
# file restores the whole fitted chain.
joblib.dump(value=pipeline, filename="Final_model_TCCP.sav")
print("Saved: Final_model_TCCP.sav")

Saved: Final_model_TCCP.sav


In [9]:
# SIMPLE CLI PREDICTION (uses the SAME pipeline)
#    ─ enter 5 values in the exact order of `feature_cols`
# NOTE: joblib files are pickle-based, so only load files you created or trust.
loaded_pipe = joblib.load("Final_model_TCCP.sav")


def _read_flag(prompt):
    """Prompt for a one-hot indicator and return it as the int 0 or 1.

    Raises:
        ValueError: if the reply is not an integer, or is an integer other
            than 0 or 1 (such a value would otherwise be scaled and scored
            as though it were a valid indicator).
    """
    flag = int(input(prompt))
    if flag not in (0, 1):
        raise ValueError(f"expected 0 or 1, got {flag}")
    return flag


tenure                     = float(input("tenure: "))
is_fiber_optic             = _read_flag("InternetService_Fiber optic (1/0): ")
no_stream_movies           = _read_flag("StreamingMovies_No internet service (1/0): ")
contract_two_year          = _read_flag("Contract_Two year (1/0): ")
pm_electronic_check        = _read_flag("PaymentMethod_Electronic check (1/0): ")

tenure:  2
InternetService_Fiber optic (1/0):  1
StreamingMovies_No internet service (1/0):  0
Contract_Two year (1/0):  0
PaymentMethod_Electronic check (1/0):  0


In [12]:
# One-row frame labelled with the training column names. The pipeline was
# fitted on a DataFrame, so a bare nested list makes scikit-learn warn that
# X lacks valid feature names; labelling the columns also makes the
# value-to-feature pairing explicit. Values are listed in `feature_cols` order.
row = pd.DataFrame(
    [[tenure,
      is_fiber_optic,
      no_stream_movies,
      contract_two_year,
      pm_electronic_check]],
    columns=feature_cols,
)

In [13]:
# Hard class label for the single input row
prediction = loaded_pipe.predict(row)[0]

# Column 1 of predict_proba is read as the churn probability (target is
# encoded 0 = "No", 1 = "Yes").
# NOTE(review): with SVC(probability=True) the probability comes from a
# separately fitted calibration and may not always agree with predict().
prob = loaded_pipe.predict_proba(row)[0, 1]

label = "Churn" if prediction else "No Churn"
print("\nPrediction:", label)
print(f"Probability of churn: {prob:.2%}")


Prediction: Churn
Probability of churn: 60.31%


