In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Load the dataset
file_path = "/content/sample_data/churn_Data.csv"
data = pd.read_csv(file_path)

# Drop missing value rows
print("Cleaning Data...")
data = data.dropna(subset=['TotalCharges'])
data['TotalCharges'] = pd.to_numeric(data['TotalCharges'], errors='coerce')
#Use binary for the target variable
data['Churn'] = data['Churn'].map({'Yes': 1, 'No': 0})

# Encode categorical features
categorical_columns = [
    'gender', 'Partner', 'Dependents', 'PhoneService', 'MultipleLines',
    'InternetService', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection',
    'TechSupport', 'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling', 'PaymentMethod'
]
encoder = LabelEncoder()
for col in categorical_columns:
    data[col] = encoder.fit_transform(data[col])

# Swap missing values in numerical columns with 0
numerical_columns = ['tenure', 'MonthlyCharges', 'TotalCharges']
for col in numerical_columns:
    data[col].fillna(0, inplace=True)
features = [
    'tenure', 'MonthlyCharges', 'TotalCharges', 'gender', 'SeniorCitizen',
    'Partner', 'Dependents', 'PhoneService', 'MultipleLines', 'InternetService',
    'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport',
    'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling', 'PaymentMethod'
]
X = data[features]
y = data['Churn']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# Scale features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Use KNN with 5 neighbors
k = 5
knn = KNeighborsClassifier(n_neighbors=k)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")
print("Report:")
print(report)


Cleaning Data...
Selecting Features...
Training KNN Model...
Evaluating Model...


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data[col].fillna(0, inplace=True)


Accuracy: 0.75
Classification Report:
              precision    recall  f1-score   support

           0       0.82      0.85      0.83      1539
           1       0.55      0.50      0.52       574

    accuracy                           0.75      2113
   macro avg       0.68      0.67      0.68      2113
weighted avg       0.75      0.75      0.75      2113



In [None]:
from google.colab import drive
drive.mount('/content/drive')