In [1]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

In [None]:
# Load the dataset
data = pd.read_csv('bigml_59c28831336c6604c800002a.csv')
data.head()

Unnamed: 0,state,account length,area code,phone number,international plan,voice mail plan,number vmail messages,total day minutes,total day calls,total day charge,...,total eve calls,total eve charge,total night minutes,total night calls,total night charge,total intl minutes,total intl calls,total intl charge,customer service calls,churn
0,KS,128,415,382-4657,no,yes,25,265.1,110,45.07,...,99,16.78,244.7,91,11.01,10.0,3,2.7,1,False
1,OH,107,415,371-7191,no,yes,26,161.6,123,27.47,...,103,16.62,254.4,103,11.45,13.7,3,3.7,1,False
2,NJ,137,415,358-1921,no,no,0,243.4,114,41.38,...,110,10.3,162.6,104,7.32,12.2,5,3.29,0,False
3,OH,84,408,375-9999,yes,no,0,299.4,71,50.9,...,88,5.26,196.9,89,8.86,6.6,7,1.78,2,False
4,OK,75,415,330-6626,yes,no,0,166.7,113,28.34,...,122,12.61,186.9,121,8.41,10.1,3,2.73,3,False


In [4]:
# Preprocess the data
# encode categorical variables
df_encoded = pd.get_dummies(data, drop_first=True)



In [5]:
# split the data into features and target variable
X = df_encoded.drop('churn', axis=1)
y = df_encoded['churn']
# split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
# feature scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [7]:
# choose values for k
k_values = [1, 3, 5, 7, 9]
# initialize KNN classifier
knn = KNeighborsClassifier(n_neighbors=10, )

In [8]:
# fit the KNN model
knn.fit(X_train, y_train)

In [9]:
# Evaluate the model
y_pred = knn.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

Accuracy: 0.85


In [12]:
# Tune the the value of k

for k in range(1,21):
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f'Accuracy for k={k}: {accuracy:.4f}')

Accuracy for k=1: 0.8486
Accuracy for k=2: 0.8486
Accuracy for k=3: 0.8501
Accuracy for k=4: 0.8471
Accuracy for k=5: 0.8486
Accuracy for k=6: 0.8501
Accuracy for k=7: 0.8456
Accuracy for k=8: 0.8486
Accuracy for k=9: 0.8486
Accuracy for k=10: 0.8501
Accuracy for k=11: 0.8501
Accuracy for k=12: 0.8501
Accuracy for k=13: 0.8501
Accuracy for k=14: 0.8486
Accuracy for k=15: 0.8486
Accuracy for k=16: 0.8486
Accuracy for k=17: 0.8486
Accuracy for k=18: 0.8486
Accuracy for k=19: 0.8486
Accuracy for k=20: 0.8486


In [15]:
# regression task
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error

# Load the dataset
knn_reg = KNeighborsRegressor(n_neighbors=9)

# fit the KNN model
knn_reg.fit(X_train, y_train)

# Evaluate the model
y_pred = knn_reg.predict(X_test)
print(f'Accuracy: {accuracy:.4f}')

mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse:.4f}')

Accuracy: 0.8486
Mean Squared Error: 0.1225


In [16]:
# Distance metric euclidean
knn_with_metric = KNeighborsClassifier(n_neighbors=9, metric='euclidean', weights='distance')

# fit the KNN model
knn_with_metric.fit(X_train, y_train)
# Evaluate the model
y_pred = knn_with_metric.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy with euclidean distance: {accuracy:.4f}')
# Distance metric manhattan

Accuracy with euclidean distance: 0.8486


In [18]:
# Distance metric manhattan
knn_with_metric = KNeighborsClassifier(n_neighbors=9, metric='manhattan', weights='distance')

# fit the KNN model knn_with_metric.fit(X_train, y_train)
knn_with_metric.fit(X_train, y_train)
# Evaluate the model
y_pred = knn_with_metric.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy with manhattan distance: {accuracy:.4f}')

Accuracy with manhattan distance: 0.8516
