Import libraries:

In [0]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

Read data, replace "," by "." and change type of columns:

In [0]:
data = pd.read_csv('lab7_data.csv', sep='\t')
for column in data.columns:
    data[column] = data[column].map(lambda x: str(x).replace(',', '.'))
    data[column] = data[column].map(lambda x: float(x))
data['churn'] = data['churn'].map(lambda x: int(x))
print(data.head())

   Unnamed: 0  contract_id  cnt_blocks_1m  cnt_blocks_2m  cnt_blocks_3m  \
0         0.0       4965.0            0.0            3.0            1.0   
1         1.0       4974.0            0.0            6.0            0.0   
2         2.0       2018.0            0.0            0.0            0.0   
3         3.0       2628.0            1.0            0.0            0.0   
4         4.0       3336.0            0.0            0.0            0.0   

   cnt_blocks_2m_sum  cnt_blocks_3m_sum  ratio_cnt_blocks_2m  \
0                3.0                4.0             0.777977   
1                6.0                6.0             1.000000   
2                0.0                0.0             0.744709   
3                1.0                1.0             0.000000   
4                0.0                0.0             0.000000   

   ratio_cnt_blocks_3m  avg_block_3m  ...    avg_daily_traffic_4w  \
0             0.000000         521.0  ...                0.000000   
1             0.415613    

Split data:

In [0]:
train, test = train_test_split(data, test_size=0.5, random_state=0)

Prepare matrices for test and train:

In [0]:
y_test = np.array(test['churn'])
test = test.drop(['churn'], axis=1)
X_test = np.array(test)

In [0]:
y_train = np.array(train['churn'])
train = train.drop(['churn'], axis=1)
X_train = np.array(train)

In [0]:
from sklearn.preprocessing import scale

scaled_X_train = scale(X_train)
scaled_X_test = scale(X_test)

Gridsearch for SVM:

In [0]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import roc_curve, make_scorer
from sklearn.metrics import auc, roc_auc_score

scorer = make_scorer(roc_auc_score)
parameters = {'kernel':['linear', 'poly', 'rbf'], 'C':[1, 5, 10], 'degree':[2, 3, 4]}
svc = SVC(probability=True)
clf = GridSearchCV(svc, parameters, scoring=scorer)
clf.fit(scaled_X_train, y_train)
print(clf.best_params_)
clf.refit

pred = clf.predict_proba(scaled_X_test)
fpr, tpr, thresholds = roc_curve(y_test, pred[:,1], pos_label=1)
print(auc(fpr, tpr))

{'degree': 2, 'C': 10, 'kernel': 'linear'}
0.81945481205012


Gridsearch for DecisionTree

In [0]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import roc_curve, make_scorer
from sklearn.metrics import auc, roc_auc_score

scorer = make_scorer(roc_auc_score)
parameters = {'criterion':['gini', 'entropy'], 'splitter':['random', 'best'], 'max_depth':[1, 2, 3, 4, 5, None], 
             'min_samples_split':[2, 3, 4, 5,], 'max_features':['auto', 'log2', None]}
             
dt = DecisionTreeClassifier()
clf = GridSearchCV(dt, parameters, scoring=scorer)
clf.fit(X_train, y_train)
print(clf.best_params_)
#clf.refit

pred = clf.predict_proba(X_test)
fpr, tpr, thresholds = roc_curve(y_test, pred[:,1], pos_label=1)
print(auc(fpr, tpr))

{'criterion': 'entropy', 'min_samples_split': 2, 'max_depth': 2, 'max_features': None, 'splitter': 'best'}
0.8091697769483692


Gridsearch for KNN:

In [0]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import roc_curve, make_scorer
from sklearn.metrics import auc, roc_auc_score

scorer = make_scorer(roc_auc_score)
parameters = {'n_neighbors':[3,4,5,6], 'weights':['uniform', 'distance'],
              'algorithm':['ball_tree', 'kd_tree', 'brute'], 'leaf_size':[20, 30, 40], 'p':[1, 2, 3]}

knn = KNeighborsClassifier()
clf = GridSearchCV(knn, parameters, scoring=scorer)
clf.fit(X_train, y_train)
print(clf.best_params_)
clf.refit

pred = clf.predict_proba(X_test)
fpr, tpr, thresholds = roc_curve(y_test, pred[:,1], pos_label=1)
print(auc(fpr, tpr))

{'p': 1, 'weights': 'uniform', 'leaf_size': 20, 'algorithm': 'ball_tree', 'n_neighbors': 5}
0.5289289522793922
