In [1]:
from sklearn.model_selection import train_test_split
import pandas as pd

# Read the dataset from disk.
df = pd.read_csv('cleaned_dataset.csv')

# Feature matrix: everything except the target ('Churn') and the six other
# columns listed here, which are kept out of the model inputs.
X = df.drop(
    columns=[
        'customerID',
        'Churn',
        'MonthlyCharges',
        'SeniorCitizen',
        'Partner',
        'Dependents',
        'TechSupport',
    ]
)
# Target vector.
y = df['Churn']

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [2]:
from sklearn.linear_model import LogisticRegression
from sklearn import preprocessing


# Learn the scaling statistics from the training split and scale it in one step.
# `scaler` is reused later to transform X_test and is pickled at the end.
scaler = preprocessing.StandardScaler()
X_scaled = scaler.fit_transform(X_train)

# Baseline model: logistic regression with default settings on the scaled features.
lr = LogisticRegression()
lr.fit(X_scaled, y_train)


In [3]:
# Predict on the held-out split, scaling it with the already-fitted scaler.
y_pred = lr.predict(scaler.transform(X_test))

# Show the fitted coefficient matrix and the intercept, one per line.
print(lr.coef_, lr.intercept_, sep='\n')


[[-0.01959658 -0.00908438  1.43880281 -0.24670079  0.226884   -0.3664028
  -0.13971965  0.10544132  0.15263121  0.34421761  0.37764049  0.82732675
   0.19283549  0.19584448 -0.57470984]]
[1.73613698]


In [4]:
from sklearn.metrics import f1_score, accuracy_score

# F1 of the logistic-regression predictions on the test split (cell output).
f1_score(y_true=y_test, y_pred=y_pred)

np.float64(0.862708719851577)

In [5]:
# Accuracy of the logistic-regression predictions on the test split (cell output).
accuracy_score(y_true=y_test, y_pred=y_pred)

0.7896233120113717

In [6]:
# from sklearn.model_selection import GridSearchCV
# from sklearn.ensemble import RandomForestClassifier


# param_grid = {
#     'n_estimators': [100, 200, 300],
#    'max_depth': [5, 10, 15],
#    'min_samples_leaf': [3, 5, 7],
#    'min_samples_split': [2, 4, 6],
#    'max_features': ['sqrt', 'log2']}


# grid_search = GridSearchCV(RandomForestClassifier(), param_grid, cv=5)
# grid_search.fit(X_scaled, y_train)

# print('Best Parameters : ',grid_search.best_params_)
# best_lr_model = grid_search.best_estimator_
# print()
# y_pred_best = best_lr_model.predict(scaler.transform(X_test))
# print('f1_score : ',f1_score(y_test, y_pred_best))

In [12]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.svm import SVC
import scipy.stats as stats


# Randomized hyper-parameter search for a polynomial-kernel SVC.
#
# Search space: continuous parameters are sampled from scipy distributions,
# discrete ones are sampled from the given lists.
param_dist = {
    'C': stats.expon(scale=50),  # Exponential distribution for C
    'gamma': stats.expon(scale=.1),  # Exponential distribution for gamma
    'kernel': ['poly'],
    'degree': [2, 3, 4],  # Only relevant for 'poly' kernel
    'coef0': stats.uniform(0, 1)  # Uniform distribution for coef0
}

# 50 sampled candidates x 5 folds = 250 fits, selected on accuracy.
# random_state pins the sampled candidates so the search (and therefore the
# selected/pickled model) is reproducible on a fresh kernel; 42 matches the
# seed used for train_test_split.
# NOTE(review): X_scaled was produced by a scaler fitted on the whole training
# split, so each CV validation fold has already contributed to the scaling
# statistics. Wrapping scaler + SVC in a Pipeline would remove that, but would
# change what gets pickled below - confirm before changing.
random_search = RandomizedSearchCV(
    SVC(),
    param_distributions=param_dist,
    n_iter=50,
    cv=5,
    verbose=5,
    scoring='accuracy',
    random_state=42,
)
random_search.fit(X_scaled, y_train)

print('Best Parameters : ',random_search.best_params_)
# Despite the name, this holds the best SVC found by the search; the name is
# kept because later cells pickle `best_lr_model`.
best_lr_model = random_search.best_estimator_
print()
# Evaluate on the held-out split, scaled with the already-fitted scaler.
# The search optimises accuracy, while F1 is what is reported here.
y_pred_best = best_lr_model.predict(scaler.transform(X_test))
print('f1_score : ',f1_score(y_test, y_pred_best))

Fitting 5 folds for each of 50 candidates, totalling 250 fits
[CV 1/5] END C=14.360996090628971, coef0=0.1151486188681643, degree=4, gamma=0.14579423225446067, kernel=poly;, score=0.756 total time=  11.9s
[CV 2/5] END C=14.360996090628971, coef0=0.1151486188681643, degree=4, gamma=0.14579423225446067, kernel=poly;, score=0.746 total time=  14.2s
[CV 3/5] END C=14.360996090628971, coef0=0.1151486188681643, degree=4, gamma=0.14579423225446067, kernel=poly;, score=0.720 total time=   7.0s
[CV 4/5] END C=14.360996090628971, coef0=0.1151486188681643, degree=4, gamma=0.14579423225446067, kernel=poly;, score=0.737 total time=   8.8s
[CV 5/5] END C=14.360996090628971, coef0=0.1151486188681643, degree=4, gamma=0.14579423225446067, kernel=poly;, score=0.743 total time=  14.4s
[CV 1/5] END C=118.91783556523094, coef0=0.9790646057030195, degree=2, gamma=0.07549381067278707, kernel=poly;, score=0.801 total time=  10.5s
[CV 2/5] END C=118.91783556523094, coef0=0.9790646057030195, degree=2, gamma=0.0

In [None]:
#Best Parameters :  {'C': np.float64(17.91077034344432), 'coef0': np.float64(0.2528603271974953), 'degree': 4, 'gamma': np.float64(0.010905525656454272), 'kernel': 'poly'}

In [16]:
# Test-split accuracy of the predictions made by the tuned model.
print(
    'accuracy_score : ',
    accuracy_score(y_true=y_test, y_pred=y_pred_best),
)

accuracy_score :  0.798862828713575


In [13]:
import pickle

import os

# Persist the estimator selected by the hyper-parameter search.
# Create the output directory first: open(..., 'wb') does not create missing
# parent directories, and failing here would throw away a long-running search.
os.makedirs('models', exist_ok=True)
with open('models/best_svc_poly.pkl', 'wb') as f:
    pickle.dump(best_lr_model, f)

In [10]:
import os

# Persist the fitted scaler so the same transformation can be applied to new
# data before it is passed to the saved model.
# Create the output directory first: open(..., 'wb') does not create missing
# parent directories.
os.makedirs('models', exist_ok=True)
with open('models/scaler.pkl', 'wb') as f:
    pickle.dump(scaler, f)