In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression, Lasso, Ridge
import math
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score
from tqdm.notebook import tqdm

In [2]:
data = pd.read_csv("/train.csv")

In [3]:
data.head()

Unnamed: 0,ClientPeriod,MonthlySpending,TotalSpent,Sex,IsSeniorCitizen,HasPartner,HasChild,HasPhoneService,HasMultiplePhoneNumbers,HasInternetService,HasOnlineSecurityService,HasOnlineBackup,HasDeviceProtection,HasTechSupportAccess,HasOnlineTV,HasMovieSubscription,HasContractPhone,IsBillingPaperless,PaymentMethod,Churn
0,55,19.5,1026.35,Male,0,Yes,Yes,Yes,No,No,No internet service,No internet service,No internet service,No internet service,No internet service,No internet service,One year,No,Mailed check,0
1,72,25.85,1872.2,Male,0,Yes,No,Yes,Yes,No,No internet service,No internet service,No internet service,No internet service,No internet service,No internet service,Two year,No,Credit card (automatic),0
2,1,75.9,75.9,Male,0,No,No,Yes,No,Fiber optic,No,No,No,Yes,No,No,Month-to-month,Yes,Electronic check,1
3,32,79.3,2570.0,Female,1,Yes,No,Yes,Yes,Fiber optic,No,No,Yes,No,No,No,Month-to-month,No,Mailed check,0
4,60,115.25,6758.45,Female,0,Yes,Yes,Yes,Yes,Fiber optic,Yes,Yes,Yes,Yes,Yes,Yes,Two year,No,Credit card (automatic),0


In [4]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5282 entries, 0 to 5281
Data columns (total 20 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   ClientPeriod              5282 non-null   int64  
 1   MonthlySpending           5282 non-null   float64
 2   TotalSpent                5282 non-null   object 
 3   Sex                       5282 non-null   object 
 4   IsSeniorCitizen           5282 non-null   int64  
 5   HasPartner                5282 non-null   object 
 6   HasChild                  5282 non-null   object 
 7   HasPhoneService           5282 non-null   object 
 8   HasMultiplePhoneNumbers   5282 non-null   object 
 9   HasInternetService        5282 non-null   object 
 10  HasOnlineSecurityService  5282 non-null   object 
 11  HasOnlineBackup           5282 non-null   object 
 12  HasDeviceProtection       5282 non-null   object 
 13  HasTechSupportAccess      5282 non-null   object 
 14  HasOnlin

In [43]:

data = data.dropna()

num_cols = [
    'ClientPeriod',
    'MonthlySpending',
    'TotalSpent', 'IsSeniorCitizen'
]


cat_cols = [
    'Sex',
    'HasPartner',
    'HasChild',
    'HasPhoneService',
    'HasMultiplePhoneNumbers',
    'HasInternetService',
    'HasOnlineSecurityService',
    'HasOnlineBackup',
    'HasDeviceProtection',
    'HasTechSupportAccess',
    'HasOnlineTV',
    'HasMovieSubscription',
    'HasContractPhone',
    'IsBillingPaperless',
    'PaymentMethod'
]

columns = num_cols + cat_cols
target = 'Churn'

In [19]:
for col in cat_cols:
    print(f"{col} DISTRIBUTION")
    print(data[col].value_counts())
    print()

Sex DISTRIBUTION
Male      900
Female    861
Name: Sex, dtype: int64

HasPartner DISTRIBUTION
No     936
Yes    825
Name: HasPartner, dtype: int64

HasChild DISTRIBUTION
No     1257
Yes     504
Name: HasChild, dtype: int64

HasPhoneService DISTRIBUTION
Yes    1600
No      161
Name: HasPhoneService, dtype: int64

HasMultiplePhoneNumbers DISTRIBUTION
No                  879
Yes                 721
No phone service    161
Name: HasMultiplePhoneNumbers, dtype: int64

HasInternetService DISTRIBUTION
Fiber optic    755
DSL            621
No             385
Name: HasInternetService, dtype: int64

HasOnlineSecurityService DISTRIBUTION
No                     886
Yes                    490
No internet service    385
Name: HasOnlineSecurityService, dtype: int64

HasOnlineBackup DISTRIBUTION
No                     756
Yes                    620
No internet service    385
Name: HasOnlineBackup, dtype: int64

HasDeviceProtection DISTRIBUTION
No                     778
Yes                    598
No i

In [44]:
#data = data.loc[data['TotalSpent'] != ' ']
data['TotalSpent'].replace([' '],['0'],inplace=True) 

In [45]:
col_perevod = [ 'HasPartner', 'IsBillingPaperless', 'HasChild', 'HasPhoneService']
for col in col_perevod:
    data[col] =  data[col].replace(to_replace = ['Yes', 'No'], value = [1, 0]) 
    cat_cols.remove(col)
    num_cols.append(col)

In [46]:
data = pd.get_dummies(data, columns=cat_cols)
columns_cat_new = []
for col_name in cat_cols:
    columns_cat_new.extend(filter(lambda x: x.startswith(col_name), data.columns))
cat_cols = columns_cat_new
columns = num_cols + cat_cols

In [47]:
from sklearn.preprocessing import StandardScaler

pca = StandardScaler()
pca.fit(data[columns])
data[columns] = pca.transform(data[columns])

In [11]:
data.corr()['Churn']

ClientPeriod                                   -0.352613
MonthlySpending                                 0.184341
TotalSpent                                     -0.201294
IsSeniorCitizen                                 0.156502
HasPartner                                     -0.147707
HasChild                                       -0.162438
HasPhoneService                                 0.010449
IsBillingPaperless                              0.193754
Churn                                           1.000000
Sex_Female                                      0.008453
Sex_Male                                       -0.008453
HasMultiplePhoneNumbers_No                     -0.033033
HasMultiplePhoneNumbers_No phone service       -0.010449
HasMultiplePhoneNumbers_Yes                     0.039657
HasInternetService_DSL                         -0.117465
HasInternetService_Fiber optic                  0.299786
HasInternetService_No                          -0.226836
HasOnlineSecurityService_No    

In [12]:
data_1 = data
for col in columns:
    if abs((data.corr()['Churn'][col])) * 10 // 1 ==  0:
        data_1 = data_1.drop([col], axis = 1)

In [48]:

per = ['ClientPeriod', 'MonthlySpending', 'TotalSpent', 'IsSeniorCitizen',
       'HasPartner', 'HasChild', 'IsBillingPaperless',
       'HasInternetService_DSL', 'HasInternetService_Fiber optic',
       'HasInternetService_No', 'HasOnlineSecurityService_No',
       'HasOnlineSecurityService_No internet service',
       'HasOnlineSecurityService_Yes', 'HasOnlineBackup_No',
       'HasOnlineBackup_No internet service', 'HasDeviceProtection_No',
       'HasDeviceProtection_No internet service', 'HasTechSupportAccess_No',
       'HasTechSupportAccess_No internet service', 'HasTechSupportAccess_Yes',
       'HasOnlineTV_No', 'HasOnlineTV_No internet service',
       'HasMovieSubscription_No', 'HasMovieSubscription_No internet service',
       'HasContractPhone_Month-to-month', 'HasContractPhone_One year',
       'HasContractPhone_Two year', 'PaymentMethod_Bank transfer (automatic)',
       'PaymentMethod_Credit card (automatic)',
       'PaymentMethod_Electronic check']

In [25]:
for col in per:
    data = data.loc[data[col] <= np.percentile(data[col], 95)]
for col in per:
    data = data.loc[data[col] >= np.percentile(data[col], 5)]

In [15]:
from sklearn.linear_model import LogisticRegression
X_train, X_test, y_train, y_test = train_test_split(data_1[per], data_1[target], train_size=0.8)
lg = LogisticRegression()
lg.fit(X_train, y_train)
pred = lg.predict(X_test)
from sklearn.metrics import roc_auc_score
print(roc_auc_score(y_test, pred))

0.7142629300524037


In [18]:
from sklearn.model_selection import GridSearchCV
parameters = {'C': [0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 1, 2, 5, 10],
             'penalty': ['l1', 'l2', 'elasticnet', 'none'],
              'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']
             }
lg_new = LogisticRegression()
grid = GridSearchCV(lg_new, parameters, cv=5, scoring = 'roc_auc')
grid.fit(X_train, y_train)

Traceback (most recent call last):
  File "C:\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 443, in _check_solver
    raise ValueError("Solver %s supports only 'l2' or 'none' penalties, "
ValueError: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "C:\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\anaconda3\lib\site-packag

Traceback (most recent call last):
  File "C:\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 443, in _check_solver
    raise ValueError("Solver %s supports only 'l2' or 'none' penalties, "
ValueError: Solver newton-cg supports only 'l2' or 'none' penalties, got elasticnet penalty.

Traceback (most recent call last):
  File "C:\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\anaconda3\lib\sit

Traceback (most recent call last):
  File "C:\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 454, in _check_solver
    raise ValueError(
ValueError: penalty='none' is not supported for the liblinear solver

Traceback (most recent call last):
  File "C:\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 454, in _check_solver
  

Traceback (most recent call last):
  File "C:\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 443, in _check_solver
    raise ValueError("Solver %s supports only 'l2' or 'none' penalties, "
ValueError: Solver newton-cg supports only 'l2' or 'none' penalties, got elasticnet penalty.

Traceback (most recent call last):
  File "C:\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\anaconda3\lib\sit

Traceback (most recent call last):
  File "C:\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 454, in _check_solver
    raise ValueError(
ValueError: penalty='none' is not supported for the liblinear solver

Traceback (most recent call last):
  File "C:\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 454, in _check_solver
  

Traceback (most recent call last):
  File "C:\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 443, in _check_solver
    raise ValueError("Solver %s supports only 'l2' or 'none' penalties, "
ValueError: Solver sag supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "C:\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\anaconda3\lib\site-packages\skl

Traceback (most recent call last):
  File "C:\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 454, in _check_solver
    raise ValueError(
ValueError: penalty='none' is not supported for the liblinear solver

Traceback (most recent call last):
  File "C:\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 454, in _check_solver
  

Traceback (most recent call last):
  File "C:\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 443, in _check_solver
    raise ValueError("Solver %s supports only 'l2' or 'none' penalties, "
ValueError: Solver sag supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "C:\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\anaconda3\lib\site-packages\skl

Traceback (most recent call last):
  File "C:\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 454, in _check_solver
    raise ValueError(
ValueError: penalty='none' is not supported for the liblinear solver

Traceback (most recent call last):
  File "C:\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 454, in _check_solver
  

Traceback (most recent call last):
  File "C:\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 443, in _check_solver
    raise ValueError("Solver %s supports only 'l2' or 'none' penalties, "
ValueError: Solver sag supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "C:\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\anaconda3\lib\site-packages\skl

Traceback (most recent call last):
  File "C:\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 454, in _check_solver
    raise ValueError(
ValueError: penalty='none' is not supported for the liblinear solver

Traceback (most recent call last):
  File "C:\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 454, in _check_solver
  

Traceback (most recent call last):
  File "C:\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 443, in _check_solver
    raise ValueError("Solver %s supports only 'l2' or 'none' penalties, "
ValueError: Solver sag supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "C:\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\anaconda3\lib\site-packages\skl

Traceback (most recent call last):
  File "C:\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 454, in _check_solver
    raise ValueError(
ValueError: penalty='none' is not supported for the liblinear solver

Traceback (most recent call last):
  File "C:\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 454, in _check_solver
  

Traceback (most recent call last):
  File "C:\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 443, in _check_solver
    raise ValueError("Solver %s supports only 'l2' or 'none' penalties, "
ValueError: Solver sag supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "C:\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\anaconda3\lib\site-packages\skl

Traceback (most recent call last):
  File "C:\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 454, in _check_solver
    raise ValueError(
ValueError: penalty='none' is not supported for the liblinear solver

Traceback (most recent call last):
  File "C:\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 454, in _check_solver
  

Traceback (most recent call last):
  File "C:\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 443, in _check_solver
    raise ValueError("Solver %s supports only 'l2' or 'none' penalties, "
ValueError: Solver sag supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "C:\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\anaconda3\lib\site-packages\skl

Traceback (most recent call last):
  File "C:\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 454, in _check_solver
    raise ValueError(
ValueError: penalty='none' is not supported for the liblinear solver

Traceback (most recent call last):
  File "C:\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 454, in _check_solver
  

Traceback (most recent call last):
  File "C:\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 443, in _check_solver
    raise ValueError("Solver %s supports only 'l2' or 'none' penalties, "
ValueError: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "C:\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\anaconda3\lib\site-packag

Traceback (most recent call last):
  File "C:\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 443, in _check_solver
    raise ValueError("Solver %s supports only 'l2' or 'none' penalties, "
ValueError: Solver newton-cg supports only 'l2' or 'none' penalties, got elasticnet penalty.

Traceback (most recent call last):
  File "C:\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\anaconda3\lib\sit

Traceback (most recent call last):
  File "C:\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 454, in _check_solver
    raise ValueError(
ValueError: penalty='none' is not supported for the liblinear solver

Traceback (most recent call last):
  File "C:\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 454, in _check_solver
  

Traceback (most recent call last):
  File "C:\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 443, in _check_solver
    raise ValueError("Solver %s supports only 'l2' or 'none' penalties, "
ValueError: Solver sag supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "C:\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\anaconda3\lib\site-packages\skl

Traceback (most recent call last):
  File "C:\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 454, in _check_solver
    raise ValueError(
ValueError: penalty='none' is not supported for the liblinear solver

Traceback (most recent call last):
  File "C:\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 454, in _check_solver
  

GridSearchCV(cv=5, estimator=LogisticRegression(),
             param_grid={'C': [0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 1, 2, 5, 10],
                         'penalty': ['l1', 'l2', 'elasticnet', 'none'],
                         'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag',
                                    'saga']},
             scoring='roc_auc')

In [19]:
grid.cv_results_

{'mean_fit_time': array([0.00183053, 0.00140176, 0.02129774, 0.00147471, 0.04924273,
        0.02927804, 0.01500573, 0.0150599 , 0.02678061, 0.03365021,
        0.00122991, 0.00092087, 0.00109525, 0.00114217, 0.00154085,
        0.03117604, 0.02067957, 0.0011054 , 0.05796385, 0.04468918,
        0.00140171, 0.00127945, 0.02415876, 0.00118036, 0.05238972,
        0.02545099, 0.01668277, 0.01812353, 0.03854899, 0.03079448,
        0.00123963, 0.00118575, 0.00138893, 0.00119247, 0.0013495 ,
        0.02803621, 0.02131925, 0.0013546 , 0.06118569, 0.04684749,
        0.00150714, 0.00115499, 0.03675632, 0.00142655, 0.04145885,
        0.02929101, 0.02047853, 0.01939759, 0.04679208, 0.03499379,
        0.00151362, 0.00116539, 0.00111275, 0.00099044, 0.00115657,
        0.03318839, 0.03136463, 0.0020227 , 0.05508833, 0.04386911,
        0.00080199, 0.00086126, 0.04828835, 0.00120816, 0.05337157,
        0.02641935, 0.02045712, 0.01666975, 0.04621544, 0.03717704,
        0.00116587, 0.00114126,

In [20]:
grid.best_params_

{'C': 0.1, 'penalty': 'none', 'solver': 'sag'}

In [16]:
from sklearn.linear_model import LogisticRegression
X_train, X_test, y_train, y_test = train_test_split(data_1[per], data_1[target], train_size=0.8)
lg_1 = LogisticRegression(C = 0.1, penalty = 'none', solver = 'sag')
lg_1.fit(X_train, y_train)
pred = lg_1.predict(X_test)
from sklearn.metrics import roc_auc_score
print(roc_auc_score(y_test, pred))

0.7376082110078461




In [194]:
forest = RandomForestClassifier(n_estimators=300)
forest.fit(X_train, y_train)
pred = forest.predict(X_test)
from sklearn.metrics import roc_auc_score
print(roc_auc_score(y_test, pred))

0.7124590163934426


In [195]:
!pip install catboost

Collecting catboost
  Downloading catboost-1.0.5-cp38-none-win_amd64.whl (73.9 MB)
Collecting plotly
  Downloading plotly-5.7.0-py2.py3-none-any.whl (28.8 MB)
Collecting graphviz
  Downloading graphviz-0.20-py3-none-any.whl (46 kB)
Collecting tenacity>=6.2.0
  Downloading tenacity-8.0.1-py3-none-any.whl (24 kB)
Installing collected packages: tenacity, plotly, graphviz, catboost
Successfully installed catboost-1.0.5 graphviz-0.20 plotly-5.7.0 tenacity-8.0.1


In [196]:
import catboost

In [197]:
data_new = pd.read_csv("/train.csv")

In [198]:
data = data.dropna()

num_cols_new = [
    'ClientPeriod',
    'MonthlySpending',
    'TotalSpent', 'IsSeniorCitizen'
]


cat_cols_new = [
    'Sex',
    'HasPartner',
    'HasChild',
    'HasPhoneService',
    'HasMultiplePhoneNumbers',
    'HasInternetService',
    'HasOnlineSecurityService',
    'HasOnlineBackup',
    'HasDeviceProtection',
    'HasTechSupportAccess',
    'HasOnlineTV',
    'HasMovieSubscription',
    'HasContractPhone',
    'IsBillingPaperless',
    'PaymentMethod'
]

columns_new = num_cols_new + cat_cols_new
target_new = 'Churn'

In [199]:
X_train, X_test, y_train, y_test = train_test_split(data_new[columns_new], data_new[target], train_size=0.8)

In [200]:
boosting_model = catboost.CatBoostClassifier(n_estimators=200, 
                                             cat_features=cat_cols_new)

boosting_model.fit(X_train, y_train)

pred = boosting_model.predict(X_test)
from sklearn.metrics import roc_auc_score
print(roc_auc_score(y_test, pred))

Learning rate set to 0.083391
0:	learn: 0.6374050	total: 258ms	remaining: 51.3s
1:	learn: 0.5996995	total: 351ms	remaining: 34.7s
2:	learn: 0.5637949	total: 442ms	remaining: 29s
3:	learn: 0.5376656	total: 508ms	remaining: 24.9s
4:	learn: 0.5158180	total: 575ms	remaining: 22.4s
5:	learn: 0.4982917	total: 635ms	remaining: 20.5s
6:	learn: 0.4835247	total: 681ms	remaining: 18.8s
7:	learn: 0.4717080	total: 738ms	remaining: 17.7s
8:	learn: 0.4610079	total: 790ms	remaining: 16.8s
9:	learn: 0.4527027	total: 837ms	remaining: 15.9s
10:	learn: 0.4457724	total: 881ms	remaining: 15.1s
11:	learn: 0.4408022	total: 913ms	remaining: 14.3s
12:	learn: 0.4345483	total: 970ms	remaining: 13.9s
13:	learn: 0.4298278	total: 1.03s	remaining: 13.7s
14:	learn: 0.4248701	total: 1.09s	remaining: 13.4s
15:	learn: 0.4210082	total: 1.14s	remaining: 13.1s
16:	learn: 0.4186495	total: 1.18s	remaining: 12.7s
17:	learn: 0.4154584	total: 1.23s	remaining: 12.4s
18:	learn: 0.4127839	total: 1.27s	remaining: 12.1s
19:	learn: 0.

160:	learn: 0.3467103	total: 6.92s	remaining: 1.68s
161:	learn: 0.3465889	total: 6.97s	remaining: 1.64s
162:	learn: 0.3464385	total: 7.03s	remaining: 1.59s
163:	learn: 0.3461223	total: 7.08s	remaining: 1.55s
164:	learn: 0.3456148	total: 7.12s	remaining: 1.51s
165:	learn: 0.3455069	total: 7.17s	remaining: 1.47s
166:	learn: 0.3454343	total: 7.22s	remaining: 1.43s
167:	learn: 0.3448469	total: 7.27s	remaining: 1.38s
168:	learn: 0.3446885	total: 7.32s	remaining: 1.34s
169:	learn: 0.3445250	total: 7.37s	remaining: 1.3s
170:	learn: 0.3440618	total: 7.42s	remaining: 1.26s
171:	learn: 0.3437744	total: 7.47s	remaining: 1.22s
172:	learn: 0.3435696	total: 7.51s	remaining: 1.17s
173:	learn: 0.3433179	total: 7.55s	remaining: 1.13s
174:	learn: 0.3430175	total: 7.58s	remaining: 1.08s
175:	learn: 0.3425791	total: 7.62s	remaining: 1.04s
176:	learn: 0.3425583	total: 7.67s	remaining: 997ms
177:	learn: 0.3421914	total: 7.72s	remaining: 954ms
178:	learn: 0.3418182	total: 7.75s	remaining: 909ms
179:	learn: 0

# итоговое обучение

In [42]:
data = pd.read_csv("/test(1).csv")


In [49]:
pred = lg_1.predict(data[per])

In [50]:
res = pd.read_csv('/submission.csv')

In [51]:
output = pd.DataFrame({'Id': data.index,
                       'Churn': pred})
output.to_csv('/submission.csv', index=False)

In [52]:
output

Unnamed: 0,Id,Churn
0,0,0
1,1,1
2,2,1
3,3,0
4,4,0
...,...,...
1756,1756,0
1757,1757,0
1758,1758,1
1759,1759,1


In [40]:
data.to_csv('/submission.csv', index=False)