In [7]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

In [8]:
# Load the churn dataset from the CSV file in the working directory.
data = pd.read_csv("churn.csv")

# Per-column count of missing values (the author observed none at this stage).
mis_data = data.isna().sum()

# Evaluated but not rendered: a cell only displays its last expression.
data.head()

# Many columns are categorical (object dtype); 'TotalCharges' holds numbers
# but is read as object.
data.dtypes

customerID           object
gender               object
SeniorCitizen         int64
Partner              object
Dependents           object
tenure                int64
PhoneService         object
MultipleLines        object
InternetService      object
OnlineSecurity       object
OnlineBackup         object
DeviceProtection     object
TechSupport          object
StreamingTV          object
StreamingMovies      object
Contract             object
PaperlessBilling     object
PaymentMethod        object
MonthlyCharges      float64
TotalCharges         object
Churn                object
dtype: object

In [9]:
# 'TotalCharges' is read as text. Keep a handle on the raw values before
# converting so the entries that fail to parse can still be inspected.
total_charges_raw = data['TotalCharges']
total_charges_num = pd.to_numeric(total_charges_raw, errors='coerce')

# Raw values that could not be parsed as numbers. They must be collected from
# the pre-conversion column: once coerced, every entry is already a float (or
# NaN), so probing the converted column with float() can never raise and the
# list would always stay empty.
unparsed_mask = total_charges_num.isna() & total_charges_raw.notna()
t = total_charges_raw[unparsed_mask].tolist()

# Replace the NaNs (coerced or originally missing) with 0 so the column is
# fully numeric.
# NOTE(review): 0 is an assumption about what an unparseable charge means — confirm.
data['TotalCharges'] = total_charges_num.fillna(0)

In [10]:
# Features: every column except the row identifier and the target.
X = data.drop(columns=['customerID', 'Churn'])
y = data.Churn

# One-hot encode each object-dtype column in a single pass; the remaining
# columns keep their position and the dummy columns are appended after them.
object_cols = [col for col in X.columns if X[col].dtype == 'object']
X = pd.get_dummies(X, columns=object_cols, prefix=object_cols)

# Encode the target labels as integers.
label = LabelEncoder()
y = label.fit_transform(y)

# Standardise every feature column (the result is a NumPy array, not a DataFrame).
# NOTE(review): the scaler is fitted on every row of X, so rows that are later
# held out for testing also contribute to the scaling statistics.
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [11]:
# Hold out 20% of the rows for the final evaluation (fixed seed for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0)

# Fit the standardisation on the training rows only and merely apply it to the
# held-out rows. Any scaling done on X before the split used statistics from
# all rows, which lets the test rows influence the training features.
# Standardising again with training-only statistics removes that influence:
# for a feature with non-zero variance, z-scoring is unaffected by an earlier
# linear rescaling of that feature.
# The CV folds inside the grid search still share these training-wide
# statistics; wrapping scaler + model in a Pipeline would remove that as well.
train_scaler = StandardScaler().fit(X_train)
X_train = train_scaler.transform(X_train)
X_test = train_scaler.transform(X_test)

# Regularisation strengths 0.01 ... 0.10. Dividing integers avoids float
# artefacts such as 0.060000000000000005 appearing in best_params_.
param_grid = {'penalty': ['l2'], 'C': [k / 100 for k in range(1, 11)]}
grid_search = GridSearchCV(LogisticRegression(), param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

# Best hyper-parameters and the estimator refitted with them on the training split.
best_param = grid_search.best_params_
model = grid_search.best_estimator_

# Predictions for the held-out rows, used for the evaluation report.
y_pred = model.predict(X_test)

In [12]:
# Per-class precision, recall and F1 for the held-out predictions.
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.84      0.90      0.87      1041
           1       0.64      0.52      0.58       368

    accuracy                           0.80      1409
   macro avg       0.74      0.71      0.72      1409
weighted avg       0.79      0.80      0.79      1409

