In [18]:
import pandas as pd
import joblib
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, r2_score

In [19]:
training_dataset = pd.read_csv('training.csv')
testing_dataset = pd.read_csv('testing.csv')

training_dataset.dropna(subset=['Churn'], inplace=True)
testing_dataset.dropna(subset=['Churn'], inplace=True)

X_train = training_dataset.drop(['Churn', 'CustomerID'], axis=1)
y_train = training_dataset['Churn']
X_test = testing_dataset.drop(['Churn', 'CustomerID'], axis=1)
y_test = testing_dataset['Churn']

In [20]:
numeric_features = X_train.select_dtypes(include=['int64', 'float64']).columns
categorical_features = X_train.select_dtypes(include=['object']).columns

numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])

categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ])

model_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', LogisticRegression())
])

In [21]:
model_pipeline.fit(X_train, y_train)

y_pred = model_pipeline.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
r_squared = r2_score(y_test, y_pred)

print(f'Accuracy: {accuracy*100:.2f}%')
print(f'R-Squared: {r_squared}')

joblib.dump(model_pipeline, 'Logistic Regression.pkl')

Accuracy: 57.12%
R-Squared: -0.7199285564044422


['Logistic Regression.pkl']