In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline

In [None]:
# Load the raw dataset; 'data.csv' is resolved relative to the kernel's
# current working directory.
df = pd.read_csv('data.csv')
df

In [None]:
# Remove the customer identifier column before modelling.
# The match is case-insensitive and the result is reassigned (no inplace=True),
# so this cell is idempotent: re-running it, or running it after the column
# labels have been lower-cased, drops the column if present and otherwise does
# nothing instead of raising KeyError.
df = df.drop(columns=[col for col in df.columns if col.lower() == 'customerid'])
df

In [None]:
# Normalise every column label to lower case so later cells can address
# columns with one consistent spelling (e.g. 'churn').
df.columns = df.columns.map(str.lower)
df

In [None]:
# Collect the names of all object-dtype columns; these are treated as the
# categorical variables in the cells that follow.
cat_var = []
for name, dtype in df.dtypes.items():
    if dtype == 'object':
        cat_var.append(name)
cat_var

In [None]:
# Normalise the text in every object-dtype column: lower-case it, then
# replace spaces with underscores.
for cols in cat_var:
    lowered = df[cols].str.lower()
    df[cols] = lowered.str.replace(' ', '_')
df

In [None]:
# Names of the columns that contain at least one null value.
# NOTE(review): blank strings are not nulls, so a numeric column that was read
# as object because of blank entries would not be reported here - confirm
# against the raw file.
missing_values = df.columns[df.isnull().any()].tolist()
missing_values

In [None]:
# Display the frame again; the missing-value check does not modify it.
df

In [None]:
# Show the list of object-dtype column names collected in cat_var.
print(cat_var)

In [None]:
# Display the frame again; printing cat_var does not change it.
df

In [None]:
# Exclude the target column so cat_var holds feature columns only.
# The list is rebuilt rather than mutated with list.remove(), which raised
# ValueError when this cell was executed a second time.
cat_var = [var for var in cat_var if var != 'churn']

# Print each remaining categorical column followed by its number of distinct
# values.
for var in cat_var:
    print(var)
    print(df[var].nunique())

In [None]:
# Display the frame once more before separating features and target.
df

In [None]:
# Separate the 'churn' column (target, y) from all remaining columns
# (features, X).
X, y = df.drop(columns='churn'), df['churn']

In [None]:
class ValueCountTransformer(BaseEstimator, TransformerMixin):
    """Encode selected columns by the frequency of each value in the fit data.

    Parameters
    ----------
    cat_var : iterable of column labels
        Columns to encode. Each label must be present in the ``X`` passed to
        ``fit`` and ``transform``.

    Attributes
    ----------
    value_count_mapping : dict
        ``{column: {value: count}}`` learned by ``fit``. Empty until ``fit``
        has been called.
    """

    def __init__(self, cat_var):
        self.cat_var = cat_var
        # Initialised here so the attribute exists (as an empty dict) even
        # before fitting.
        self.value_count_mapping = {}

    def fit(self, X, y = None):
        """Learn the value counts of every configured column.

        Parameters
        ----------
        X : indexable by column label, each column exposing ``value_counts()``
            (a pandas DataFrame in this notebook).
        y : ignored
            Accepted only for compatibility with the estimator interface.

        Returns
        -------
        self
        """
        # Build a fresh mapping on every call: a refit (for example after
        # set_params(cat_var=...)) must not keep counts for columns that are
        # no longer configured.
        self.value_count_mapping = {
            col: X[col].value_counts().to_dict() for col in self.cat_var
        }
        return self

    def transform(self, X, y = None):
        """Return a copy of ``X`` with the configured columns frequency-encoded.

        Each value is replaced by the count recorded for it during ``fit``;
        values with no recorded count become 0. ``X`` itself is not modified.

        Raises
        ------
        KeyError
            If a configured column has no learned mapping (``fit`` was not
            called) or is missing from ``X``.
        """
        X_transformed = X.copy()
        for col in self.cat_var:
            counts = self.value_count_mapping[col]
            # map() leaves NaN for values without a count; fill those with 0.
            X_transformed[col] = X_transformed[col].map(counts).fillna(0)
        return X_transformed

In [None]:
# Object-dtype feature columns; these are the ones handed to the
# value-count encoder.
cat_columns = list(X.select_dtypes(include='object').columns)

In [None]:
# Encoder instance configured with the categorical feature columns.
value_count_transformer = ValueCountTransformer(cat_var=cat_columns)

In [None]:
# Two-step model: frequency-encode the categorical columns, then fit a
# logistic regression (solver capped at 200 iterations).
pipeline = Pipeline(
    steps=[
        ('value_counts', value_count_transformer),
        ('logistic_regression', LogisticRegression(max_iter=200)),
    ]
)

In [None]:
# Show the column labels of the full frame (features plus target).
print(df.columns)

In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Fit the whole pipeline (encoder, then classifier) on the training split.
pipeline.fit(X_train, y_train)

In [None]:
# Predict the target for the held-out test split.
prediction = pipeline.predict(X_test)

In [None]:
# Side-by-side view of true and predicted labels for the test split; rows keep
# the index of y_test. The first header previously read 'Actual vaue' (typo).
comparing = pd.DataFrame({'Actual Value': y_test, 'Predicted Value': prediction})
comparing

In [None]:
# Fraction of test rows whose predicted label matches the true label.
ac = accuracy_score(y_true=y_test, y_pred=prediction)
ac

In [None]:
# Confusion matrix for the test split: rows are true labels, columns are
# predicted labels.
cm = confusion_matrix(y_true=y_test, y_pred=prediction)
print(cm)

In [None]:
import pickle

In [None]:
# Persist the fitted pipeline to 'pipeline.pkl' in the working directory.
# pickle stores the custom transformer class by reference, so whatever loads
# this file must be able to resolve ValueCountTransformer under the same
# module path.
with open('pipeline.pkl', mode='wb') as model_file:
    pickle.dump(pipeline, model_file)

In [None]:
#pickle.dump(pipeline, open(r'C:\Users\ajana\OneDrive\ML\ML Zoom camp\Section 5\model_saved', 'wb'))

In [None]:
#model_load = pickle.load(open('model_saved', 'rb'))