In [89]:
import numpy as np
class logistic_regression:
    def __init__(self):
        self.weights = None
        self.learning_rate = 0.0001
        
    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))
    
    def fit(self, X, y, epochs=10000):
        # adding a column of ones to the input matrix
        X = np.hstack((np.ones((X.shape[0], 1)), X))
        self.weights = np.zeros(X.shape[1])
        
        for _ in range(epochs):
            y_pred = self.sigmoid(np.dot(X, self.weights))
            gradient = np.dot(X.T, (y - y_pred))
            self.weights += self.learning_rate * gradient
            
    def predict(self, X):
        X = np.hstack((np.ones((X.shape[0], 1)), X))
        y_pred = self.sigmoid(np.dot(X, self.weights))
        return np.round(y_pred)

# Data Preprocessing

In [90]:
# preprocessing of a dataset

import pandas as pd


df=pd.read_csv("data1.csv")
df=df.drop(columns=["customerID"],axis=1)
df.drop_duplicates(inplace=True)
features = df.drop(["Churn"], axis=1)
target = df["Churn"]

# null values
features.isnull().sum()
# duplicate values
features.duplicated().sum()
# replacing space in float type columns
features["TotalCharges"]=features["TotalCharges"].replace(" ",np.nan)

# features.isnull().sum()
features["TotalCharges"]=features["TotalCharges"].astype(float)
features.fillna(features["TotalCharges"].mean(),inplace=True)
# Label Encoding the target variable
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
target = le.fit_transform(target)
#  converting object datatype to category
categorical_columns=[]
for col in features.columns:
    if features[col].dtype == 'object':
        categorical_columns.append(col)

# converting object datatype to category
for col in categorical_columns:
    features[col] = features[col].astype('category')

filter_col=list(features.select_dtypes(exclude=['category']).columns)

# one_hot encoding
features = pd.get_dummies(features)

# scaling the data
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
for col in filter_col:
    features[col] = scaler.fit_transform(features[[col]])

# features.head(10)

# target.shape

# Bagging

In [91]:
# ## Importing Libraries
from sklearn.utils import resample
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score


# ## Splitting the data, 80% for training and 20% for testing, among the training data, 80% for training and 20% for validation
x_train, x_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.2, random_state=42)
x_val.shape

(1124, 45)

In [92]:

features = pd.DataFrame(features)  
target = pd.Series(target)         


models=[]
accuracies=[]

for i in range(9):
    indices = resample(range(len(x_train)), replace=True, n_samples=int(0.8 * len(x_train)))
    

    sampled_features = features.iloc[indices]
    sampled_target = target.iloc[indices]


    model = logistic_regression()
    model.fit(x_train, y_train)
    models.append(model)

    y_pred = model.predict(x_test)
    accuracies.append(accuracy_score(y_test, y_pred))




# Stacking

In [93]:
# running the model on validation data, then adding those data as features to the validation data,so that we can train the meta model on it
val_features = []
for model in models:
    val_features.append(model.predict(x_val))
val_features = np.array(val_features).T

# now adding the features to the validation data
x_val = np.concatenate((x_val, val_features), axis=1)

# training the meta model
meta_model = logistic_regression()
meta_model.fit(x_val, y_val)

# running the model on the test data
test_features = []
for model in models:
    test_features.append(model.predict(x_test))
test_features = np.array(test_features).T

# now adding the features to the test data
x_test = np.concatenate((x_test, test_features), axis=1)

# testing the meta model
y_pred = meta_model.predict(x_test)
accuracy_score(y_test, y_pred)

0.7914590747330961