In [230]:
import pandas as pd
import numpy as np
import random
dataset = pd.read_csv('dataset.csv', index_col=0)
print(dataset)

     Age  MonthlyCharges Contract Type  Tenure  Churn
0     44             667       prepaid       4      1
1     48            1506      postpaid      40      0
2     61            1362      postpaid      48      0
3     67             684      postpaid      37      0
4     59             476       prepaid      43      0
..   ...             ...           ...     ...    ...
995   43            1153       prepaid      48      0
996   62            1044       prepaid       3      0
997   41             828      postpaid      24      1
998   41             965       prepaid      10      1
999   28             577       prepaid      14      0

[1000 rows x 5 columns]


In [231]:
max_age = dataset['Age'].max()
max_monthly_charges = dataset['MonthlyCharges'].max()
max_tenure = dataset['Tenure'].max()

dataset['Age'] = dataset['Age'] / max_age
dataset['MonthlyCharges'] = dataset['MonthlyCharges'] / max_monthly_charges
dataset['Tenure'] = dataset['Tenure'] / max_tenure

In [232]:
print(dataset)
weights = np.random.randn(4)  # 3 features + 1 bias
learning_rate = 0.01
num_iterations = 1000

          Age  MonthlyCharges Contract Type    Tenure  Churn
0    0.637681        0.333667       prepaid  0.083333      1
1    0.695652        0.753377      postpaid  0.833333      0
2    0.884058        0.681341      postpaid  1.000000      0
3    0.971014        0.342171      postpaid  0.770833      0
4    0.855072        0.238119       prepaid  0.895833      0
..        ...             ...           ...       ...    ...
995  0.623188        0.576788       prepaid  1.000000      0
996  0.898551        0.522261       prepaid  0.062500      0
997  0.594203        0.414207      postpaid  0.500000      1
998  0.594203        0.482741       prepaid  0.208333      1
999  0.405797        0.288644       prepaid  0.291667      0

[1000 rows x 5 columns]


In [233]:
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def gradient_descent(X, y, weights, learning_rate, num_iterations):
    m = len(y)
    for i in range(num_iterations):
        h = sigmoid(np.dot(X, weights))
        gradient = np.dot(X.T, (h - y)) / m
        weights -= learning_rate * gradient
    return weights

In [234]:
def train_test_split(x, y, test_size):
    split_index = int(len(x) * (1 - test_size))
    return x[:split_index], x[split_index:], y[:split_index], y[split_index:]
def accuracy_score(y_true, y_pred):
    correct_predictions = np.sum(y_true == y_pred)
    total_predictions = len(y_true)
    accuracy = correct_predictions / total_predictions
    return accuracy

def confusion_matrix(y_true, y_pred):
    tp = np.sum((y_true == 1) & (y_pred == 1))  # True Positives
    tn = np.sum((y_true == 0) & (y_pred == 0))  # True Negatives
    fp = np.sum((y_true == 0) & (y_pred == 1))  # False Positives
    fn = np.sum((y_true == 1) & (y_pred == 0))  # False Negatives
    
    return np.array([[tn, fp], [fn, tp]])

In [235]:
X = dataset[['Age', 'MonthlyCharges', 'Tenure']].values
y = dataset['Churn'].values
X = np.c_[np.ones(X.shape[0]), X]
optimal_weights = gradient_descent(X, y, weights, learning_rate, num_iterations)
def predict(X, weights):
    return sigmoid(np.dot(X, weights)) >= 0.5
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
optimal_weights = gradient_descent(X_train, y_train, weights, learning_rate, num_iterations)
y_pred = predict(X_test, optimal_weights)

accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy*100:.2f}%')

conf_matrix = confusion_matrix(y_test, y_pred)
print(f'Confusion Matrix:\n{conf_matrix}')

Accuracy: 56.00%
Confusion Matrix:
[[90 18]
 [70 22]]
