### PART 1


In [199]:
import pandas as pd
import numpy as np

# Load the data
data = pd.read_csv('churn_data.csv')

# Prepare the features and target
X = data[['Age', 'MonthlyCharges', 'Tenure']]
y = data['Churn']

# One-hot encode the ContractType
contract_type_dummies = pd.get_dummies(data['ContractType'], prefix='Contract')
X = pd.concat([X, contract_type_dummies], axis=1)
print(X)

     Age  MonthlyCharges  Tenure  Contract_Month-to-month  Contract_One year  \
0     56          148.71      43                     True              False   
1     69           38.21      70                     True              False   
2     46           87.38      39                     True              False   
3     32          134.06      56                    False              False   
4     60          116.30      63                    False              False   
..   ...             ...     ...                      ...                ...   
602   27          116.40      33                    False              False   
603   45           75.40      21                    False               True   
604   52          133.20      18                     True              False   
605   41           96.80      27                    False              False   
606   55          121.00      31                    False               True   

     Contract_Two year  
0             

In [200]:
# Normalize continuous features
def normalize(X):
    return (X - X.mean()) / X.std()

X_normalized = X.copy()
print(X_normalized)
X_normalized[['Age', 'MonthlyCharges', 'Tenure']] = normalize(X[['Age', 'MonthlyCharges', 'Tenure']])
print(X_normalized)

# Convert to numpy arrays
X_normalized = X_normalized.values.astype("float")
y = y.values.astype("float")

     Age  MonthlyCharges  Tenure  Contract_Month-to-month  Contract_One year  \
0     56          148.71      43                     True              False   
1     69           38.21      70                     True              False   
2     46           87.38      39                     True              False   
3     32          134.06      56                    False              False   
4     60          116.30      63                    False              False   
..   ...             ...     ...                      ...                ...   
602   27          116.40      33                    False              False   
603   45           75.40      21                    False               True   
604   52          133.20      18                     True              False   
605   41           96.80      27                    False              False   
606   55          121.00      31                    False               True   

     Contract_Two year  
0             

In [201]:
# Sigmoid function
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

# Cost function
def compute_cost(X, y, theta):
    m = len(y)
    h = sigmoid(X @ theta)
    cost = (-1/m) * np.sum(y * np.log(h) + (1-y) * np.log(1-h))
    return cost

# Gradient descent
def gradient_descent(X, y, theta, alpha, num_iters):
    m = len(y)
    J_history = []
    
    for i in range(num_iters):
        h = sigmoid(X @ theta)
        gradient = (1/m) * X.T @ (h - y)
        theta = theta - alpha * gradient
        J_history.append(compute_cost(X, y, theta))
    
    return theta, J_history

In [202]:
# Add bias term to X
X_normalized = np.c_[np.ones((X_normalized.shape[0], 1)), X_normalized]

# Initialize parameters
theta = np.random.randn(X_normalized.shape[1])

# Set hyperparameters
alpha = 0.01
num_iters = 1000

# Run gradient descent
theta_optimal, J_history = gradient_descent(X_normalized, y, theta, alpha, num_iters)

In [203]:
# Predict function
def predict(X, theta):
    return sigmoid(X @ theta) >= 0.5

# Evaluate model
def evaluate_model(X, y, theta):
    y_pred = predict(X, theta)
    accuracy = np.mean(y_pred == y)
    
    TP = np.sum((y == 1) & (y_pred == 1))
    TN = np.sum((y == 0) & (y_pred == 0))
    FP = np.sum((y == 0) & (y_pred == 1))
    FN = np.sum((y == 1) & (y_pred == 0))
    
    confusion_matrix = np.array([[TN, FP], [FN, TP]])
    
    return accuracy, confusion_matrix

In [204]:
# Perform k-fold cross-validation
def cross_validation(X, y, k=5):
    fold_size = len(X) // k
    accuracies = []
    confusion_matrices = []
    
    for i in range(k):
        start = i * fold_size
        end = (i + 1) * fold_size
        
        X_test = X[start:end]
        y_test = y[start:end]
        X_train = np.concatenate([X[:start], X[end:]])
        y_train = np.concatenate([y[:start], y[end:]])
        
        theta = np.random.randn(X.shape[1])
        theta_optimal, _ = gradient_descent(X_train, y_train, theta, alpha, num_iters)
        
        accuracy, confusion_matrix = evaluate_model(X_test, y_test, theta_optimal)
        accuracies.append(accuracy)
        confusion_matrices.append(confusion_matrix)
    
    return np.mean(accuracies), np.mean(confusion_matrices, axis=0)

In [205]:
# Perform cross-validation
cv_accuracy, cv_confusion_matrix = cross_validation(X_normalized, y)
print(f"Cross-validation accuracy: {cv_accuracy:.4f}")
print("Average confusion matrix:")
print(cv_confusion_matrix)

Cross-validation accuracy: 0.7950
Average confusion matrix:
[[68.  11.2]
 [13.6 28.2]]


In [206]:
# Train final model on entire dataset
theta_final, _ = gradient_descent(X_normalized, y, theta, alpha, num_iters)

# Final evaluation
final_accuracy, final_confusion_matrix = evaluate_model(X_normalized, y, theta_final)
print(f"\nFinal model accuracy: {final_accuracy:.4f}")
print("Final confusion matrix:")
print(final_confusion_matrix)


Final model accuracy: 0.8254
Final confusion matrix:
[[362  36]
 [ 70 139]]


In [207]:
# Print optimal parameters
print("\nOptimal parameters:")
feature_names = ['Bias'] + list(X.columns)
for name, param in zip(feature_names, theta_final):
    print(f"{name}: {param:.4f}")


Optimal parameters:
Bias: 0.1035
Age: -0.0898
MonthlyCharges: 0.0982
Tenure: -0.7713
Contract_Month-to-month: 0.0119
Contract_One year: -1.3742
Contract_Two year: -1.1386


### PART 2

In [208]:
import pandas as pd
import numpy as np

# Load the data
data = pd.read_csv('churn_data.csv')

# Create a new column 'ChurnType' based on 'Churn'
def assign_churn_type(row):
    if row['Churn'] == 0:
        return 0  # No Churn
    else:
        # Randomly assign Voluntary (1) or Involuntary (2) Churn
        return np.random.choice([1, 2])

# Set random seed for reproducibility
np.random.seed(42)

# Apply the function to create the new column
data['ChurnType'] = data.apply(assign_churn_type, axis=1)

# Prepare the features and target
X = data[['Age', 'MonthlyCharges', 'Tenure']]
y = data['ChurnType']

# One-hot encode the ContractType
contract_type_dummies = pd.get_dummies(data['ContractType'], prefix='Contract')
X = pd.concat([X, contract_type_dummies], axis=1)

# Normalize continuous features
def normalize(X):
    return (X - X.mean()) / X.std()

X_normalized = X.copy()
X_normalized[['Age', 'MonthlyCharges', 'Tenure']] = normalize(X[['Age', 'MonthlyCharges', 'Tenure']])

# Convert to numpy arrays
X_normalized = X_normalized.values.astype("float")
y = y.values.astype("int")

# Add bias term to X
X_normalized = np.c_[np.ones((X_normalized.shape[0], 1)), X_normalized]

# Softmax function
def softmax(z):
    exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
    return exp_z / np.sum(exp_z, axis=1, keepdims=True)

# Cost function
def compute_cost(X, y, theta):
    m = len(y)
    h = softmax(X @ theta)
    cost = (-1/m) * np.sum(np.eye(3)[y] * np.log(h))
    return cost

# Gradient descent
def gradient_descent(X, y, theta, alpha, num_iters):
    m = len(y)
    J_history = []
    
    for i in range(num_iters):
        h = softmax(X @ theta)
        gradient = (1/m) * X.T @ (h - np.eye(3)[y])
        theta = theta - alpha * gradient
        J_history.append(compute_cost(X, y, theta))
    
    return theta, J_history

# Predict function
def predict(X, theta):
    return np.argmax(softmax(X @ theta), axis=1)

# Evaluate model
def evaluate_model(X, y, theta):
    y_pred = predict(X, theta)
    accuracy = np.mean(y_pred == y)
    
    confusion_matrix = np.zeros((3, 3), dtype=int)
    for true, pred in zip(y, y_pred):
        confusion_matrix[true, pred] += 1
    
    return accuracy, confusion_matrix

# Perform k-fold cross-validation
def cross_validation(X, y, k=5):
    fold_size = len(X) // k
    accuracies = []
    confusion_matrices = []
    
    for i in range(k):
        start = i * fold_size
        end = (i + 1) * fold_size
        
        X_test = X[start:end]
        y_test = y[start:end]
        X_train = np.concatenate([X[:start], X[end:]])
        y_train = np.concatenate([y[:start], y[end:]])
        
        theta = np.random.randn(X.shape[1], 3)
        theta_optimal, _ = gradient_descent(X_train, y_train, theta, alpha, num_iters)
        
        accuracy, confusion_matrix = evaluate_model(X_test, y_test, theta_optimal)
        accuracies.append(accuracy)
        confusion_matrices.append(confusion_matrix)
    
    return np.mean(accuracies), np.mean(confusion_matrices, axis=0)

# Set hyperparameters
alpha = 0.01
num_iters = 1000

# Initialize parameters
theta = np.random.randn(X_normalized.shape[1], 3)

# Perform cross-validation
cv_accuracy, cv_confusion_matrix = cross_validation(X_normalized, y)
print(f"Cross-validation accuracy: {cv_accuracy:.4f}")
print("Average confusion matrix:")
print(cv_confusion_matrix)

# Train final model on entire dataset
theta_final, _ = gradient_descent(X_normalized, y, theta, alpha, num_iters)

# Final evaluation
final_accuracy, final_confusion_matrix = evaluate_model(X_normalized, y, theta_final)
print(f"\nFinal model accuracy: {final_accuracy:.4f}")
print("Final confusion matrix:")
print(final_confusion_matrix)

# Print optimal parameters
print("\nOptimal parameters:")
feature_names = ['Bias'] + list(X.columns)
for i, class_name in enumerate(['No Churn', 'Voluntary Churn', 'Involuntary Churn']):
    print(f"\nClass: {class_name}")
    for name, param in zip(feature_names, theta_final[:, i]):
        print(f"{name}: {param:.4f}")

Cross-validation accuracy: 0.7190
Average confusion matrix:
[[74.2  3.6  1.4]
 [ 8.6  9.4  2.8]
 [ 8.8  8.8  3.4]]

Final model accuracy: 0.7035
Final confusion matrix:
[[375   6  17]
 [ 48  12  44]
 [ 49  16  40]]

Optimal parameters:

Class: No Churn
Bias: 0.1668
Age: 0.6699
MonthlyCharges: -0.0792
Tenure: 0.6008
Contract_Month-to-month: -1.2585
Contract_One year: 0.7620
Contract_Two year: 0.4435

Class: Voluntary Churn
Bias: -0.9904
Age: 0.0658
MonthlyCharges: 0.0571
Tenure: -0.2728
Contract_Month-to-month: -0.7032
Contract_One year: -0.3765
Contract_Two year: -0.1479

Class: Involuntary Churn
Bias: -1.3255
Age: 0.1985
MonthlyCharges: -0.3790
Tenure: -0.4960
Contract_Month-to-month: -0.1959
Contract_One year: -0.8239
Contract_Two year: -1.4422
