In [3]:
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load the dataset into a pandas DataFrame
dataset_path = r'C:\Users\tahsi\OneDrive\Desktop\python_ws\telco-churn.csv'
df = pd.read_csv(dataset_path)

# Check the columns in the dataset
print("Columns in the dataset:", df.columns)

# Define features and target
# Assuming 'Churn' is the target variable and 'customerID' is an ID column to be dropped
X = df.drop(columns=['customerID', 'Churn'], errors='ignore')  # Use errors='ignore' to avoid KeyError
y = df['Churn'].apply(lambda x: 1 if x == 'Yes' else 0)  # Convert target to binary (0 and 1)

# Handle categorical variables if any
# This is a basic encoding; consider using more sophisticated methods if needed
X = pd.get_dummies(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the Gradient Boosting model
gbm = GradientBoostingClassifier(random_state=42)

# Train the model
gbm.fit(X_train, y_train)

# Predict on the test set
y_pred_gbm = gbm.predict(X_test)

# Evaluate the model
accuracy_gbm = accuracy_score(y_test, y_pred_gbm)
print(f'Gradient Boosting Accuracy: {accuracy_gbm}')
print('Gradient Boosting Classification Report:')
print(classification_report(y_test, y_pred_gbm))
print('Gradient Boosting Confusion Matrix:')
print(confusion_matrix(y_test, y_pred_gbm))


Columns in the dataset: Index(['customerID', 'gender', 'SeniorCitizen', 'Partner', 'Dependents',
       'tenure', 'PhoneService', 'MultipleLines', 'InternetService',
       'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport',
       'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling',
       'PaymentMethod', 'MonthlyCharges', 'TotalCharges', 'Churn'],
      dtype='object')
Gradient Boosting Accuracy: 0.815471965933286
Gradient Boosting Classification Report:
              precision    recall  f1-score   support

           0       0.84      0.92      0.88      1036
           1       0.70      0.52      0.60       373

    accuracy                           0.82      1409
   macro avg       0.77      0.72      0.74      1409
weighted avg       0.81      0.82      0.81      1409

Gradient Boosting Confusion Matrix:
[[954  82]
 [178 195]]
