In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score,classification_report
from sklearn.preprocessing import StandardScaler

%matplotlib inline


# Load the data
# The CSV location can be overridden with the TELCO_CHURN_CSV environment
# variable so the notebook can run on machines other than the one it was
# written on; when the variable is unset, the original path is used unchanged.
import os

DATA_PATH = os.environ.get(
    'TELCO_CHURN_CSV',
    'C:/Users/HP/Downloads/archive (3)/WA_Fn-UseC_-Telco-Customer-Churn.csv',
)
data = pd.read_csv(DATA_PATH)

#Exploratory data analysis
import matplotlib.pyplot as plt
import seaborn as sns

# Columns with more distinct values than this are not plotted: a count plot
# draws one bar group per distinct value, so identifier-like columns would
# produce an unreadable (and very slow) figure.
MAX_PLOT_CATEGORIES = 20

# All text-typed columns; kept under its original name for later use.
categorical_features = data.select_dtypes(include=['object']).columns

# Columns actually worth plotting against the target.
plot_features = [
    feature
    for feature in categorical_features
    # Plotting 'Churn' with 'Churn' as hue carries no information.
    if feature != 'Churn' and data[feature].nunique() <= MAX_PLOT_CATEGORIES
]

for feature in plot_features:
    # One figure per feature, bars split by the 'Churn' value.
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.countplot(x=feature, hue='Churn', data=data, ax=ax)
    ax.set_title(f'Distribution of {feature} with respect to Churn')
    plt.show()
    
#Calculate Entropy of the given features
import math
def calculate_entropy(column):
    """Return the Shannon entropy, in bits, of the values in ``column``.

    Parameters
    ----------
    column : pandas.Series
        Values whose empirical distribution is measured.

    Returns
    -------
    float
        ``-sum(p * log2(p))`` over the distinct non-missing values that occur
        at least once; 0.0 when there are none.

    Notes
    -----
    ``value_counts`` leaves out missing values, so probabilities are taken
    relative to the number of counted (non-missing) entries. Dividing by
    ``len(column)`` instead would make them sum to less than 1 whenever the
    column contains NaN.
    """
    value_counts = column.value_counts()
    # The denominator must match what value_counts actually counted.
    total_values = value_counts.sum()

    entropy = 0.0
    for count in value_counts:
        if count == 0:
            # Categorical columns report unused categories with a count of 0;
            # they contribute nothing and log2(0) would raise ValueError.
            continue
        probability = count / total_values
        entropy -= probability * math.log2(probability)

    return float(entropy)
# Map every column name of `data` to the entropy of that column's values.
column_names = data.columns.tolist()
entropy = {name: calculate_entropy(data[name]) for name in column_names}
    

    
# Define columns to drop
columns_to_drop = ['customerID', 'gender', 'SeniorCitizen', 'Partner', 'Dependents', 'PhoneService', 'MultipleLines',
                   'OnlineSecurity', 'OnlineBackup',  'TechSupport', 'PaperlessBilling', 'StreamingTV', 'Churn']

# Drop unnecessary columns and perform one-hot encoding
x_new = data.drop(columns=columns_to_drop)
# Encode x_new, not data: data still holds 'customerID' and the 'Churn'
# target, which must not reach the model. The second positional parameter of
# get_dummies is `prefix`, so the feature frame must be the first argument.
# NOTE(review): any numeric column that was read as text (object dtype) will
# be one-hot encoded per distinct value here — confirm the dtypes of x_new.
x = pd.get_dummies(x_new)

# Encode the target variable 'Churn' as 1 ('Yes') / 0 ('No')
churn_mapping = {'Yes': 1, 'No': 0}
data_encoded = pd.DataFrame({'Churn': data['Churn'].map(churn_mapping)})
y = data_encoded['Churn']

# Hold out 20% of the rows for testing; the seed makes the split repeatable
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    train_size=0.8,
    random_state=42,
)

# Standardize the features with StandardScaler.
# The scaler is fitted on the training split only and then reused for the
# test split, so both splits are scaled with the same statistics and nothing
# from the test data influences preprocessing.
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

# Build the Gradient Boosting classifier and train it on the scaled training split
classifier = GradientBoostingClassifier(random_state=42).fit(x_train_scaled, y_train)

# Predict labels for the scaled test split
y_pred = classifier.predict(x_test_scaled)

# Compare predictions with the held-out labels and print the accuracy
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy on Test Data: ", accuracy)


In [None]:
# Build the text classification report for the test-set predictions
report = classification_report(y_true=y_test, y_pred=y_pred)

# Show the accuracy first, then the report under its heading
print("Accuracy on Test Data: ", accuracy)
print("Classification Report:", report, sep="\n")