<a href="https://colab.research.google.com/github/KishoreDuvvi/python/blob/main/Final_CA_1_Applied_statistics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier
from imblearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn import metrics
from sklearn import svm

# Data Preparation
data = pd.read_csv("/content/CustomerChurn.csv")
data['Gender'] = data['Gender'].map({'M': 1, 'F': 0})
data['Attrition_Flag'] = data['Attrition_Flag'].map({'Existing Customer': 1, 'Attrited Customer': 0})
data2 = pd.get_dummies(data, columns=['Customer_Age', 'Dependent_count', 'Education_Level', 'Marital_Status',
                                      'Income_Category', 'Card_Category', 'Months_on_book',
                                      'Total_Relationship_Count', 'Months_Inactive', 'Contacts_Count',
                                      'Credit_Limit', 'Total_Revolving_Bal', 'Total_Trans_Amt', 'Total_Trans_Ct'])
X = data2.drop('Attrition_Flag', axis=1)
Y = data2['Attrition_Flag']
X_scaled = StandardScaler().fit_transform(X)
X_train, X_test, Y_train, Y_test = train_test_split(X_scaled, Y, test_size=0.3, random_state=100)
X_train, Y_train = SMOTE(random_state=100).fit_resample(X_train, Y_train)

# Decision Tree Classifier with Grid Search (method #3)
DT_classifier2 = Pipeline([
    ('balancing', SMOTE(random_state=101)),
    ('classification', RandomForestClassifier(n_estimators=100))
])
parameters = {'classification__max_depth': [2, 3, 4, 5, 10, 15, 20, 25, 30, 35]}
grid_search1 = GridSearchCV(estimator=DT_classifier2, param_grid=parameters, scoring='recall', cv=4)
grid_search1.fit(X_scaled, Y)
best_depth = grid_search1.best_params_
best_result = grid_search1.best_score_

# Decision Tree Classifier (method #1)
DT_classifier1 = RandomForestClassifier(n_estimators=100, max_depth=15)
m = DT_classifier1.fit(X_train, Y_train)
Y_pred1 = m.predict(X_test)
important_features = pd.Series(DT_classifier1.feature_importances_, index=list(X)).sort_values(ascending=False)

# Accuracy and Confusion Matrix
accuracy = metrics.accuracy_score(Y_test, Y_pred1)
con_matrix = metrics.confusion_matrix(Y_test, Y_pred1)
recall = metrics.recall_score(Y_test, Y_pred1)
precision = metrics.precision_score(Y_test, Y_pred1)

# Support Vector Machine (SVM) Classifier (method #1)
SVM_classifier1 = svm.SVC(kernel='linear', random_state=0)
SVM_classifier1.fit(X_train, Y_train)
Y_pred3 = SVM_classifier1.predict(X_test)

# Print Results
print("Best Depth:", best_depth)
print("Best Result:", best_result)
print("Important Features:\n", important_features)
print("Accuracy:", accuracy)
print("Confusion Matrix:\n", con_matrix)
print("Recall:", recall)
print("Precision:", precision)



Best Depth: {'classification__max_depth': 20}
Best Result: 0.9998097412480974
Important Features:
 Credit_Limit_1438.3          0.107849
Total_Revolving_Bal_0        0.058447
Customer_Age_46              0.030531
Months_Inactive_1            0.029837
Education_Level_Doctorate    0.028822
                               ...   
Credit_Limit_20176.0         0.000000
Credit_Limit_20144.0         0.000000
Credit_Limit_20117.0         0.000000
Credit_Limit_20114.0         0.000000
Total_Trans_Ct_134           0.000000
Length: 10155, dtype: float64
Accuracy: 0.8344017094017094
Confusion Matrix:
 [[   6  309]
 [   1 1556]]
Recall: 0.9993577392421323
Precision: 0.8343163538873994
