## Using Decision Trees for Classification

### Loading the Datasets

In [17]:
import pandas as pd

# Read both feature workbooks (filtered features first, then the PCA
# features); each frame still contains the "Churn Value" target column,
# which is split off in the next cell.
X_original, X_pca = map(
    pd.read_excel,
    ("./Datasets/Filtered_features.xlsx", "./Datasets/PCA_features.xlsx"),
)

Separating the Features and Targets

In [18]:
# Name of the label column shared by both workbooks.
_target_column = "Churn Value"

# Targets: the label column taken from each frame.
y_original = X_original[_target_column]
y_pca = X_pca[_target_column]

# Features: everything except the label column (drop returns a new frame,
# so X_original / X_pca are left untouched).
features_original = X_original.drop(columns=_target_column)
features_pca = X_pca.drop(columns=_target_column)

Splitting the Data into Train and Test Sets

In [19]:
from sklearn.model_selection import train_test_split

# Both feature sets are split with identical options: a 33% hold-out and a
# fixed seed so the partition is reproducible.
# NOTE(review): presumably both workbooks have the same rows in the same
# order, in which case the two splits select the same rows - confirm.
_split_options = {"test_size": 0.33, "random_state": 2}

(
    X_train_original,
    X_test_original,
    y_train_original,
    y_test_original,
) = train_test_split(features_original, y_original, **_split_options)

(
    X_train_pca,
    X_test_pca,
    y_train_pca,
    y_test_pca,
) = train_test_split(features_pca, y_pca, **_split_options)

### Creating and Fitting Decision Tree model

In [20]:
from sklearn.tree import DecisionTreeClassifier

# One decision tree per feature set, both seeded identically so the only
# difference between the two models is the input representation.
# `fit` returns the fitted estimator, so construction and training are chained.
dt_original = DecisionTreeClassifier(random_state=2).fit(
    X_train_original, y_train_original
)
dt_pca = DecisionTreeClassifier(random_state=2).fit(X_train_pca, y_train_pca)

# Predictions on the held-out rows, scored in the next cell.
original_predicted = dt_original.predict(X_test_original)
pca_predicted = dt_pca.predict(X_test_pca)

### Scoring the model
- on the basis of Accuracy
- on the basis of Precision
- on the basis of Recall
- on the basis of F1

In [21]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Table-driven score report. Each entry pairs a set of true/predicted labels
# with the rows to print for it: (heading, metric function, extra arguments
# passed to print after the score). The extra arguments reproduce the blank
# lines between scores; the last "original" row carries one more "\n" to
# leave a wider gap before the PCA results.
_score_reports = (
    # for original features
    (
        y_test_original,
        original_predicted,
        (
            ("Accuracy Score (with original features): ", accuracy_score, ("\n",)),
            ("Precision Score (with original features): ", precision_score, ("\n",)),
            ("Recall Score (with original features): ", recall_score, ("\n",)),
            ("F1 Score (with original features): ", f1_score, ("\n", "\n")),
        ),
    ),
    # for pca features
    (
        y_test_pca,
        pca_predicted,
        (
            ("Accuracy Score (with pca features): ", accuracy_score, ("\n",)),
            ("Precision Score (with pca features): ", precision_score, ("\n",)),
            ("Recall Score (with pca features): ", recall_score, ("\n",)),
            ("F1 Score (with pca features): ", f1_score, ("\n",)),
        ),
    ),
)

for _y_true, _y_pred, _rows in _score_reports:
    for _heading, _metric, _trailing in _rows:
        print(_heading)
        print(_metric(_y_true, _y_pred), *_trailing)


Accuracy Score (with original features): 
0.7371822490305903 

Precision Score (with original features): 
0.5168350168350169 

Recall Score (with original features): 
0.4873015873015873 

F1 Score (with original features): 
0.5016339869281046 
 

Accuracy Score (with pca features): 
0.7147781128823782 

Precision Score (with pca features): 
0.4750778816199377 

Recall Score (with pca features): 
0.48412698412698413 

F1 Score (with pca features): 
0.4795597484276729 



In [29]:
# Refit the decision tree on a hand-picked subset of the original features
# and score it on the same held-out rows as before.
#
# The column list is defined once so the train and test selections cannot
# drift apart (previously the 13-name list was duplicated verbatim).
selected_columns = [
    'Partner',
    'Dependents',
    'Tech Support',
    'Paperless Billing',
    'Monthly Charges',
    'Total Charges',
    'DSL_Service',
    'month-to-month_contract',
    'one-year_contract',
    'two-year_contract',
    'bank-transfer-auto_paymentmethod',
    'credit-card-auto_paymentmethod',
    'electronic-check_paymentmethod',
]
new_original_features_train = X_train_original[selected_columns]
new_original_features_test = X_test_original[selected_columns]

# NOTE(review): this refits `dt_original` in place and overwrites
# `original_predicted`; after this cell both names refer to the
# reduced-feature model, not the one trained on every original feature.
# Consider a separately named estimator if the full-feature model is
# needed again further down.
dt_original.fit(new_original_features_train, y_train_original)
original_predicted = dt_original.predict(new_original_features_test)

# Scores for the reduced feature set.
# NOTE(review): the headings still read "original features", identical to
# the earlier full-feature report; kept unchanged so the printed output is
# the same as before - consider relabelling.
# Each row is (heading, metric function, extra print arguments); the final
# row keeps the additional "\n" that the original cell printed after F1.
for _heading, _metric, _trailing in (
    ("Accuracy Score (with original features): ", accuracy_score, ("\n",)),
    ("Precision Score (with original features): ", precision_score, ("\n",)),
    ("Recall Score (with original features): ", recall_score, ("\n",)),
    ("F1 Score (with original features): ", f1_score, ("\n", "\n")),
):
    print(_heading)
    print(_metric(y_test_original, original_predicted), *_trailing)

Accuracy Score (with original features): 
0.7371822490305903 

Precision Score (with original features): 
0.5166112956810631 

Recall Score (with original features): 
0.4936507936507937 

F1 Score (with original features): 
0.5048701298701299 
 

