In [1]:
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline as ImbPipeline
from sklearn.feature_selection import RFE
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix,classification_report
from sklearn.model_selection import GridSearchCV,train_test_split
from sklearn.model_selection import cross_val_score,cross_val_predict
from sklearn.tree import DecisionTreeClassifier


warnings.simplefilter(action="ignore")


In [None]:
# Folder holding the cleaned Zindi electricity-fraud CSVs. It is the single
# place to edit when the notebook is run from another machine or directory.
DATA_DIR = "/Users/USER/Documents/DATASETS/Zindi/electricity_fraud_detection"

# Both frames are read from the same folder; the resolved paths are unchanged.
test = pd.read_csv(f"{DATA_DIR}/cleaned_client_test.csv")
train = pd.read_csv(f"{DATA_DIR}/cleaned_client_train.csv")


### PURE MODEL (UNPRUNED DECISION TREE)

In [None]:
# Baseline model: a decision tree with default (unpruned) settings and a fixed
# random_state so repeated runs build the same tree.
# NOTE(review): xtrain / ytrain are not created in any cell visible above this
# one (only `train` and `test` are loaded). Confirm the cell that derives them
# from `train` exists and runs first; on a fresh kernel this raises NameError.
tree = DecisionTreeClassifier(random_state=365)

tree.fit(X=xtrain, y=ytrain)

In [None]:
# Mean accuracy of the baseline tree across 3 cross-validation folds.
pure_tree_accuracy = cross_val_score(
    estimator=tree,
    X=xtrain,
    y=ytrain,
    cv=3,
    scoring='accuracy',
).mean()
print('Mean_accuracy_score is', pure_tree_accuracy)

In [None]:
# Out-of-fold class predictions (not scores, despite the name): each training
# row is predicted by a model fitted on the other folds.
pure_tree_scores = cross_val_predict(estimator=tree, X=xtrain, y=ytrain, cv=3)

In [None]:
# Confusion matrix of the out-of-fold predictions (rows = true class,
# columns = predicted class). fmt='d' prints the raw integer counts; seaborn's
# default '.2g' would show any count above two digits in scientific notation.
sns.heatmap(
    confusion_matrix(ytrain, pure_tree_scores),
    annot=True,
    fmt='d',
).set(
    xlabel='Predicted label',
    ylabel='True label',
    title='Unpruned tree: out-of-fold confusion matrix',
)

# Per-class precision / recall / F1 for the same predictions.
print(classification_report(ytrain, pure_tree_scores))

### DECISION TREE WITH PRUNING

In [None]:
pruned_tree=DecisionTreeClassifier(random_state=365,ccp_alpha=0.0001)

In [None]:
pruned_tree.fit(xtrain,ytrain)

In [None]:
# Mean accuracy of the pruned tree across 3 cross-validation folds.
pruned_tree_accuracy = cross_val_score(
    estimator=pruned_tree,
    X=xtrain,
    y=ytrain,
    cv=3,
    scoring='accuracy',
).mean()
print('Mean_accuracy_score is', pruned_tree_accuracy)

In [None]:
# Out-of-fold class predictions for the pruned tree, one per training row.
pruned_tree_scores = cross_val_predict(estimator=pruned_tree, X=xtrain, y=ytrain, cv=3)

In [None]:
# Confusion matrix of the pruned tree's out-of-fold predictions (rows = true
# class, columns = predicted class). fmt='d' keeps the counts as integers
# instead of seaborn's default '.2g' scientific notation.
sns.heatmap(
    confusion_matrix(ytrain, pruned_tree_scores),
    annot=True,
    fmt='d',
).set(
    xlabel='Predicted label',
    ylabel='True label',
    title='Pruned tree: out-of-fold confusion matrix',
)

# Every other model section prints this report; include it here too so the
# pruned tree can be compared on per-class precision / recall / F1.
print(classification_report(ytrain, pruned_tree_scores))

### DECISION TREE WITH SMOTE

In [None]:
smote_tree = DecisionTreeClassifier(random_state=365)

In [None]:
#DEALING WITH IMBALANCE WITH SMOTE

from imblearn.over_sampling import SMOTE

os=SMOTE(random_state=365)

xresampled,yresampled=os.fit_resample(xtrain,ytrain)

In [None]:
smote_tree.fit(xresampled,yresampled)

In [None]:
smote_tree_accuracy=cross_val_score(smote_tree,xresampled,yresampled,scoring='accuracy',cv=3).mean()
print('Mean_accuracy_score is',smote_tree_accuracy)

In [None]:
smote_tree_scores=cross_val_predict(smote_tree,xresampled,yresampled,cv=3)

In [None]:
sns.heatmap(confusion_matrix(yresampled,smote_tree_scores),annot=True)

In [None]:
print(classification_report(yresampled,smote_tree_scores))

### DECISION TREE WITH SMOTE AND PRUNING 

In [None]:
smote_prun_tree = DecisionTreeClassifier(random_state=365,ccp_alpha=0.01)

smote_prun_tree.fit(xresampled,yresampled)

In [None]:
smote_prun_tree_acc=cross_val_score(smote_prun_tree,xresampled,yresampled,scoring='accuracy',cv=3).mean()
print('Mean_accuracy_score is',smote_prun_tree_acc)

In [None]:
smote_prun_tree_scores=cross_val_predict(smote_prun_tree,xresampled,yresampled,cv=3)

In [None]:
sns.heatmap(confusion_matrix(yresampled,smote_prun_tree_scores),annot=True)

In [None]:
print(classification_report(yresampled,smote_prun_tree_scores))

### DECISION TREE WITH RECURSIVE FEATURE ELIMINATION


In [None]:
rfe_tree = DecisionTreeClassifier(random_state=365)

In [None]:
rfe=RFE(estimator=rfe_tree,n_features_to_select=8)
rfe=rfe.fit(xresampled,yresampled.values.ravel())

print(rfe.support_)
print(rfe.ranking_)
