<a href="https://colab.research.google.com/github/HariPrasad6221/Surface-Defects/blob/main/defects.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier, export_graphviz
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

In [None]:
import holoviews as hv
from holoviews import opts
hv.extension('bokeh')
from sklearn.preprocessing import StandardScaler, LabelEncoder
import os
from scipy import signal
import lightgbm as lgb
import shap

In [None]:
# Load the experiment summary table, then show its (rows, columns) shape and first rows.
experiment_result = pd.read_csv("../input/cnc-tool-wear/train.csv")
print( experiment_result.shape)
experiment_result.head(3)

In [None]:
# Peek at a single raw experiment log (experiment 01) to see its size and column names.
experiment_tmp = pd.read_csv("../input/cnc-tool-wear/experiment_01.csv")
print(experiment_tmp.shape)
print(experiment_tmp.columns)
experiment_tmp.head(3)

In [None]:
# Missing entries in 'passed_visual_inspection' are filled with 'no'; other columns are untouched.
experiment_result = experiment_result.fillna({'passed_visual_inspection': 'no'})

In [None]:
# Columns copied from the experiment's summary row onto every sample of that experiment,
# in the order in which they are appended to the frame.
summary_columns = ['material', 'feedrate', 'clamp_pressure', 'tool_condition',
                   'machining_finalized', 'passed_visual_inspection']

frames = []
for i in range(1, 19):
    exp_num = f"{i:02d}"  # file names are zero-padded: experiment_01 ... experiment_18
    frame = pd.read_csv(f"../input/cnc-tool-wear/experiment_{exp_num}.csv")
    summary_row = experiment_result[experiment_result['No'] == i].iloc[0]
    frame['exp_num'] = i
    for column in summary_columns:
        frame[column] = summary_row[column]
    frames.append(frame)

# One long table holding the samples of all experiments, with a fresh 0..n-1 index.
df = pd.concat(frames, ignore_index=True)
df.head(3)

In [None]:
# Number of samples per machining stage label, ordered alphabetically by label.
df['Machining_Process'].value_counts().sort_index()

In [None]:
# Merge inconsistent stage labels: 'Starting' becomes 'Prep' and 'end' becomes 'End'.
df['Machining_Process'] = df['Machining_Process'].replace({'Starting': 'Prep', 'end': 'End'})

In [None]:
# Distribution of the two machine settings and the number of samples per material,
# laid out two charts per row. The opts.Bars(...) options only target the bar chart.
feedrate = hv.Distribution(df['feedrate']).opts(title="Distribution of feedrate", color="green", xlabel="Feedrate", ylabel="Density")
clamp = hv.Distribution(df['clamp_pressure']).opts(title="Distribution of clamp pressure", color="green", xlabel="Pressure", ylabel="Density")
material = hv.Bars(df['material'].value_counts()).opts(title="Material Count", color="green", xlabel="Material", ylabel="Count")
(feedrate + clamp + material).opts(opts.Bars(width=300, height=300,tools=['hover'],show_grid=True)).cols(2)

In [None]:
# Share of samples (rounded to whole percent) in each category of the three outcome columns.
# NOTE(review): the chart titles say "Count" although the plotted values are percentages.
tool_df = np.round(df['tool_condition'].value_counts(normalize=True) * 100)
finalized_df = np.round(df['machining_finalized'].value_counts(normalize=True) * 100)
vis_passed_df = np.round(df['passed_visual_inspection'].value_counts(normalize=True) * 100)
tool_wear = hv.Bars(tool_df).opts(title="Tool Wear Count", color="green", xlabel="Worn/Unworn", ylabel="Percentage", yformatter='%d%%')
finalized = hv.Bars(finalized_df).opts(title="Finalized Count", color="green", xlabel="Yes/No", ylabel="Percentage", yformatter='%d%%')
vis_inspection = hv.Bars(vis_passed_df).opts(title="Visual Inspection Passed Count", color="green", xlabel="Yes/No", ylabel="Percentage", yformatter='%d%%')
(tool_wear + finalized + vis_inspection).opts(opts.Bars(width=300, height=300,tools=['hover'],show_grid=True)).cols(2)

In [None]:
def _answer_percentages(condition, column):
    """Rounded percentage of each answer in `column` among rows with the given tool condition."""
    rows = df[df['tool_condition'] == condition]
    return np.round(rows[column].value_counts(normalize=True) * 100)


def _answer_bars(percentages, title, color, label_height):
    """Bar chart of `percentages` overlaid with a text label for 'yes' and 'no' at `label_height`."""
    chart = hv.Bars(percentages).opts(title=title, color=color, xlabel="Yes/No", ylabel="Percentage", yformatter='%d%%')
    for answer in ('yes', 'no'):
        # The label shows the answer's share of the (already rounded) percentages.
        share = np.round(percentages[answer] / sum(percentages) * 100)
        chart = chart * hv.Text(answer, label_height, f"{share}%")
    return chart


# Outcome percentages split by tool condition (these series are reused by the next cell).
finalized_df_worn = _answer_percentages('worn', 'machining_finalized')
finalized_df_unworn = _answer_percentages('unworn', 'machining_finalized')
vis_passed_df_worn = _answer_percentages('worn', 'passed_visual_inspection')
vis_passed_df_unworn = _answer_percentages('unworn', 'passed_visual_inspection')

finalized_worn = _answer_bars(finalized_df_worn, "[WORN] Finalized Count", "orange", 15)
finalized_unworn = _answer_bars(finalized_df_unworn, "[UNWORN] Finalized Count", "orange", 15)
vis_inspection_worn = _answer_bars(vis_passed_df_worn, "[WORN] Visual Inspection Passed Count", "green", 45)
vis_inspection_unworn = _answer_bars(vis_passed_df_unworn, "[UNWORN] Visual Inspection Passed Count", "green", 15)
((finalized_worn + finalized_unworn) + (vis_inspection_worn + vis_inspection_unworn)).opts(opts.Bars(width=400, height=300,tools=['hover'],show_grid=True)).cols(2)

In [None]:
# Put the two worn-tool percentage series side by side under explicit column labels.
# Labelling the columns here, rather than renaming them afterwards, does not depend on
# the name pandas assigns to value_counts() output (the column name before pandas 2.0,
# 'proportion' afterwards), so the chart is built correctly on both.
worn_fin_vis = pd.concat(
    {'[WORN] Finalized': finalized_df_worn,
     '[WORN] Visual Inspection Passed': vis_passed_df_worn},
    axis=1, sort=True)
# Long format with columns 'Yes/No', 'Outputs', 'value': one row per (answer, output) pair.
worn_fin_vis = worn_fin_vis.rename_axis('Yes/No').reset_index().melt(id_vars='Yes/No', var_name='Outputs')
hv.Bars(worn_fin_vis, ['Outputs','Yes/No'], 'value').opts(opts.Bars(title="Machining Finalized and Passed Visual Inspection by Worn Tool Count", width=700, height=400,tools=['hover'],\
                                                                show_grid=True, ylabel="Percentage", yformatter='%d%%'))

In [None]:
# Bar chart of the number of samples recorded under each machining stage label.
hv.Bars(df['Machining_Process'].value_counts()).opts(title="Machining Process Count", color="red", xlabel="Machining Processes", ylabel="Count")\
                                        .opts(opts.Bars(width=500, height=300,tools=['hover'],xrotation=45,show_grid=True))

In [None]:
def plot_ts(col, color='red', yformat='%d%%', experiments=range(1, 19), ncols=6):
    """Plot one signal as a grid of small time-series curves, one panel per experiment.

    Parameters
    ----------
    col : str
        Column of the notebook-level ``df`` to plot.
    color : str
        Line colour used for every panel.
    yformat : str
        printf-style format applied to the y-axis tick labels.
    experiments : iterable of int
        ``exp_num`` values to plot, one panel each (default: 1 to 18).
    ncols : int
        Number of panels per row (default: 6).

    Returns
    -------
    A holoviews Layout whose panels do not share axes.
    """
    panels = []
    for i in experiments:
        # reset_index() restarts the x axis at 0 for each experiment.
        series = df[df['exp_num']==i].reset_index()[col]
        panel = hv.Curve(series).opts(title=f"{col} in  experiment {i}", xlabel="Time", ylabel=f"{col}", yformatter=yformat)\
                                .opts(width=300, height=150,tools=['hover'],show_grid=True,fontsize=8, color=color)
        panels.append(panel)
    # Building the layout from the list works for any number of experiments.
    return hv.Layout(panels).opts(shared_axes=False).cols(ncols)

In [None]:
# Per-experiment time series of X1_ActualVelocity (one panel per experiment).
plot_ts('X1_ActualVelocity', color='red', yformat='%d mm/s')

In [None]:
# Per-experiment time series of Y1_ActualVelocity (one panel per experiment).
plot_ts('Y1_ActualVelocity', color='orange', yformat='%d mm/s')

In [None]:
# Per-experiment time series of Z1_ActualVelocity (one panel per experiment).
plot_ts('Z1_ActualVelocity', color='green', yformat='%d mm/s')

In [None]:
# Per-experiment time series of S1_ActualVelocity (one panel per experiment).
plot_ts('S1_ActualVelocity', color='blue', yformat='%d mm/s')

In [None]:
# Per-experiment time series of X1_CurrentFeedback (one panel per experiment).
plot_ts('X1_CurrentFeedback', color='red', yformat='%d A')

In [None]:
# Per-experiment time series of Y1_CurrentFeedback (one panel per experiment).
plot_ts('Y1_CurrentFeedback', color='orange', yformat='%d A')

In [None]:
# Per-experiment time series of Z1_CurrentFeedback (one panel per experiment).
plot_ts('Z1_CurrentFeedback', color='green', yformat='%d A')

In [None]:
# Per-experiment time series of S1_CurrentFeedback (one panel per experiment).
plot_ts('S1_CurrentFeedback', color='blue', yformat='%d A')

In [None]:
# Per-experiment time series of X1_DCBusVoltage; ticks are shown with one decimal place.
plot_ts('X1_DCBusVoltage', color='red', yformat='%.1f V')

In [None]:
# Per-experiment time series of Y1_DCBusVoltage; ticks are shown with one decimal place.
plot_ts('Y1_DCBusVoltage', color='orange', yformat='%.1f V')

In [None]:
# Per-experiment time series of Z1_DCBusVoltage; ticks are shown with one decimal place.
plot_ts('Z1_DCBusVoltage', color='green', yformat='%.1f V')

In [None]:
# Per-experiment time series of S1_DCBusVoltage; ticks are shown with one decimal place.
plot_ts('S1_DCBusVoltage', color='blue', yformat='%.1f V')

In [None]:
# Pair plot of the two machine settings, coloured by tool condition.
g = sns.pairplot(df, vars=["feedrate", "clamp_pressure"], hue='tool_condition')
g.fig.suptitle("Tool Condition - feedrate/clamp pressure", y=1.1, fontsize=20)
g.fig.set_size_inches(9, 6)  # (width, height)
plt.show()

In [None]:
# Pair plot of the two machine settings, coloured by whether machining was finalized.
g = sns.pairplot(df, vars=["feedrate", "clamp_pressure"], hue='machining_finalized')
g.fig.suptitle("Machining Finalized - feedrate/clamp pressure", y=1.1, fontsize=20)
g.fig.set_size_inches(9, 6)  # (width, height)
plt.show()

In [None]:
# Pair plot of the four actual-velocity signals, coloured by tool condition.
velocity_columns = ['X1_ActualVelocity', 'Y1_ActualVelocity', 'Z1_ActualVelocity', 'S1_ActualVelocity']
g = sns.pairplot(df, vars=velocity_columns, hue='tool_condition')
g.fig.suptitle("Tool Condition - velocity", y=1.1, fontsize=20)
g.fig.set_size_inches(9, 6)  # (width, height)
# Re-anchor the figure's last child artist (presumably the legend — confirm) to the right of the grid.
g.fig.get_children()[-1].set_bbox_to_anchor((1.1, 0.5, 0, 0))
plt.show()

In [None]:
# Pair plot of the four actual-velocity signals, coloured by whether machining was finalized.
velocity_columns = ['X1_ActualVelocity', 'Y1_ActualVelocity', 'Z1_ActualVelocity', 'S1_ActualVelocity']
g = sns.pairplot(df, vars=velocity_columns, hue='machining_finalized')
g.fig.suptitle("Machining Finalized - velocity", y=1.1, fontsize=20)
g.fig.set_size_inches(9, 6)  # (width, height)
# Re-anchor the figure's last child artist (presumably the legend — confirm) to the right of the grid.
g.fig.get_children()[-1].set_bbox_to_anchor((1.1, 0.5, 0, 0))
plt.show()

In [None]:
# Silence numpy divide-by-zero / invalid-value warnings from here on (global numpy setting).
np.seterr(divide='ignore', invalid='ignore')
# Pair plot of the four current-feedback signals, coloured by tool condition.
current_columns = ['X1_CurrentFeedback', 'Y1_CurrentFeedback', 'Z1_CurrentFeedback', 'S1_CurrentFeedback']
g = sns.pairplot(df, vars=current_columns, hue='tool_condition')
g.fig.suptitle("Tool Condition - Current", y=1.1, fontsize=20)
g.fig.set_size_inches(9, 6)  # (width, height)
# Re-anchor the figure's last child artist (presumably the legend — confirm) to the right of the grid.
g.fig.get_children()[-1].set_bbox_to_anchor((1.1, 0.5, 0, 0))
plt.show()

In [None]:
# Pair plot of the four current-feedback signals, coloured by whether machining was finalized.
current_columns = ['X1_CurrentFeedback', 'Y1_CurrentFeedback', 'Z1_CurrentFeedback', 'S1_CurrentFeedback']
g = sns.pairplot(df, vars=current_columns, hue='machining_finalized')
g.fig.suptitle("Machining Finalized - Current", y=1.1, fontsize=20)
g.fig.set_size_inches(9, 6)  # (width, height)
# Re-anchor the figure's last child artist (presumably the legend — confirm) to the right of the grid.
g.fig.get_children()[-1].set_bbox_to_anchor((1.1, 0.5, 0, 0))
plt.show()

In [None]:
# Pair plot of the four DC-bus-voltage signals, coloured by tool condition.
voltage_columns = ['X1_DCBusVoltage', 'Y1_DCBusVoltage', 'Z1_DCBusVoltage', 'S1_DCBusVoltage']
g = sns.pairplot(df, vars=voltage_columns, hue='tool_condition')
g.fig.suptitle("Tool Condition - Voltage", y=1.1, fontsize=20)
g.fig.set_size_inches(9, 6)  # (width, height)
# Re-anchor the figure's last child artist (presumably the legend — confirm) to the right of the grid.
g.fig.get_children()[-1].set_bbox_to_anchor((1.1, 0.5, 0, 0))
plt.show()

In [None]:
# Pair plot of the four DC-bus-voltage signals, coloured by whether machining was finalized.
voltage_columns = ['X1_DCBusVoltage', 'Y1_DCBusVoltage', 'Z1_DCBusVoltage', 'S1_DCBusVoltage']
g = sns.pairplot(df, vars=voltage_columns, hue='machining_finalized')
g.fig.suptitle("Machining Finalized - Voltage", y=1.1, fontsize=20)
g.fig.set_size_inches(9, 6)  # (width, height)
# Re-anchor the figure's last child artist (presumably the legend — confirm) to the right of the grid.
g.fig.get_children()[-1].set_bbox_to_anchor((1.1, 0.5, 0, 0))
plt.show()

In [None]:
def amplitude_spectrum(values, dt):
    """Return ``(freq, amplitude)`` for a uniformly sampled signal.

    Parameters
    ----------
    values : array-like of float
        Samples taken every ``dt`` time units.
    dt : float
        Sample spacing.

    Returns
    -------
    freq : ndarray, length N
        Frequency of FFT bin k, i.e. ``k / (N * dt)``.
    amplitude : ndarray, length N
        ``|FFT| / (N / 2)``, with the DC bin halved, so a unit sine that falls
        exactly on a bin has amplitude 1 and a constant signal keeps its value.
        Only the first ``N // 2 + 1`` entries are non-mirrored.
    """
    samples = np.asarray(values, dtype=float)
    N = len(samples)
    if N == 0:
        return np.array([]), np.array([])
    # Bin k of an N-point FFT sits at k / (N * dt); spreading N points over
    # [0, 1/dt] inclusive would overstate every frequency by N / (N - 1).
    freq = np.arange(N) / (N * dt)
    amplitude = np.abs(np.fft.fft(samples)) / (N / 2)
    amplitude[0] = amplitude[0] / 2
    return freq, amplitude


def plot_fft(col, color='red', peak_thr=1, dt=0.1):
    """Plot the amplitude spectrum of one signal, one panel per experiment (1 to 18).

    Local maxima of the spectrum whose amplitude exceeds ``peak_thr`` are
    marked with lime dots.

    Parameters
    ----------
    col : str
        Column of the notebook-level ``df`` to analyse.
    color : str
        Line colour of the spectrum curve.
    peak_thr : float
        Minimum amplitude for a local maximum to be marked.
    dt : float
        Sample spacing used for the frequency axis (default 0.1, the value
        that was previously hard-coded).

    Returns
    -------
    A holoviews Layout with six panels per row and independent axes.
    """
    panels = []
    for i in range(1, 19):
        samples = df[df['exp_num']==i].reset_index()[col]
        N = len(samples)
        freq, F_abs = amplitude_spectrum(samples, dt)
        half = N // 2 + 1  # number of non-mirrored bins

        # Strict local maxima, kept only above the threshold and in the non-mirrored half.
        maximal_idx = signal.argrelmax(F_abs, order=1)[0]
        maximal_idx = maximal_idx[(F_abs[maximal_idx] > peak_thr) & (maximal_idx <= N/2)]

        panel = hv.Curve((freq[:half], F_abs[:half])).opts(title=f"{col} in  experiment {i}", xlabel="Frequency(Hz)", ylabel=f"Amplitude")\
                                                     .opts(width=300, height=150,tools=['hover'],show_grid=True,fontsize=8, color=color)\
            * hv.Scatter((freq[maximal_idx], F_abs[maximal_idx])).opts(color='lime', size=5)
        panels.append(panel)
    return hv.Layout(panels).opts(shared_axes=False).cols(6)

In [None]:
# Per-experiment spectrum of X1_ActualVelocity; local peaks with amplitude above 3 are marked.
plot_fft('X1_ActualVelocity', color='red', peak_thr=3)

In [None]:
# Per-experiment spectrum of Y1_ActualVelocity; local peaks with amplitude above 3 are marked.
plot_fft('Y1_ActualVelocity', color='orange', peak_thr=3)

In [None]:
# Per-experiment spectrum of Z1_ActualVelocity; local peaks with amplitude above 3 are marked.
plot_fft('Z1_ActualVelocity', color='green', peak_thr=3)

In [None]:
# Per-experiment spectrum of S1_ActualVelocity; local peaks with amplitude above 9 are marked.
plot_fft('S1_ActualVelocity', color='blue', peak_thr=9)

In [None]:
# Per-experiment spectrum of X1_CurrentFeedback; local peaks with amplitude above 1.2 are marked.
plot_fft('X1_CurrentFeedback', color='red', peak_thr=1.2)

In [None]:
# Per-experiment spectrum of Y1_CurrentFeedback; local peaks with amplitude above 1.2 are marked.
plot_fft('Y1_CurrentFeedback', color='orange', peak_thr=1.2)

In [None]:
# Per-experiment spectrum of Z1_CurrentFeedback; local peaks with amplitude above 3 are marked.
plot_fft('Z1_CurrentFeedback', color='green', peak_thr=3)

In [None]:
# Per-experiment spectrum of the spindle (S1) current feedback. This is the fourth, blue
# plot of the X/Y/Z/S series; it previously re-plotted X1_CurrentFeedback by mistake.
# NOTE(review): peak_thr=1.2 is carried over from the X1 call — tune for S1 if needed.
plot_fft('S1_CurrentFeedback', color='blue', peak_thr=1.2)

In [None]:
# Per-experiment spectrum of X1_DCBusVoltage; local peaks with amplitude above 0.015 are marked.
plot_fft('X1_DCBusVoltage', color='red', peak_thr=0.015)

In [None]:
# Per-experiment spectrum of Y1_DCBusVoltage; local peaks with amplitude above 0.02 are marked.
plot_fft('Y1_DCBusVoltage', color='orange', peak_thr=0.02)

In [None]:
# Per-experiment spectrum of Z1_DCBusVoltage; local peaks with amplitude above 3 are marked.
plot_fft('Z1_DCBusVoltage', color='green', peak_thr=3)

In [None]:
# Per-experiment spectrum of S1_DCBusVoltage; local peaks with amplitude above 0.15 are marked.
plot_fft('S1_DCBusVoltage', color='blue', peak_thr=0.15)

In [None]:
# Absolute difference between the commanded and the actual value, per axis,
# for position, velocity and acceleration (stored as '<axis>1_<quantity>_Diff').
for ax in ('X', 'Y', 'Z', 'S'):
    for quantity in ('Position', 'Velocity', 'Acceleration'):
        commanded = df[f'{ax}1_Command{quantity}']
        actual = df[f'{ax}1_Actual{quantity}']
        df[f'{ax}1_{quantity}_Diff'] = (commanded - actual).abs()

In [None]:
# For every signal and experiment, find the strongest local peak of the amplitude spectrum
# and store its amplitude, its frequency and their product as per-experiment constants
# ('<signal>_High_Amp', '<signal>_High_Freq', '<signal>_High_Amp_Freq').
dt = 0.1  # sample spacing used to convert FFT bin numbers into frequencies
for col in ['ActualPosition','ActualVelocity','ActualAcceleration','CurrentFeedback','DCBusVoltage','OutputCurrent','OutputVoltage','OutputPower']:
    for ax in ['X','Y','Z','S']:
        signal_name = f'{ax}1_{col}'
        # Some axis/quantity combinations may not exist as columns; skip exactly those,
        # instead of hiding every possible error behind a bare `except`.
        if signal_name not in df.columns:
            continue

        for i in range(1,19):
            in_experiment = df['exp_num'] == i
            f = df.loc[in_experiment, signal_name].to_numpy(dtype=float)
            N = len(f)
            if N == 0:
                continue

            # Bin k of an N-point FFT sits at k / (N * dt).
            freq = np.arange(N) / (N * dt)
            F_abs = np.abs(np.fft.fft(f)) / (N/2)
            F_abs[0] = F_abs[0] / 2

            # Strict local maxima, restricted to the non-mirrored half of the spectrum so
            # that rounding noise cannot select the mirrored copy of the dominant peak.
            maximal_idx = signal.argrelmax(F_abs, order=1)[0]
            maximal_idx = maximal_idx[maximal_idx <= N/2]

            if len(maximal_idx) > 0:
                strongest = maximal_idx[np.argmax(F_abs[maximal_idx])]
                high_amp = F_abs[strongest]
                high_freq = freq[strongest]
            else:
                # No local maximum (e.g. a constant signal): fall back to zeros.
                high_amp = 0
                high_freq = 0

            df.loc[in_experiment, f'{signal_name}_High_Amp'] = high_amp
            df.loc[in_experiment, f'{signal_name}_High_Freq'] = high_freq
            df.loc[in_experiment, f'{signal_name}_High_Amp_Freq'] = high_amp * high_freq

In [None]:
# Work on a copy so the exploration frame `df` keeps its text labels.
feature_df = df.copy()
# Each text column gets its own freshly fitted LabelEncoder; the codes are stored as int8.
for label_column in ('Machining_Process', 'tool_condition', 'machining_finalized', 'passed_visual_inspection'):
    codes = LabelEncoder().fit_transform(feature_df[label_column])
    feature_df[label_column] = codes.astype(np.int8)
# 'material' and 'exp_num' are not used as model inputs.
feature_df = feature_df.drop(columns=['material', 'exp_num'])
feature_df.head(3)

In [None]:
# Target: tool condition. All three outcome columns are removed from the inputs.
# NOTE(review): rows are split at random, so samples of one experiment land in both sets,
# while the target and the per-experiment features are constant within an experiment.
# The validation score may therefore be optimistic; consider a split grouped by experiment.
y_series = feature_df['tool_condition']
x_df = feature_df.drop(columns=['tool_condition', 'machining_finalized', 'passed_visual_inspection'])
X_train, X_valid, Y_train, Y_valid = train_test_split(
    x_df, y_series, test_size=0.2, random_state=0, stratify=y_series
)

lgb_train = lgb.Dataset(X_train, label=Y_train)
lgb_valid = lgb.Dataset(X_valid, label=Y_valid, reference=lgb_train)

In [None]:
# LightGBM settings for the binary tool-condition model.
# NOTE(review): 'metric': 'l2' scores predicted probabilities by squared error;
# confirm this is intended rather than e.g. 'binary_logloss' or 'auc'.
params = {
    'task' : 'train',
    'boosting' : 'gbdt',
    'objective': 'binary',
    'metric': 'l2',
    'num_leaves': 200,
    'feature_fraction': 1.0,
    'bagging_fraction': 1.0,
    'bagging_freq': 0,
    'min_child_samples': 5
}
# Early stopping is requested through the callback API: the `early_stopping_rounds`
# keyword of lgb.train() was removed in LightGBM 4.0, the callback works before and after.
# With a patience equal to num_boost_round, training always runs all rounds; the callback
# only records the best iteration on the validation set.
gbm_tool_wear = lgb.train(
    params,
    lgb_train,
    num_boost_round=100,
    valid_sets=[lgb_valid],
    callbacks=[lgb.early_stopping(stopping_rounds=100)],
)

In [None]:
# Feature names and importances as reported by the tool-condition booster.
feature_imp = pd.DataFrame({
    'feature': gbm_tool_wear.feature_name(),
    'importance': gbm_tool_wear.feature_importance(),
})
# Keep the 31 most important features, then reverse so the largest bar is drawn last.
top_features = feature_imp.sort_values(by='importance', ascending=False).iloc[:31].iloc[::-1]
hv.Bars(top_features).opts(title="Feature Importance", color="purple", xlabel="Features", ylabel="Importance", invert_axes=True)\
                     .opts(opts.Bars(width=700, height=700, tools=['hover'], show_grid=True))

In [None]:
# Draw the tree at index 1 of the tool-condition booster, annotated with split gain.
# The return value is not needed; the title typo ("Visulalization") is corrected.
lgb.plot_tree(gbm_tool_wear, figsize=(20, 20), precision=3, tree_index=1, show_info=['split_gain'])
plt.title('Visualization of Tree in Tool Condition')
plt.show()

In [None]:
# Target: whether machining was finalized. All three outcome columns are removed from the inputs.
# NOTE(review): rows are split at random, so samples of one experiment land in both sets,
# while the target and the per-experiment features are constant within an experiment.
# The validation score may therefore be optimistic; consider a split grouped by experiment.
y_series = feature_df['machining_finalized']
x_df = feature_df.drop(columns=['tool_condition', 'machining_finalized', 'passed_visual_inspection'])
X_train, X_valid, Y_train, Y_valid = train_test_split(
    x_df, y_series, test_size=0.2, random_state=0, stratify=y_series
)

lgb_train = lgb.Dataset(X_train, label=Y_train)
lgb_valid = lgb.Dataset(X_valid, label=Y_valid, reference=lgb_train)

In [None]:
# LightGBM settings for the binary machining-finalized model.
# NOTE(review): 'metric': 'l2' scores predicted probabilities by squared error;
# confirm this is intended rather than e.g. 'binary_logloss' or 'auc'.
params = {
    'task' : 'train',
    'boosting' : 'gbdt',
    'objective': 'binary',
    'metric': 'l2',
    'num_leaves': 200,
    'feature_fraction': 1.0,
    'bagging_fraction': 1.0,
    'bagging_freq': 0,
    'min_child_samples': 5
}
# Early stopping is requested through the callback API: the `early_stopping_rounds`
# keyword of lgb.train() was removed in LightGBM 4.0, the callback works before and after.
# With a patience equal to num_boost_round, training always runs all rounds; the callback
# only records the best iteration on the validation set.
gbm_machining_finalized = lgb.train(
    params,
    lgb_train,
    num_boost_round=100,
    valid_sets=[lgb_valid],
    callbacks=[lgb.early_stopping(stopping_rounds=100)],
)

In [None]:
# Feature names and importances as reported by the machining-finalized booster.
feature_imp = pd.DataFrame({
    'feature': gbm_machining_finalized.feature_name(),
    'importance': gbm_machining_finalized.feature_importance(),
})
# Keep the 31 most important features, then reverse so the largest bar is drawn last.
top_features = feature_imp.sort_values(by='importance', ascending=False).iloc[:31].iloc[::-1]
hv.Bars(top_features).opts(title="Feature Importance", color="purple", xlabel="Features", ylabel="Importance", invert_axes=True)\
                     .opts(opts.Bars(width=700, height=700, tools=['hover'], show_grid=True))

In [None]:
# Draw the tree at index 1 of the machining-finalized booster, annotated with split gain.
# The return value is not needed; the title typo ("Visulalization") is corrected.
lgb.plot_tree(gbm_machining_finalized, figsize=(20, 20), precision=3, tree_index=1, show_info=['split_gain'])
plt.title('Visualization of Tree in Machining Finalized')
plt.show()

In [None]:
# Target: whether visual inspection was passed. All three outcome columns are removed from the inputs.
# NOTE(review): rows are split at random, so samples of one experiment land in both sets,
# while the target and the per-experiment features are constant within an experiment.
# The validation score may therefore be optimistic; consider a split grouped by experiment.
y_series = feature_df['passed_visual_inspection']
x_df = feature_df.drop(columns=['tool_condition', 'machining_finalized', 'passed_visual_inspection'])
X_train, X_valid, Y_train, Y_valid = train_test_split(
    x_df, y_series, test_size=0.2, random_state=0, stratify=y_series
)

lgb_train = lgb.Dataset(X_train, label=Y_train)
lgb_valid = lgb.Dataset(X_valid, label=Y_valid, reference=lgb_train)

In [None]:
# LightGBM settings for the binary passed-visual-inspection model.
# NOTE(review): 'metric': 'l2' scores predicted probabilities by squared error;
# confirm this is intended rather than e.g. 'binary_logloss' or 'auc'.
params = {
    'task' : 'train',
    'boosting' : 'gbdt',
    'objective': 'binary',
    'metric': 'l2',
    'num_leaves': 200,
    'feature_fraction': 1.0,
    'bagging_fraction': 1.0,
    'bagging_freq': 0,
    'min_child_samples': 5
}
# Early stopping is requested through the callback API: the `early_stopping_rounds`
# keyword of lgb.train() was removed in LightGBM 4.0, the callback works before and after.
# With a patience equal to num_boost_round, training always runs all rounds; the callback
# only records the best iteration on the validation set.
gbm_passed_vis_inspection = lgb.train(
    params,
    lgb_train,
    num_boost_round=100,
    valid_sets=[lgb_valid],
    callbacks=[lgb.early_stopping(stopping_rounds=100)],
)

In [None]:
# Feature names and importances as reported by the passed-visual-inspection booster.
feature_imp = pd.DataFrame({
    'feature': gbm_passed_vis_inspection.feature_name(),
    'importance': gbm_passed_vis_inspection.feature_importance(),
})
# Keep the 31 most important features, then reverse so the largest bar is drawn last.
top_features = feature_imp.sort_values(by='importance', ascending=False).iloc[:31].iloc[::-1]
hv.Bars(top_features).opts(title="Feature Importance", color="purple", xlabel="Features", ylabel="Importance", invert_axes=True)\
                     .opts(opts.Bars(width=700, height=700, tools=['hover'], show_grid=True))

In [None]:
# Draw the tree at index 1 of the passed-visual-inspection booster, annotated with split gain.
# The return value is not needed; the title typo ("Visulalization") is corrected.
lgb.plot_tree(gbm_passed_vis_inspection, figsize=(20, 20), precision=3, tree_index=1, show_info=['split_gain'])
plt.title('Visualization of Tree in Passed Visual Inspection')
plt.show()

In [None]:
# Reload the raw experiment logs and attach a binary label: 1 for a worn tool, 0 otherwise.
# NOTE: this rebinds `df`; the engineered columns added to the earlier frame are not carried over.
results = pd.read_csv("../input/cnc-tool-wear/train.csv")
frames = []
for i in range(1, 19):
    exp = str(i).zfill(2)  # zero-padded experiment number used in the file name
    frame = pd.read_csv("../input/cnc-tool-wear/experiment_{}.csv".format(exp))
    tool_condition = results.loc[results['No'] == i, 'tool_condition'].iloc[0]
    frame['target'] = int(tool_condition == 'worn')
    frames.append(frame)
df = pd.concat(frames, ignore_index=True)
df.head()

In [None]:
# Correlate the numeric columns only. The frame also holds the text column
# 'Machining_Process', on which DataFrame.corr() raises in pandas >= 2.0
# (older versions silently ignored non-numeric columns).
df_correlation = df.select_dtypes(include='number').corr()
# Remove rows that are NaN everywhere, i.e. columns with no defined correlation.
df_correlation = df_correlation.dropna(thresh=1)
# Drop the matching columns (and the label) so the heatmap has no empty stripes.
df_correlation = df_correlation.drop(columns=['Z1_CurrentFeedback','Z1_DCBusVoltage','Z1_OutputCurrent','Z1_OutputVoltage','S1_SystemInertia','target'])
plt.figure(figsize=(20,20))
sns.heatmap(df_correlation)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn import metrics


import xgboost as xgb
from xgboost import XGBClassifier
from xgboost import plot_importance
import gc
%matplotlib inline


# Inputs: every column except the label and the text stage column; label: the 0/1 worn flag.
# NOTE(review): the random row split mixes samples of one experiment across train and test,
# while the label is constant per experiment — test scores may be optimistic.
x = df.drop(columns=['target', 'Machining_Process'])
y = df['target'].to_numpy(copy=True)
X_train, X_test, y_train, y_test = train_test_split(
    x, y, train_size=0.8, random_state=100
)

In [None]:
# Fit an XGBoost classifier with library-default hyperparameters on the training split.
# NOTE(review): no random_state is passed here — set one if exact reproducibility matters.
xgb_model=XGBClassifier()
xgb_model.fit(X_train,y_train)

In [None]:
# Predicted class labels for the held-out split; show the first ten as a sanity check.
y_pred = xgb_model.predict(X_test)
y_pred[:10]

In [None]:
# ROC AUC is a ranking metric, so it is computed from the predicted probability of
# class 1 rather than from thresholded 0/1 labels.
# The result is stored as `auc_score` (not `auc`) so that it cannot clash with
# sklearn's `auc` function, which a later cell imports under that name.
auc_score = roc_auc_score(y_test, xgb_model.predict_proba(X_test)[:, 1])
auc_score

In [None]:
from sklearn.metrics import confusion_matrix
# Counts of true labels vs. predicted labels on the held-out split.
cnf_matrix = confusion_matrix(y_test, y_pred)
cnf_matrix

In [None]:
from sklearn.metrics import roc_curve, auc
# Build the ROC curve from the predicted probability of class 1. Hard 0/1 predictions
# would give a curve with a single interior point and understate the area.
y_score = xgb_model.predict_proba(X_test)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, y_score)
roc_auc = auc(fpr, tpr)
roc_auc

In [None]:
def draw_roc(actual, probs):
    """Plot a ROC curve with its AUC in the legend and return the curve points.

    Parameters
    ----------
    actual : array-like
        True binary labels.
    probs : array-like
        Scores passed to ``metrics.roc_curve`` / ``metrics.roc_auc_score``.

    Returns
    -------
    tuple ``(fpr, tpr, thresholds)`` as produced by ``metrics.roc_curve``
    with ``drop_intermediate=False``.
    """
    roc_points = metrics.roc_curve(actual, probs, drop_intermediate=False)
    area = metrics.roc_auc_score(actual, probs)

    plt.figure(figsize=(6, 4))
    plt.plot(roc_points[0], roc_points[1], label='ROC curve (area = %0.2f)' % area)
    plt.plot([0, 1], [0, 1], 'k--')  # dashed diagonal for reference
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate or [1 - True Negative Rate]')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic example')
    plt.legend(loc="lower right")
    plt.show()

    return roc_points

In [None]:
# Draw the ROC curve from the predicted probability of class 1; passing the hard
# 0/1 predictions would reduce the curve to a single interior point.
draw_roc(y_test, xgb_model.predict_proba(X_test)[:, 1])

In [None]:
# Overlay the first `count_points` true labels (solid blue) and predictions (dashed red).
count_points = 70
c = list(range(1, count_points + 1))
fig = plt.figure()
for values, line_color, line_style in ((y_test, "blue", "-"), (y_pred, "red", "--")):
    plt.plot(c, values[:count_points], color=line_color, linewidth=2.5, linestyle=line_style)
fig.suptitle('Actual and Predicted', fontsize=20)
plt.xlabel('Index', fontsize=18)
plt.ylabel('Worn_status', fontsize=16)

In [None]:
# Bar chart of the XGBoost feature importances, indexed by feature position (no names).
plt.bar(range(len(xgb_model.feature_importances_)), xgb_model.feature_importances_)
plt.show()

In [None]:
# Pair every importance with the name of the column the model was actually trained on.
# The names come from X_train, not df: df also holds 'Machining_Process' and 'target',
# which are not model inputs, so indexing df.columns by position only lines up while
# those two columns happen to be last.
features = sorted(
    zip(X_train.columns, xgb_model.feature_importances_),
    key=lambda pair: pair[1],
    reverse=True,
)
# Print the ten most important features with their scores.
for name, importance in features[:10]:
    print("{0}: {1:0.4f}".format(name, importance))