In [3]:
import pandas as pd
dataset=pd.read_csv('zeolite_Amorphous.csv', index_col=0, header=0)
x=pd.concat([dataset.iloc[:, 0:5], dataset.iloc[:, 6:8]], axis=1)
y=dataset.iloc[:, 8]
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test=train_test_split(x, y, test_size=0.25, shuffle=True, stratify=y, random_state=4)

from sklearn import metrics
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import StratifiedKFold, cross_val_predict
import numpy as np
fold=StratifiedKFold(n_splits=3, shuffle=True, random_state=99)
max_depths=np.arange(1, 21, 1)
accuracy_in_cv_all=[]
for max_depth in max_depths:
    model=DecisionTreeClassifier(max_depth=max_depth, min_samples_leaf=3)
    estimated_y_in_cv=cross_val_predict(model, x_train, y_train, cv=fold)
    accuracy_in_cv_all.append(metrics.accuracy_score(y_train, estimated_y_in_cv))

optimal_depth=max_depths[accuracy_in_cv_all.index(max(accuracy_in_cv_all))]

model=DecisionTreeClassifier(max_depth=optimal_depth, min_samples_leaf=3)
model.fit(x_train, y_train)
estimated_y_train=model.predict(x_train)
estimated_y_test=model.predict(x_test)
phase=list(set(y))
phase.sort()

confusion_matrix_train=pd.DataFrame(metrics.confusion_matrix(y_train, estimated_y_train), index=phase, columns=phase)
print(metrics.accuracy_score(y_train, estimated_y_train))
display(confusion_matrix_train)

confusion_matrix_test=pd.DataFrame(metrics.confusion_matrix(y_test, estimated_y_test), index=phase, columns=phase)
print(metrics.accuracy_score(y_test, estimated_y_test))
display(confusion_matrix_test)

importance_cv=pd.DataFrame(model.feature_importances_)
importance_cv.columns=['importance']
importance_cv.index=x.columns
importance_cv

0.886039886039886


Unnamed: 0,ANA,Amorphous,BPH,CAN,CHA,EDI,ERI/OFF,FAU,GIS,HEU,...,KFI,LTA,LTL,MER,MFI,MOR,PHI,RHO,SOD,TON
ANA,13,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
Amorphous,0,23,0,0,0,0,0,1,1,0,...,0,0,1,0,0,0,0,0,0,0
BPH,0,0,5,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
CAN,0,0,0,8,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
CHA,0,1,0,0,6,0,0,0,0,0,...,0,0,2,1,0,0,0,0,0,0
EDI,0,1,1,0,1,11,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ERI/OFF,0,0,0,0,0,0,16,0,0,0,...,0,0,0,0,0,0,1,0,0,0
FAU,0,1,0,0,0,0,0,33,1,0,...,0,1,0,0,0,0,0,0,0,0
GIS,1,0,0,0,0,0,0,0,15,0,...,0,0,0,0,0,2,0,0,0,0
HEU,0,0,0,0,0,0,0,0,0,2,...,0,0,0,0,0,0,1,0,0,0


0.7008547008547008


Unnamed: 0,ANA,Amorphous,BPH,CAN,CHA,EDI,ERI/OFF,FAU,GIS,HEU,...,KFI,LTA,LTL,MER,MFI,MOR,PHI,RHO,SOD,TON
ANA,3,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
Amorphous,0,7,0,0,0,0,0,0,1,0,...,0,0,1,0,0,0,0,0,0,0
BPH,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
CAN,1,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
CHA,0,0,0,0,1,0,0,0,0,0,...,0,0,1,1,0,0,0,0,0,0
EDI,0,1,0,0,0,3,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
ERI/OFF,0,0,0,0,0,0,5,0,0,0,...,0,0,0,0,0,0,0,0,0,0
FAU,0,0,0,0,1,0,0,7,3,0,...,0,1,0,0,0,0,0,0,0,0
GIS,0,0,0,0,0,0,0,0,4,0,...,0,0,0,0,0,2,0,0,0,0
HEU,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0


Unnamed: 0,importance
Al2O3/SiO2,0.203621
H2O/SiO2,0.13049
NaOH/SiO2,0.215035
KOH/SiO2,0.166775
CsOH/SiO2,0.028777
temperature,0.186936
time,0.068366


In [4]:
confusion_matrix_test.to_csv('confusion_matrix_test_DT.csv')

In [7]:
from sklearn.tree import export_graphviz
from graphviz import Source
with open('zeolite_Amorphous_standard_3', 'w') as f:
    export_graphviz(model, out_file=f, feature_names=x.columns, class_names=model.classes_)
dot_file_path='zeolite_Amorphous_standard_3'
決定木=Source.from_file(dot_file_path)
決定木.view()

'zeolite_Amorphous_standard_3.pdf'

In [8]:
y_train.value_counts()

phase
FAU          36
LTL          35
MOR          35
LTA          29
Amorphous    26
MER          23
MFI          22
SOD          18
GIS          18
ERI/OFF      17
ANA          15
EDI          14
RHO          10
CHA          10
JBW           9
CAN           9
PHI           8
BPH           5
TON           5
KFI           4
HEU           3
Name: count, dtype: int64

In [1]:
import pandas as pd
dataset=pd.read_csv('zeolite_Amorphous.csv', index_col=0, header=0)
x=pd.concat([dataset.iloc[:, 0:2], dataset.iloc[:, 5:8]], axis=1)
y=dataset.iloc[:, 8]
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test=train_test_split(x, y, test_size=0.25, shuffle=True, stratify=y, random_state=4)

from sklearn import metrics
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import StratifiedKFold, cross_val_predict
import numpy as np
fold=StratifiedKFold(n_splits=3, shuffle=True, random_state=99)
max_depths=np.arange(1, 21, 1)
accuracy_in_cv_all=[]
for max_depth in max_depths:
    model=DecisionTreeClassifier(max_depth=max_depth, min_samples_leaf=3)
    estimated_y_in_cv=cross_val_predict(model, x_train, y_train, cv=fold)
    accuracy_in_cv_all.append(metrics.accuracy_score(y_train, estimated_y_in_cv))

optimal_depth=max_depths[accuracy_in_cv_all.index(max(accuracy_in_cv_all))]

model=DecisionTreeClassifier(max_depth=optimal_depth, min_samples_leaf=3)
model.fit(x_train, y_train)
estimated_y_train=model.predict(x_train)
estimated_y_test=model.predict(x_test)
phase=list(set(y))
phase.sort()

confusion_matrix_train=pd.DataFrame(metrics.confusion_matrix(y_train, estimated_y_train), index=phase, columns=phase)
print(metrics.accuracy_score(y_train, estimated_y_train))
display(confusion_matrix_train)

confusion_matrix_test=pd.DataFrame(metrics.confusion_matrix(y_test, estimated_y_test), index=phase, columns=phase)
print(metrics.accuracy_score(y_test, estimated_y_test))
display(confusion_matrix_test)

importance_cv=pd.DataFrame(model.feature_importances_)
importance_cv.columns=['importance']
importance_cv.index=x.columns
importance_cv

0.8433048433048433


Unnamed: 0,ANA,Amorphous,BPH,CAN,CHA,EDI,ERI/OFF,FAU,GIS,HEU,...,KFI,LTA,LTL,MER,MFI,MOR,PHI,RHO,SOD,TON
ANA,14,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
Amorphous,0,20,0,0,1,0,0,1,0,0,...,0,0,1,0,0,1,0,0,0,2
BPH,0,0,4,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
CAN,2,0,0,5,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
CHA,0,0,0,0,7,0,0,0,0,0,...,0,0,1,1,0,0,1,0,0,0
EDI,0,1,0,0,0,13,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ERI/OFF,0,1,0,0,0,0,14,0,0,0,...,0,0,1,1,0,0,0,0,0,0
FAU,0,1,0,0,1,2,0,30,1,0,...,0,1,0,0,0,0,0,0,0,0
GIS,2,0,0,0,0,0,0,0,15,0,...,0,0,0,0,0,0,1,0,0,0
HEU,0,1,0,0,0,0,0,0,0,2,...,0,0,0,0,0,0,0,0,0,0


0.6581196581196581


Unnamed: 0,ANA,Amorphous,BPH,CAN,CHA,EDI,ERI/OFF,FAU,GIS,HEU,...,KFI,LTA,LTL,MER,MFI,MOR,PHI,RHO,SOD,TON
ANA,4,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
Amorphous,0,7,0,0,0,0,0,0,0,0,...,0,0,1,0,1,0,0,0,0,0
BPH,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
CAN,2,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
CHA,1,0,0,0,1,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
EDI,0,0,0,0,0,3,0,1,0,0,...,0,0,0,1,0,0,0,0,0,0
ERI/OFF,0,0,0,0,0,0,5,0,0,0,...,0,0,0,1,0,0,0,0,0,0
FAU,0,0,0,0,1,1,0,6,1,0,...,0,1,0,0,0,0,2,0,0,0
GIS,0,0,0,0,0,0,0,0,4,0,...,0,0,1,0,0,0,1,0,0,0
HEU,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0


Unnamed: 0,importance
Al2O3/SiO2,0.249914
H2O/SiO2,0.13049
MOH/SiO2,0.279473
temperature,0.220882
time,0.119241
