# Importação das bibliotecas e upload do dataset

## Bibliotecas


In [1]:
import tensorflow.keras as keras
%tensorflow_version 2.x
import tensorflow as tf
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

ModuleNotFoundError: No module named 'tensorflow'

In [5]:
import pandas as pd
import numpy as np

from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_graphviz
from sklearn.model_selection import RandomizedSearchCV

from sklearn.model_selection import GridSearchCV 
from sklearn.metrics import accuracy_score,  precision_score

from sklearn.preprocessing import label_binarize, StandardScaler
from sklearn.model_selection import train_test_split, KFold, RepeatedKFold, RepeatedStratifiedKFold,StratifiedKFold

import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.filterwarnings('ignore')

import plotly
plotly.io.renderers.default = 'colab'
import plotly.graph_objects as go
import plotly.express as px


## Datasets


### train

In [2]:
# Column names assigned to the CSV, one per line, in file order.
# The final entry, 'status', is the class label.
columns = [
    'Feed concentration',
    'Feed flowrate',
    'Feed temperature',
    'Reactor level',
    'Product A concentration',
    'Product B concentration',
    'Reactor temperature',
    'Coolant flowrate',
    'Product flowrate',
    'Coolant inlet temperature',
    'Coolant inlet pressure',
    'Level controller output',
    'Coolant controller output',
    'Coolant setpoint',
    'status',
]

In [3]:
# Load the training file; column names are supplied explicitly through `names`.
df_train = pd.read_csv(
    'https://raw.githubusercontent.com/ArturGuilherme/tcc-avaliacao-classificadores/master/data/baseCompleta.csv',
    sep=';',
    decimal='.',
    names=columns,
)

# Balance class 0: keep a reproducible random subset of 1010 rows whose
# status is 0, together with every row whose status is different from 0.
df_status_0 = df_train.loc[df_train['status'].eq(0)].sample(n=1010, random_state=42)
df_status_dif_0 = df_train.loc[df_train['status'].ne(0)]

# Stack both parts and rebuild a clean 0..n-1 index.
df_train = pd.concat([df_status_0, df_status_dif_0], ignore_index=True)

# Normalização e split train/test

In [4]:
# Features are the first 14 columns; the target is the `status` column.
Y_all = df_train.status.values
X_all = df_train.iloc[:, 0:14].to_numpy()

# Sorted list of the distinct class labels present in the data.
labels = sorted(set(Y_all))

# Split BEFORE scaling so that the validation rows never influence the
# scaler statistics (fitting the scaler on the full data leaks information
# from the validation set into training).
X_train, X_valid, Y_train, Y_valid = train_test_split(
    X_all, Y_all, test_size=0.30, random_state=42
)

# Fit the scaler on the training rows only, then apply the same
# transformation to the validation rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_valid = scaler.transform(X_valid)

# KNN

In [None]:
# Validation accuracy for every k from 1 to 49 (index 0 of `acc` is k = 1).
acc = []
for i in range(1, 50):
    # Train a k-nearest-neighbours classifier with the current k.
    knn_model = KNeighborsClassifier(n_neighbors=i).fit(X_train, Y_train)

    # Predict the held-out validation rows.
    y_pred = knn_model.predict(X_valid)

    # Record the overall accuracy obtained with this k.
    acc.append(accuracy_score(Y_valid, y_pred))

In [None]:
# Convert the accuracies from fractions to percentages.
# NOTE(review): `acc` is rebound to its own scaled value, so running this
# cell twice multiplies by 100 twice; re-run the KNN loop cell first.
acc = np.multiply(acc, 100).tolist()

# Highest accuracy and the k that produced it (position 0 corresponds to k = 1;
# on ties the smallest k wins because `index` returns the first match).
max_acc = max(acc)
k_value = acc.index(max_acc) + 1

In [None]:
# One x position per accuracy entry; k starts at 1.
x = np.arange(1, len(acc) + 1)

# Title with an HTML subtitle reporting the best k. Tags are closed in the
# reverse order they were opened (<span><i> ... </i></span>).
title = (
    'Acurácia vs K-value'
    '<br><span style="font-size:10px"><i>K-value igual a {} possui a maior acurácia: {}%</i></span>'
).format(k_value, round(max_acc, 2))

layout = go.Layout(
    xaxis=dict(title="K-value"),
    yaxis=dict(title="Acurácia (%)"),
)

fig = go.Figure(data=go.Scatter(x=x, y=acc, mode='lines+markers'), layout=layout)
fig.update_layout(
    title=title,
    title_font_color='#333333',  # grey keeps the title from drawing too much attention
    title_font_size=14,
    # Fixed figure size instead of automatic sizing.
    autosize=False,
    width=700,
    height=500,
)

fig.show()

# Random Forest

In [13]:
# Hyper-parameter candidates for the random forest.
# The grid holds 1 * 3 * 3 * 2 * 2 = 36 combinations, fewer than n_iter below,
# so the search ends up evaluating every combination.
param_grid = dict(
    bootstrap=[True],
    max_depth=[20, 50, 60],
    min_samples_leaf=[2, 3, 4],
    min_samples_split=[7, 8],
    n_estimators=[600, 700],
)

# Base estimator whose hyper-parameters are tuned.
rf = RandomForestClassifier(random_state=42)

# 3-fold cross-validated search running on all available cores.
rf_random = RandomizedSearchCV(
    estimator=rf,
    param_distributions=param_grid,
    n_iter=100,
    cv=3,
    verbose=2,
    random_state=35,
    n_jobs=-1,
)
rf_random.fit(X_train, Y_train)

# Display the best parameter combination found.
rf_random.best_params_

Fitting 3 folds for each of 36 candidates, totalling 108 fits


{'n_estimators': 600,
 'min_samples_split': 8,
 'min_samples_leaf': 3,
 'max_depth': 50,
 'bootstrap': True}

# Decision Tree

In [11]:
# Hyper-parameter candidates for the decision tree
# (1 * 6 * 7 * 5 = 210 combinations, of which n_iter = 200 are sampled).
params = dict(
    criterion=['gini'],
    max_leaf_nodes=[60, 70, 80, 100, 130, 150],
    min_samples_split=[2, 3, 4, 5, 6, 8, 9],
    max_depth=[20, 50, 60, 70, 10],
)

# Base estimator whose hyper-parameters are tuned.
dt = DecisionTreeClassifier(random_state=42)

# NOTE: this rebinds `rf_random`, which until here held the random-forest
# search; from this cell on it refers to the decision-tree search.
rf_random = RandomizedSearchCV(
    estimator=dt,
    param_distributions=params,
    n_iter=200,
    cv=3,
    verbose=2,
    random_state=35,
    n_jobs=-1,
)
rf_random.fit(X_train, Y_train)

# Display the best parameter combination found.
rf_random.best_params_

Fitting 3 folds for each of 200 candidates, totalling 600 fits


{'min_samples_split': 5,
 'max_leaf_nodes': 70,
 'max_depth': 70,
 'criterion': 'gini'}

In [None]:
# Write the best decision tree found by the search to a Graphviz DOT file.
# The search fitted in the decision-tree cell is bound to `rf_random`;
# `grid_search_cv` only exists in commented-out code, so referencing it
# raised a NameError on a fresh kernel.
export_graphviz(
    rf_random.best_estimator_,
    out_file='moons_tree.dot',
    feature_names=None,
    class_names=None,
    filled=True,
)