In [None]:
import os
from pathlib import Path

import numpy as np
import pandas as pd

from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Directory that contains output_1.csv. The default is the original author's
# local checkout; set the ARCELOR_DATA_DIR environment variable to run the
# notebook on another machine without editing this cell.
DATA_DIR = Path(os.environ.get(
    "ARCELOR_DATA_DIR",
    r"C:\Users\Olga\Desktop\ARCELOR_GITHUB\arcelor_mittal\data",
))

# Load the table and index it by the "coil" column. set_index returns a new
# frame (no inplace mutation), so re-running this cell always yields the same df.
df = pd.read_csv(DATA_DIR / "output_1.csv").set_index("coil")
df.head()

In [None]:
# Columns removed from the frame before the feature matrix is built.
columns_to_drop = [
    'Constriction_width',
    'analyse',
    'Temperature before finishing mill',
    'furnace Number',
    'Temperature after finishing mill',
    'Thickness profile',
]
df = df.drop(columns_to_drop, axis=1)
df.head()

In [None]:
# Target is the 'is_constriction' column; the features are every other column.
y = df['is_constriction']
X = df.drop(columns='is_constriction')

# Hold out 20% of the rows for testing. The split is stratified on y and uses a
# fixed seed so it is the same on every run.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=0,
)

In [None]:
from sklearn import metrics
from sklearn.metrics import classification_report
from sklearn.metrics import make_scorer, accuracy_score, precision_score, recall_score, f1_score

In [None]:
def summarize_classification(y_test, y_pred):
    """Print a short evaluation summary for a set of predictions.

    Parameters
    ----------
    y_test : true labels of the evaluated samples.
    y_pred : predicted labels, aligned with ``y_test``.

    Prints the number of samples, the count and fraction of correct
    predictions, precision, recall and the text classification report,
    followed by a blank line. Nothing is returned.
    """
    n_correct = accuracy_score(y_test, y_pred, normalize=False)  # absolute count
    frac_correct = accuracy_score(y_test, y_pred, normalize=True)  # fraction in [0, 1]
    precision = precision_score(y_test, y_pred)
    sensitivity = recall_score(y_test, y_pred)
    report_text = metrics.classification_report(y_test, y_pred)

    # (label, value) pairs in the order they are printed.
    summary_rows = [
        ("Test data count: ", len(y_test)),
        ("accuracy_count : ", n_correct),
        ("accuracy_score : ", frac_correct),
        ("precision_score : ", precision),
        ("recall_score : ", sensitivity),
        ("class_report : ", report_text),
    ]
    for label, value in summary_rows:
        print(label, value)
    print()

In [None]:
from sklearn.model_selection import GridSearchCV

# Hyperparameter grid for the decision tree.
# The previous dict literal listed 'min_samples_split' twice, so Python kept
# only the last value, range(1, 5), and silently discarded the first list.
# That range includes 1, which scikit-learn rejects for min_samples_split
# (integers must be >= 2), so those candidates failed to fit.
# NOTE(review): the second entry is assumed to have been meant as
# 'min_samples_leaf', for which 1 is a valid value -- confirm with the author.
parameters = {
    'max_depth': [2, 4, 5, 7, 9, 10],
    'min_samples_split': [2, 4, 5, 7, 9, 10],
    'min_samples_leaf': [1, 2, 3, 4],
}

# 3-fold cross-validated search that ranks candidates by recall.
# random_state is fixed on the estimator so repeated runs select the same model.
grid_search = GridSearchCV(
    DecisionTreeClassifier(random_state=0),
    parameters,
    cv=3,
    return_train_score=True,
    scoring='recall',
)
grid_search.fit(x_train, y_train)

grid_search.best_params_

In [None]:
# Show the parameters, mean cross-validated test score and rank of the first
# six candidates stored in the grid search results.
cv_results = grid_search.cv_results_
report_fields = (
    ('Parameters: ', 'params'),
    ('Mean Test Score: ', 'mean_test_score'),
    ('Rank: ', 'rank_test_score'),
)
for row in range(6):
    for label, key in report_fields:
        print(label, cv_results[key][row])

In [None]:
# Refit the decision tree on the training split with every hyperparameter the
# grid search selected. Previously only 'max_depth' was forwarded, so the other
# tuned settings fell back to scikit-learn defaults and the evaluated model was
# not the one the search had chosen.
# random_state is fixed so the fitted tree is the same on every run.
decision_tree_model = DecisionTreeClassifier(
    random_state=0,
    **grid_search.best_params_
).fit(x_train, y_train)

In [None]:
# Predict labels for the held-out test split with the fitted decision tree.
y_pred = decision_tree_model.predict(x_test)

In [None]:
# Print accuracy, precision, recall and the classification report for the
# predictions currently stored in y_pred.
summarize_classification(y_test, y_pred)

In [None]:
# Quadratic Discriminant Analysis (QDA)

In [None]:
# Candidate regularisation strengths for quadratic discriminant analysis.
parameters = dict(reg_param=[0.1, 0.2, 0.3, 0.4, 0.5])

# 4-fold cross-validated search that ranks candidates by recall.
grid_search = GridSearchCV(
    estimator=QuadraticDiscriminantAnalysis(),
    param_grid=parameters,
    scoring='recall',
    cv=4,
    return_train_score=True,
)
grid_search.fit(x_train, y_train)

grid_search.best_params_

In [None]:
# Fit QDA on the training split using the reg_param picked by the grid search.
quadratic_discriminant_a_model = (
    QuadraticDiscriminantAnalysis(reg_param=grid_search.best_params_['reg_param'])
    .fit(x_train, y_train)
)

In [None]:
# Predict labels for the held-out test split with the fitted QDA model
# (this overwrites the y_pred produced by any earlier model).
y_pred = quadratic_discriminant_a_model.predict(x_test)

In [None]:
# Print accuracy, precision, recall and the classification report for the
# predictions currently stored in y_pred.
summarize_classification(y_test, y_pred)

In [None]:
# Hyperparameter grid for the random forest: 4 x 5 x 5 = 100 candidates.
parameters = {
    'n_estimators': [10, 30, 50, 100],
    'max_features': ['sqrt', 0.25, 0.5, 0.75, 1.0],
    'max_depth': [4, 5, 6, 7, 8],
}

# 4-fold cross-validated search that ranks candidates by recall.
# random_state is fixed on the estimator: a random forest is stochastic, and
# without a seed the selected parameters can change from run to run.
# n_jobs=-1 runs the 400 fits in parallel on all cores; it does not affect
# the scores.
grid_search = GridSearchCV(
    RandomForestClassifier(random_state=0),
    parameters,
    cv=4,
    return_train_score=True,
    scoring='recall',
    n_jobs=-1,
)
grid_search.fit(x_train, y_train)

grid_search.best_params_

In [None]:
# Refit the random forest on the training split with the hyperparameters the
# grid search selected (n_estimators, max_features and max_depth are all taken
# from best_params_). random_state is fixed because the forest is stochastic;
# without it the test metrics differ on every run.
rmf_model = RandomForestClassifier(
    random_state=0,
    **grid_search.best_params_
).fit(x_train, y_train)

In [None]:
# Predict labels for the held-out test split with the fitted random forest
# (this overwrites the y_pred produced by any earlier model).
y_pred = rmf_model.predict(x_test)

In [None]:
# Print accuracy, precision, recall and the classification report for the
# predictions currently stored in y_pred.
summarize_classification(y_test, y_pred)