In [160]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from xgboost import XGBClassifier
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [161]:
# Data set wax 1: read the table and use its first column as the row index.
wax_data_selected = pd.read_excel(
    "../data/processed/wax_selected_1.xls",
    index_col=0,
)
# "wax" is the label column; every other column is kept as a feature.
y = wax_data_selected["wax"]
X = wax_data_selected.drop(columns="wax")

In [162]:
# Data set wax 2: read the table and use its first column as the row index.
wax_data_selected_2 = pd.read_excel(
    "../data/processed/wax_selected_2.xls",
    index_col=0,
)
# "wax_F2" is the label column; every other column is kept as a feature.
y2 = wax_data_selected_2["wax_F2"]
X2 = wax_data_selected_2.drop(columns="wax_F2")

In [163]:
# Sanity check: feature matrix and label vector of data set 1, one shape per line.
print(X.shape, y.shape, sep="\n")

(184, 38)
(184,)


In [164]:
# Sanity check: feature matrix and label vector of data set 2, one shape per line.
print(X2.shape, y2.shape, sep="\n")

(184, 32)
(184,)


In [165]:
# Hold out 40% of the rows of each data set as a test split. Both calls use
# the same fixed random_state so the splits are repeatable.
# NOTE(review): the split is not stratified although the test labels are
# imbalanced (support 11 vs 63 in the reports below); consider stratify=y /
# stratify=y2 — this would change every metric reported in this notebook.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=90
)
X_train_2, X_test_2, y_train_2, y_test_2 = train_test_split(
    X2, y2, test_size=0.4, random_state=90
)

In [166]:
# Logistic regression

In [167]:
# Data set wax 1
# L1-penalised logistic regression (liblinear solver): fit on the training
# split, then predict labels for the held-out test split.
# NOTE(review): C looks like the output of a hyperparameter search — confirm
# where the value comes from.
lr = LogisticRegression(
    C=5.163986277024462,
    penalty="l1",
    solver="liblinear",
    random_state=90,
)
lr_predicted = lr.fit(X_train, y_train).predict(X_test)

In [168]:
# Per-class precision / recall / F1 of the logistic regression on the
# data set 1 test split.
print(classification_report(y_true=y_test, y_pred=lr_predicted))

              precision    recall  f1-score   support

           0       0.85      1.00      0.92        11
           1       1.00      0.97      0.98        63

    accuracy                           0.97        74
   macro avg       0.92      0.98      0.95        74
weighted avg       0.98      0.97      0.97        74



In [183]:
# Data set wax 2
# L1-penalised logistic regression (liblinear solver): fit on the training
# split, then predict labels for the held-out test split.
# NOTE(review): C looks like the output of a hyperparameter search — confirm
# where the value comes from.
lr_2 = LogisticRegression(
    C=5.706675868681398,
    penalty="l1",
    solver="liblinear",
    random_state=90,
)
lr_predicted_2 = lr_2.fit(X_train_2, y_train_2).predict(X_test_2)

In [184]:
# Per-class precision / recall / F1 of the logistic regression on the
# data set 2 test split.
print(classification_report(y_true=y_test_2, y_pred=lr_predicted_2))

              precision    recall  f1-score   support

           0       1.00      0.91      0.95        11
           1       0.98      1.00      0.99        63

    accuracy                           0.99        74
   macro avg       0.99      0.95      0.97        74
weighted avg       0.99      0.99      0.99        74



In [185]:
# Random forest

In [186]:
# Data set wax 1
# Random forest with 138 trees and a fixed seed: fit on the training split,
# then predict labels for the held-out test split.
rf = RandomForestClassifier(
    n_estimators=138,
    random_state=90,
)
rf_predicted = rf.fit(X_train, y_train).predict(X_test)

In [187]:
# Per-class precision / recall / F1 of the random forest on the
# data set 1 test split.
print(classification_report(y_true=y_test, y_pred=rf_predicted))

              precision    recall  f1-score   support

           0       0.91      0.91      0.91        11
           1       0.98      0.98      0.98        63

    accuracy                           0.97        74
   macro avg       0.95      0.95      0.95        74
weighted avg       0.97      0.97      0.97        74



In [188]:
# Data set wax 2
# Random forest with 62 trees and a fixed seed: fit on the training split,
# then predict labels for the held-out test split.
rf_2 = RandomForestClassifier(
    n_estimators=62,
    random_state=90,
)
rf_predicted_2 = rf_2.fit(X_train_2, y_train_2).predict(X_test_2)

In [189]:
# Per-class precision / recall / F1 of the random forest on the
# data set 2 test split.
print(classification_report(y_true=y_test_2, y_pred=rf_predicted_2))

              precision    recall  f1-score   support

           0       0.85      1.00      0.92        11
           1       1.00      0.97      0.98        63

    accuracy                           0.97        74
   macro avg       0.92      0.98      0.95        74
weighted avg       0.98      0.97      0.97        74



In [176]:
# XGBClassifier

In [190]:
# Data set wax 1
# Gradient-boosted classifier with 155 boosting rounds and an L1 (reg_alpha)
# penalty: fit on the training split, then predict the test split.
# NOTE(review): reg_alpha is digit-for-digit the C value used for `lr` above —
# confirm this was tuned for XGBoost and not pasted from the logistic model.
xgb = XGBClassifier(
    n_estimators=155,
    reg_alpha=5.163986277024462,
    random_state=90,
)
xgb.fit(X_train, y_train)
xgb_predicted = xgb.predict(X_test)

In [191]:
# Per-class precision / recall / F1 of the XGBoost classifier on the
# data set 1 test split.
print(classification_report(y_true=y_test, y_pred=xgb_predicted))

              precision    recall  f1-score   support

           0       0.91      0.91      0.91        11
           1       0.98      0.98      0.98        63

    accuracy                           0.97        74
   macro avg       0.95      0.95      0.95        74
weighted avg       0.97      0.97      0.97        74



In [192]:
# Data set wax 2
# Gradient-boosted classifier with 355 boosting rounds and an L1 (reg_alpha)
# penalty: fit on the training split, then predict the test split.
# NOTE(review): reg_alpha is identical to the value used for the data set 1
# models (`lr` C and `xgb` reg_alpha) — confirm it was tuned for data set 2.
# NOTE(review): the name `xgb2` breaks the `lr_2` / `rf_2` pattern; it is kept
# because other cells may refer to it.
xgb2 = XGBClassifier(
    n_estimators=355,
    reg_alpha=5.163986277024462,
    random_state=90,
)
xgb2.fit(X_train_2, y_train_2)
xgb_predicted_2 = xgb2.predict(X_test_2)

In [193]:
# Per-class precision / recall / F1 of the XGBoost classifier on the
# data set 2 test split.
print(classification_report(y_true=y_test_2, y_pred=xgb_predicted_2))

              precision    recall  f1-score   support

           0       0.92      1.00      0.96        11
           1       1.00      0.98      0.99        63

    accuracy                           0.99        74
   macro avg       0.96      0.99      0.97        74
weighted avg       0.99      0.99      0.99        74

