In [1]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from xgboost import XGBClassifier
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [2]:
# Load the processed wax data set A; index_col=0 turns the first sheet column into the row index.
wax_data_selected_A = pd.read_excel(
    "../data/processed/wax_selected_A.xls",
    index_col=0,
)
# The "wax" column is the target; every remaining column is kept as a feature.
y_A = wax_data_selected_A["wax"]
X_A = wax_data_selected_A.drop(columns=["wax"])

In [3]:
# Load the processed wax data set B; index_col=0 turns the first sheet column into the row index.
wax_data_selected_B = pd.read_excel(
    "../data/processed/wax_selected_B.xls",
    index_col=0,
)
# The "wax_F2" column is the target; every remaining column is kept as a feature.
y_B = wax_data_selected_B["wax_F2"]
X_B = wax_data_selected_B.drop(columns=["wax_F2"])

In [4]:
# Sanity check: feature-matrix shape on the first line, target shape on the second.
print(X_A.shape, y_A.shape, sep="\n")

(184, 17)
(184,)


In [5]:
# Sanity check: feature-matrix shape on the first line, target shape on the second.
print(X_B.shape, y_B.shape, sep="\n")

(184, 35)
(184,)


In [6]:
# Hold out 40% of data set A for evaluation; the seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_A,
    y_A,
    test_size=0.4,
    random_state=90,
)

# Data set B is split with the same test fraction and seed as A.
# NOTE(review): A and B have the same number of rows, so the two splits should
# select the same row positions — confirm the rows are aligned if models are
# compared across data sets.
X_train_2, X_test_2, y_train_2, y_test_2 = train_test_split(
    X_B,
    y_B,
    test_size=0.4,
    random_state=90,
)

In [7]:
# Logistic regression

In [8]:
# Data set wax A
# L1-penalised logistic regression using the liblinear solver.
# NOTE(review): the value of C looks like the result of a hyper-parameter
# search performed elsewhere — confirm its origin.
lr = LogisticRegression(
    penalty="l1",
    C=0.2847422647809694,
    solver="liblinear",
    random_state=90,
)
# fit() returns the estimator itself, so training and prediction can be chained.
lr_predicted = lr.fit(X_train, y_train).predict(X_test)

In [9]:
# Per-class precision/recall/F1 of the logistic regression on the held-out part of data set A.
print(classification_report(y_true=y_test, y_pred=lr_predicted))

              precision    recall  f1-score   support

           0       0.85      1.00      0.92        11
           1       1.00      0.97      0.98        63

    accuracy                           0.97        74
   macro avg       0.92      0.98      0.95        74
weighted avg       0.98      0.97      0.97        74



In [10]:
# Data set wax B
# L1-penalised logistic regression using the liblinear solver.
# NOTE(review): the value of C looks like the result of a hyper-parameter
# search performed elsewhere — confirm its origin.
lr_2 = LogisticRegression(
    penalty="l1",
    C=2.755725448969536,
    solver="liblinear",
    random_state=90,
)
# fit() returns the estimator itself, so training and prediction can be chained.
lr_predicted_2 = lr_2.fit(X_train_2, y_train_2).predict(X_test_2)

In [11]:
# Per-class precision/recall/F1 of the logistic regression on the held-out part of data set B.
print(classification_report(y_true=y_test_2, y_pred=lr_predicted_2))

              precision    recall  f1-score   support

           0       0.92      1.00      0.96        11
           1       1.00      0.98      0.99        63

    accuracy                           0.99        74
   macro avg       0.96      0.99      0.97        74
weighted avg       0.99      0.99      0.99        74



In [12]:
# Random forest

In [13]:
# Data set wax A

# Random forest with 138 trees and a fixed seed so the run is repeatable.
rf = RandomForestClassifier(
    random_state=90,
    n_estimators=138,
)
# fit() returns the estimator itself, so training and prediction can be chained.
rf_predicted = rf.fit(X_train, y_train).predict(X_test)

In [14]:
# Per-class precision/recall/F1 of the random forest on the held-out part of data set A.
print(classification_report(y_true=y_test, y_pred=rf_predicted))

              precision    recall  f1-score   support

           0       0.92      1.00      0.96        11
           1       1.00      0.98      0.99        63

    accuracy                           0.99        74
   macro avg       0.96      0.99      0.97        74
weighted avg       0.99      0.99      0.99        74



In [15]:
# Data set wax B

# Random forest with 69 trees and a fixed seed so the run is repeatable.
rf_2 = RandomForestClassifier(
    random_state=90,
    n_estimators=69,
)
# fit() returns the estimator itself, so training and prediction can be chained.
rf_predicted_2 = rf_2.fit(X_train_2, y_train_2).predict(X_test_2)

In [16]:
# Per-class precision/recall/F1 of the random forest on the held-out part of data set B.
print(classification_report(y_true=y_test_2, y_pred=rf_predicted_2))

              precision    recall  f1-score   support

           0       0.92      1.00      0.96        11
           1       1.00      0.98      0.99        63

    accuracy                           0.99        74
   macro avg       0.96      0.99      0.97        74
weighted avg       0.99      0.99      0.99        74



In [17]:
# XGBClassifier

In [18]:
# Data set wax A

# Gradient-boosted trees: 67 boosting rounds with an L1 penalty (reg_alpha).
# NOTE(review): the reg_alpha value looks like the result of a hyper-parameter
# search performed elsewhere — confirm its origin.
xgb = XGBClassifier(
    random_state=90,
    n_estimators=67,
    reg_alpha=5.163986277024462,
)
# Train on the A training split, then predict labels for the held-out rows.
xgb.fit(X_train, y_train)
xgb_predicted = xgb.predict(X_test)

In [19]:
# Per-class precision/recall/F1 of the XGBoost model on the held-out part of data set A.
print(classification_report(y_true=y_test, y_pred=xgb_predicted))

              precision    recall  f1-score   support

           0       0.91      0.91      0.91        11
           1       0.98      0.98      0.98        63

    accuracy                           0.97        74
   macro avg       0.95      0.95      0.95        74
weighted avg       0.97      0.97      0.97        74



In [20]:
# Data set wax B

# Gradient-boosted trees: 432 boosting rounds with an L1 penalty (reg_alpha).
# NOTE(review): the reg_alpha value looks like the result of a hyper-parameter
# search performed elsewhere — confirm its origin.
xgb2 = XGBClassifier(
    random_state=90,
    n_estimators=432,
    reg_alpha=6.852769816973126,
)
# Train on the B training split, then predict labels for the held-out rows.
xgb2.fit(X_train_2, y_train_2)
xgb_predicted_2 = xgb2.predict(X_test_2)

In [21]:
# Per-class precision/recall/F1 of the XGBoost model on the held-out part of data set B.
print(classification_report(y_true=y_test_2, y_pred=xgb_predicted_2))

              precision    recall  f1-score   support

           0       0.92      1.00      0.96        11
           1       1.00      0.98      0.99        63

    accuracy                           0.99        74
   macro avg       0.96      0.99      0.97        74
weighted avg       0.99      0.99      0.99        74

