In [3]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.ensemble import GradientBoostingClassifier
import pandas as pd

In [5]:
data = pd.read_csv('./csv/multi_classification_data.csv')

In [7]:
# Splitting the data into features and targets
X = data.iloc[:, :-7]
y_with_other = data.iloc[:, -7:]
y_without_other = data.iloc[:, -8:-1]

# Splitting the data into train and test sets
X_train_with, X_test_with, y_train_with, y_test_with = train_test_split(X, y_with_other, test_size=0.2, random_state=42)
X_train_without, X_test_without, y_train_without, y_test_without = train_test_split(X, y_without_other, test_size=0.2, random_state=42)

# Create a Gradient Boosting classifier
clf_with = GradientBoostingClassifier(random_state=42)
clf_without = GradientBoostingClassifier(random_state=42)

# Train the models
clf_with.fit(X_train_with, y_train_with.idxmax(axis=1))
clf_without.fit(X_train_without, y_train_without.idxmax(axis=1))

# Make predictions
y_pred_with = clf_with.predict(X_test_with)
y_pred_without = clf_without.predict(X_test_without)

# Calculate the classification reports
report_with = classification_report(y_test_with.idxmax(axis=1), y_pred_with)
report_without = classification_report(y_test_without.idxmax(axis=1), y_pred_without)

In [9]:
# Print the classification reports
print('With other defaults:\n', report_with)
print('Without other defaults:\n', report_without)

With other:
               precision    recall  f1-score   support

       Bumps       0.62      0.64      0.63        72
   Dirtiness       0.60      0.75      0.67         8
    K_Scatch       0.95      0.95      0.95        83
Other_Faults       0.74      0.76      0.75       143
      Pastry       0.68      0.59      0.63        29
      Stains       1.00      0.92      0.96        13
   Z_Scratch       0.97      0.90      0.94        41

    accuracy                           0.79       389
   macro avg       0.80      0.79      0.79       389
weighted avg       0.79      0.79      0.79       389

Without other:
                 precision    recall  f1-score   support

         Bumps       0.61      0.64      0.62        70
     Dirtiness       0.60      0.75      0.67         8
      K_Scatch       0.85      0.85      0.85        13
        Pastry       0.76      0.59      0.67        27
SigmoidOfAreas       0.85      0.87      0.86       223
        Stains       1.00      0.92  