A rare-category threshold has been added in this code: labels that occur too rarely are grouped into 'Other'.

# Logistic Regression

In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
import joblib  # For saving the model

# Load the train dataset
trainset = pd.read_csv('incidents_labelled.csv', index_col=0)

# Group rare categories into 'Other' in 'hazard', 'product', 'hazard-category', and 'product-category'
threshold = 10  # Categories with <= 10 occurrences will be grouped
for col in ['hazard', 'product', 'hazard-category', 'product-category']:
    # Count every label once per column and broadcast that count back onto the rows.
    # (Calling value_counts() inside a per-row lambda recomputed the full table for
    # each row, which is quadratic in the number of rows.)
    label_counts = trainset[col].map(trainset[col].value_counts())
    # Keep labels seen more than `threshold` times; everything else becomes 'Other'.
    # A missing label has no count (NaN), fails the comparison and is grouped too,
    # instead of raising KeyError as the per-row lookup did.
    trainset[col] = trainset[col].where(label_counts > threshold, 'Other')

# Split dataset into training and testing sets
X = trainset['text']
y_hazard = trainset['hazard']
y_product = trainset['product']
y_hazard_cat = trainset['hazard-category']
y_product_cat = trainset['product-category']

# One call splits the text and all four targets with the same shuffled indices,
# so the label splits are aligned with X_train / X_test by construction rather
# than by repeating the same random_state in four separate calls.
(
    X_train, X_test,
    y_train_hazard, y_test_hazard,
    y_train_product, y_test_product,
    y_train_hazard_cat, y_test_hazard_cat,
    y_train_product_cat, y_test_product_cat,
) = train_test_split(
    X, y_hazard, y_product, y_hazard_cat, y_product_cat,
    test_size=0.2, random_state=42,
)

# Two-stage text classifier: raw text -> TF-IDF features -> Logistic Regression
tfidf_step = ('tfidf', TfidfVectorizer())
# max_iter is raised to 1000 to give the solver room to converge
classifier_step = ('classifier', LogisticRegression(max_iter=1000))
pipeline = Pipeline(steps=[tfidf_step, classifier_step])

def _fit_report_save(target, y_train, y_test, model_path):
    """Fit the shared pipeline on one target, print its report and save it.

    Uses the module-level ``pipeline``, ``X_train`` and ``X_test``.

    Args:
        target: Label column name, used only in the printed heading.
        y_train: Training labels aligned with ``X_train``.
        y_test: Held-out labels aligned with ``X_test``.
        model_path: File the fitted pipeline is dumped to with joblib.

    Returns:
        The predictions made on ``X_test``.
    """
    pipeline.fit(X_train, y_train)
    y_pred = pipeline.predict(X_test)
    print(f"Classification Report for '{target}':")
    # zero_division=0 reports classes that were never predicted as 0.00 without
    # emitting one UndefinedMetricWarning per metric.
    print(classification_report(y_test, y_pred, zero_division=0))
    # The dump happens before the pipeline is re-fitted on the next target, so
    # each file holds the model for its own target.
    joblib.dump(pipeline, model_path)
    return y_pred


# Train, evaluate and save one Logistic Regression model per target
y_pred_hazard = _fit_report_save(
    'hazard', y_train_hazard, y_test_hazard, 'logistic_hazard_model.pkl')
y_pred_product = _fit_report_save(
    'product', y_train_product, y_test_product, 'logistic_product_model.pkl')
y_pred_hazard_cat = _fit_report_save(
    'hazard-category', y_train_hazard_cat, y_test_hazard_cat, 'logistic_hazard_category_model.pkl')
y_pred_product_cat = _fit_report_save(
    'product-category', y_train_product_cat, y_test_product_cat, 'logistic_product_category_model.pkl')

print("All models trained, saved, and classification reports displayed.")


Classification Report for 'hazard':
                                                precision    recall  f1-score   support

                                     Aflatoxin       0.00      0.00      0.00         3
                                         Other       0.31      0.58      0.40        92
                                     alkaloids       1.00      0.14      0.25         7
                                     allergens       0.00      0.00      0.00         3
                                        almond       0.50      0.25      0.33        12
                                 bacillus spp.       0.00      0.00      0.00         5
                                 bone fragment       0.00      0.00      0.00         3
                             bulging packaging       0.00      0.00      0.00         8
                                        cashew       0.00      0.00      0.00         5
                   celery and products thereof       0.00      0.00      0.00      

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Classification Report for 'product':
                                     precision    recall  f1-score   support

             Catfishes (freshwater)       0.00      0.00      0.00         1
              Fishes not identified       0.00      0.00      0.00         8
           Not classified pork meat       0.00      0.00      0.00         3
                              Other       0.44      0.99      0.61       505
Precooked cooked pork meat products       0.00      0.00      0.00         3
                    alfalfa sprouts       0.00      0.00      0.00         1
                              algae       0.00      0.00      0.00         5
                            almonds       0.00      0.00      0.00         4
                        apple juice       0.00      0.00      0.00         2
                    apricot kernels       0.00      0.00      0.00         3
                          baby food       0.00      0.00      0.00         4
                    bakery products   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Classification Report for 'hazard-category':
                                precision    recall  f1-score   support

                     allergens       0.87      0.96      0.91       377
                    biological       0.83      0.97      0.89       398
                      chemical       0.80      0.73      0.76       107
food additives and flavourings       1.00      0.29      0.44         7
                foreign bodies       0.83      0.87      0.85       166
                         fraud       0.62      0.27      0.38        77
                     migration       0.00      0.00      0.00         1
          organoleptic aspects       0.00      0.00      0.00        13
                  other hazard       0.80      0.12      0.21        33
              packaging defect       0.00      0.00      0.00        18

                      accuracy                           0.83      1197
                     macro avg       0.57      0.42      0.44      1197
                 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Classification Report for 'product-category':
                                                   precision    recall  f1-score   support

                                            Other       0.00      0.00      0.00         5
                              alcoholic beverages       1.00      0.07      0.13        14
                      cereals and bakery products       0.44      0.74      0.55       149
     cocoa and cocoa preparations, coffee and tea       0.86      0.41      0.55        44
                                    confectionery       1.00      0.18      0.31        38
dietetic foods, food supplements, fortified foods       0.91      0.32      0.48        31
                                    fats and oils       0.00      0.00      0.00         4
                   food additives and flavourings       0.00      0.00      0.00         1
                           food contact materials       1.00      1.00      1.00         1
                            fruits and vege

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


All models trained, saved, and classification reports displayed.


In [None]:
# Import necessary libraries
import pandas as pd
import joblib  # For loading the trained models

# Read the unlabelled incidents and pull out the text the models consume
testset = pd.read_csv('incidents_val.csv', index_col=0)
X_test = testset['text']

# Restore the four fitted Logistic Regression pipelines from disk
hazard_model = joblib.load('logistic_hazard_model.pkl')
product_model = joblib.load('logistic_product_model.pkl')
hazard_category_model = joblib.load('logistic_hazard_category_model.pkl')
product_category_model = joblib.load('logistic_product_category_model.pkl')

# Output column -> model that fills it (predicted in this order)
column_models = [
    ('predicted_hazard', hazard_model),
    ('predicted_product', product_model),
    ('predicted_hazard_category', hazard_category_model),
    ('predicted_product_category', product_category_model),
]
for column, model in column_models:
    testset[column] = model.predict(X_test)

# Write only the prediction columns.
# NOTE(review): index=False drops the index read via index_col=0, so rows in the
# output can only be matched back to incidents by position - confirm this is intended.
prediction_columns = [column for column, _model in column_models]
testset[prediction_columns].to_csv('testset_predictions.csv', index=False)

print("Predictions saved to 'testset_predictions.csv'.")


# SVM

In [2]:
# Import necessary libraries
import pandas as pd
from sklearn.svm import SVC
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
import joblib  # For saving the model

# Load the train dataset
trainset = pd.read_csv('incidents_labelled.csv', index_col=0)

# Group rare categories into 'Other' in 'hazard', 'product', 'hazard-category', and 'product-category'
threshold = 10  # Categories with <= 10 occurrences will be grouped
for col in ['hazard', 'product', 'hazard-category', 'product-category']:
    # Build the frequency table once per column, then look each row's label up in it.
    # (A per-row lambda calling value_counts() rebuilt the table for every row.)
    label_counts = trainset[col].map(trainset[col].value_counts())
    # Labels with more than `threshold` occurrences are kept as-is; the rest,
    # including missing labels (whose count is NaN), are replaced by 'Other'.
    trainset[col] = trainset[col].where(label_counts > threshold, 'Other')

# Split dataset into training and testing sets
X = trainset['text']
y_hazard = trainset['hazard']
y_product = trainset['product']
y_hazard_cat = trainset['hazard-category']
y_product_cat = trainset['product-category']

# A single call applies the same shuffled indices to the text and to every
# target, so all splits line up without relying on four calls sharing a seed.
(
    X_train, X_test,
    y_train_hazard, y_test_hazard,
    y_train_product, y_test_product,
    y_train_hazard_cat, y_test_hazard_cat,
    y_train_product_cat, y_test_product_cat,
) = train_test_split(
    X, y_hazard, y_product, y_hazard_cat, y_product_cat,
    test_size=0.2, random_state=42,
)

# Two-stage text classifier: raw text -> TF-IDF features -> linear-kernel SVM
tfidf_step = ('tfidf', TfidfVectorizer())
# NOTE(review): max_iter=1000 caps the solver's iterations; confirm the fits
# converge within that limit (watch for ConvergenceWarning).
classifier_step = ('classifier', SVC(kernel='linear', max_iter=1000))
pipeline = Pipeline(steps=[tfidf_step, classifier_step])

def _fit_report_save(target, y_train, y_test, model_path):
    """Fit the shared pipeline on one target, print its report and save it.

    Uses the module-level ``pipeline``, ``X_train`` and ``X_test``.

    Args:
        target: Label column name, used only in the printed heading.
        y_train: Training labels aligned with ``X_train``.
        y_test: Held-out labels aligned with ``X_test``.
        model_path: File the fitted pipeline is dumped to with joblib.

    Returns:
        The predictions made on ``X_test``.
    """
    pipeline.fit(X_train, y_train)
    y_pred = pipeline.predict(X_test)
    print(f"Classification Report for '{target}':")
    # zero_division=0 reports classes that were never predicted as 0.00 without
    # emitting one UndefinedMetricWarning per metric.
    print(classification_report(y_test, y_pred, zero_division=0))
    # Saved before the next call re-fits the same pipeline object.
    joblib.dump(pipeline, model_path)
    return y_pred


# Train, evaluate and save one SVM model per target
y_pred_hazard = _fit_report_save(
    'hazard', y_train_hazard, y_test_hazard, 'svm_hazard_model.pkl')
y_pred_product = _fit_report_save(
    'product', y_train_product, y_test_product, 'svm_product_model.pkl')
y_pred_hazard_cat = _fit_report_save(
    'hazard-category', y_train_hazard_cat, y_test_hazard_cat, 'svm_hazard_category_model.pkl')
y_pred_product_cat = _fit_report_save(
    'product-category', y_train_product_cat, y_test_product_cat, 'svm_product_category_model.pkl')

print("All models trained, saved, and classification reports displayed.")




Classification Report for 'hazard':
                                                precision    recall  f1-score   support

                                     Aflatoxin       0.00      0.00      0.00         3
                                         Other       0.28      0.70      0.40        92
                                     alkaloids       0.67      0.29      0.40         7
                                     allergens       0.00      0.00      0.00         3
                                        almond       0.56      0.42      0.48        12
                                 bacillus spp.       0.00      0.00      0.00         5
                                 bone fragment       0.00      0.00      0.00         3
                             bulging packaging       1.00      0.50      0.67         8
                                        cashew       0.75      0.60      0.67         5
                   celery and products thereof       0.00      0.00      0.00      

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Classification Report for 'product':
                                     precision    recall  f1-score   support

             Catfishes (freshwater)       0.00      0.00      0.00         1
              Fishes not identified       0.00      0.00      0.00         8
           Not classified pork meat       0.00      0.00      0.00         3
                              Other       0.49      0.97      0.65       505
Precooked cooked pork meat products       0.00      0.00      0.00         3
                    alfalfa sprouts       0.00      0.00      0.00         1
                              algae       1.00      0.20      0.33         5
                            almonds       0.00      0.00      0.00         4
                        apple juice       0.00      0.00      0.00         2
                    apricot kernels       0.67      0.67      0.67         3
                          baby food       0.00      0.00      0.00         4
                    bakery products   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Classification Report for 'hazard-category':
                                precision    recall  f1-score   support

                     allergens       0.90      0.96      0.93       377
                    biological       0.88      0.97      0.92       398
                      chemical       0.82      0.86      0.84       107
food additives and flavourings       1.00      0.43      0.60         7
                foreign bodies       0.85      0.92      0.88       166
                         fraud       0.68      0.35      0.46        77
                     migration       1.00      1.00      1.00         1
          organoleptic aspects       0.00      0.00      0.00        13
                  other hazard       0.82      0.27      0.41        33
              packaging defect       0.71      0.28      0.40        18

                      accuracy                           0.87      1197
                     macro avg       0.77      0.60      0.64      1197
                 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Classification Report for 'product-category':
                                                   precision    recall  f1-score   support

                                            Other       0.00      0.00      0.00         5
                              alcoholic beverages       0.89      0.57      0.70        14
                      cereals and bakery products       0.44      0.75      0.56       149
     cocoa and cocoa preparations, coffee and tea       0.73      0.50      0.59        44
                                    confectionery       0.92      0.29      0.44        38
dietetic foods, food supplements, fortified foods       0.94      0.52      0.67        31
                                    fats and oils       1.00      0.25      0.40         4
                   food additives and flavourings       0.00      0.00      0.00         1
                           food contact materials       1.00      1.00      1.00         1
                            fruits and vege

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


All models trained, saved, and classification reports displayed.


# RF

In [3]:
# Import necessary libraries
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
import joblib  # For saving the model

# Load the train dataset
trainset = pd.read_csv('incidents_labelled.csv', index_col=0)

# Group rare categories into 'Other' in 'hazard', 'product', 'hazard-category', and 'product-category'
threshold = 10  # Categories with <= 10 occurrences will be grouped
for col in ['hazard', 'product', 'hazard-category', 'product-category']:
    # One value_counts() per column, mapped back to a per-row occurrence count.
    # (Evaluating value_counts() inside a row-wise lambda repeated it for every row.)
    label_counts = trainset[col].map(trainset[col].value_counts())
    # Rows whose label occurs more than `threshold` times keep it; all others,
    # including rows with a missing label (NaN count), are set to 'Other'.
    trainset[col] = trainset[col].where(label_counts > threshold, 'Other')

# Split dataset into training and testing sets
X = trainset['text']
y_hazard = trainset['hazard']
y_product = trainset['product']
y_hazard_cat = trainset['hazard-category']
y_product_cat = trainset['product-category']

# Splitting everything in one call keeps the text and all four targets on the
# same train/test rows, instead of depending on a repeated random_state.
(
    X_train, X_test,
    y_train_hazard, y_test_hazard,
    y_train_product, y_test_product,
    y_train_hazard_cat, y_test_hazard_cat,
    y_train_product_cat, y_test_product_cat,
) = train_test_split(
    X, y_hazard, y_product, y_hazard_cat, y_product_cat,
    test_size=0.2, random_state=42,
)

# Two-stage text classifier: raw text -> TF-IDF features -> Random Forest
tfidf_step = ('tfidf', TfidfVectorizer())
# 100 trees, seeded so repeated runs build the same forest
classifier_step = ('classifier', RandomForestClassifier(n_estimators=100, random_state=42))
pipeline = Pipeline(steps=[tfidf_step, classifier_step])

def _fit_report_save(target, y_train, y_test, model_path):
    """Fit the shared pipeline on one target, print its report and save it.

    Uses the module-level ``pipeline``, ``X_train`` and ``X_test``.

    Args:
        target: Label column name, used only in the printed heading.
        y_train: Training labels aligned with ``X_train``.
        y_test: Held-out labels aligned with ``X_test``.
        model_path: File the fitted pipeline is dumped to with joblib.

    Returns:
        The predictions made on ``X_test``.
    """
    pipeline.fit(X_train, y_train)
    y_pred = pipeline.predict(X_test)
    print(f"Classification Report for '{target}':")
    # zero_division=0 reports classes that were never predicted as 0.00 without
    # emitting one UndefinedMetricWarning per metric.
    print(classification_report(y_test, y_pred, zero_division=0))
    # Saved before the next call re-fits the same pipeline object.
    joblib.dump(pipeline, model_path)
    return y_pred


# Train, evaluate and save one Random Forest model per target
y_pred_hazard = _fit_report_save(
    'hazard', y_train_hazard, y_test_hazard, 'random_forest_hazard_model.pkl')
y_pred_product = _fit_report_save(
    'product', y_train_product, y_test_product, 'random_forest_product_model.pkl')
y_pred_hazard_cat = _fit_report_save(
    'hazard-category', y_train_hazard_cat, y_test_hazard_cat, 'random_forest_hazard_category_model.pkl')
y_pred_product_cat = _fit_report_save(
    'product-category', y_train_product_cat, y_test_product_cat, 'random_forest_product_category_model.pkl')

print("All models trained, saved, and classification reports displayed.")


Classification Report for 'hazard':
                                                precision    recall  f1-score   support

                                     Aflatoxin       0.00      0.00      0.00         3
                                         Other       0.35      0.40      0.38        92
                                     alkaloids       0.67      0.29      0.40         7
                                     allergens       0.00      0.00      0.00         3
                                        almond       0.50      0.08      0.14        12
                                 bacillus spp.       0.00      0.00      0.00         5
                                 bone fragment       0.00      0.00      0.00         3
                             bulging packaging       0.80      0.50      0.62         8
                                        cashew       0.00      0.00      0.00         5
                   celery and products thereof       0.00      0.00      0.00      

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Classification Report for 'product':
                                     precision    recall  f1-score   support

             Catfishes (freshwater)       0.00      0.00      0.00         1
              Fishes not identified       1.00      0.12      0.22         8
           Not classified pork meat       0.00      0.00      0.00         3
                              Other       0.46      0.97      0.62       505
Precooked cooked pork meat products       0.00      0.00      0.00         3
                    alfalfa sprouts       0.00      0.00      0.00         1
                              algae       1.00      0.20      0.33         5
                            almonds       0.00      0.00      0.00         4
                        apple juice       0.00      0.00      0.00         2
                    apricot kernels       0.00      0.00      0.00         3
                          baby food       0.00      0.00      0.00         4
                    bakery products   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Classification Report for 'hazard-category':
                                precision    recall  f1-score   support

                     allergens       0.87      0.96      0.91       377
                    biological       0.84      0.96      0.90       398
                      chemical       0.80      0.65      0.72       107
food additives and flavourings       1.00      0.29      0.44         7
                foreign bodies       0.77      0.86      0.81       166
                         fraud       0.62      0.39      0.48        77
                     migration       1.00      1.00      1.00         1
          organoleptic aspects       0.00      0.00      0.00        13
                  other hazard       0.75      0.18      0.29        33
              packaging defect       0.00      0.00      0.00        18

                      accuracy                           0.83      1197
                     macro avg       0.67      0.53      0.56      1197
                 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Classification Report for 'product-category':
                                                   precision    recall  f1-score   support

                                            Other       0.00      0.00      0.00         5
                              alcoholic beverages       0.83      0.36      0.50        14
                      cereals and bakery products       0.31      0.59      0.41       149
     cocoa and cocoa preparations, coffee and tea       0.56      0.23      0.32        44
                                    confectionery       0.56      0.13      0.21        38
dietetic foods, food supplements, fortified foods       0.80      0.26      0.39        31
                                    fats and oils       1.00      0.25      0.40         4
                   food additives and flavourings       0.00      0.00      0.00         1
                           food contact materials       1.00      1.00      1.00         1
                            fruits and vege

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


All models trained, saved, and classification reports displayed.


# MLP

In [4]:
# Import necessary libraries
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
import joblib  # For saving the model

# Load the train dataset
trainset = pd.read_csv('incidents_labelled.csv', index_col=0)

# Group rare categories into 'Other' in 'hazard', 'product', 'hazard-category', and 'product-category'
threshold = 10  # Categories with <= 10 occurrences will be grouped
for col in ['hazard', 'product', 'hazard-category', 'product-category']:
    # Compute the label frequencies a single time and attach them to the rows.
    # (Doing value_counts() inside apply() recalculated them once per row.)
    label_counts = trainset[col].map(trainset[col].value_counts())
    # Frequent labels (count > threshold) pass through unchanged; rare labels and
    # missing labels (NaN count, comparison is False) are collapsed into 'Other'.
    trainset[col] = trainset[col].where(label_counts > threshold, 'Other')

# Split dataset into training and testing sets
X = trainset['text']
y_hazard = trainset['hazard']
y_product = trainset['product']
y_hazard_cat = trainset['hazard-category']
y_product_cat = trainset['product-category']

# Passing the text and all targets to one call splits them with identical
# indices, so no target can drift out of step with X_train / X_test.
(
    X_train, X_test,
    y_train_hazard, y_test_hazard,
    y_train_product, y_test_product,
    y_train_hazard_cat, y_test_hazard_cat,
    y_train_product_cat, y_test_product_cat,
) = train_test_split(
    X, y_hazard, y_product, y_hazard_cat, y_product_cat,
    test_size=0.2, random_state=42,
)

# Two-stage text classifier: raw text -> TF-IDF features -> Multi-Layer Perceptron
tfidf_step = ('tfidf', TfidfVectorizer())
# A single hidden layer of 100 units, seeded for repeatable training
classifier_step = (
    'classifier',
    MLPClassifier(hidden_layer_sizes=(100,), max_iter=1000, random_state=42),
)
pipeline = Pipeline(steps=[tfidf_step, classifier_step])

def _fit_report_save(target, y_train, y_test, model_path):
    """Fit the shared pipeline on one target, print its report and save it.

    Uses the module-level ``pipeline``, ``X_train`` and ``X_test``.

    Args:
        target: Label column name, used only in the printed heading.
        y_train: Training labels aligned with ``X_train``.
        y_test: Held-out labels aligned with ``X_test``.
        model_path: File the fitted pipeline is dumped to with joblib.

    Returns:
        The predictions made on ``X_test``.
    """
    pipeline.fit(X_train, y_train)
    y_pred = pipeline.predict(X_test)
    print(f"Classification Report for '{target}':")
    # zero_division=0 reports classes that were never predicted as 0.00 without
    # emitting one UndefinedMetricWarning per metric.
    print(classification_report(y_test, y_pred, zero_division=0))
    # Saved before the next call re-fits the same pipeline object.
    joblib.dump(pipeline, model_path)
    return y_pred


# Train, evaluate and save one MLP model per target
y_pred_hazard = _fit_report_save(
    'hazard', y_train_hazard, y_test_hazard, 'mlp_hazard_model.pkl')
y_pred_product = _fit_report_save(
    'product', y_train_product, y_test_product, 'mlp_product_model.pkl')
y_pred_hazard_cat = _fit_report_save(
    'hazard-category', y_train_hazard_cat, y_test_hazard_cat, 'mlp_hazard_category_model.pkl')
y_pred_product_cat = _fit_report_save(
    'product-category', y_train_product_cat, y_test_product_cat, 'mlp_product_category_model.pkl')

print("All models trained, saved, and classification reports displayed.")


Classification Report for 'hazard':
                                                precision    recall  f1-score   support

                                     Aflatoxin       0.00      0.00      0.00         3
                                         Other       0.46      0.48      0.47        92
                                     alkaloids       0.75      0.43      0.55         7
                                     allergens       0.00      0.00      0.00         3
                                        almond       0.56      0.42      0.48        12
                                 bacillus spp.       1.00      0.40      0.57         5
                                 bone fragment       0.00      0.00      0.00         3
                             bulging packaging       0.88      0.88      0.88         8
                                        cashew       0.75      0.60      0.67         5
                   celery and products thereof       1.00      0.25      0.40      

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Classification Report for 'product':
                                     precision    recall  f1-score   support

             Catfishes (freshwater)       0.00      0.00      0.00         1
              Fishes not identified       0.60      0.38      0.46         8
           Not classified pork meat       0.00      0.00      0.00         3
                              Other       0.55      0.86      0.67       505
Precooked cooked pork meat products       0.00      0.00      0.00         3
                    alfalfa sprouts       0.00      0.00      0.00         1
                              algae       0.50      0.40      0.44         5
                            almonds       0.00      0.00      0.00         4
                        apple juice       1.00      1.00      1.00         2
                    apricot kernels       0.67      0.67      0.67         3
                          baby food       0.00      0.00      0.00         4
                    bakery products   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Classification Report for 'hazard-category':
                                precision    recall  f1-score   support

                     allergens       0.88      0.95      0.92       377
                    biological       0.91      0.95      0.93       398
                      chemical       0.81      0.85      0.83       107
food additives and flavourings       1.00      0.43      0.60         7
                foreign bodies       0.88      0.89      0.88       166
                         fraud       0.66      0.43      0.52        77
                     migration       1.00      1.00      1.00         1
          organoleptic aspects       0.71      0.38      0.50        13
                  other hazard       0.68      0.45      0.55        33
              packaging defect       0.89      0.44      0.59        18

                      accuracy                           0.87      1197
                     macro avg       0.84      0.68      0.73      1197
                 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


All models trained, saved, and classification reports displayed.
