In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt
import seaborn as sns

#**DATA**

In [None]:
data = pd.read_csv('/content/final_ymt_2.csv')


In [None]:
data = data[data['Quantity'] >= 5]
data = data[data['Quantity'] <=100000 ]
data = data[data['Machines_Zone'] !=3 ]

data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 694 entries, 84 to 89712
Data columns (total 20 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Seq            694 non-null    int64  
 1   Urgent         694 non-null    int64  
 2   CAONO          694 non-null    int64  
 3   Quantity       694 non-null    float64
 4   Shift          694 non-null    int64  
 5   ProdProgress   694 non-null    int64  
 6   Machines_ID    694 non-null    int64  
 7   Machines_Zone  694 non-null    int64  
 8   Capability     694 non-null    int64  
 9   OrderDate      694 non-null    object 
 10  isFilAvail     694 non-null    int64  
 11  S_number_id    694 non-null    int64  
 12  ypn1_ID        694 non-null    float64
 13  ypn2_ID        694 non-null    float64
 14  Year           694 non-null    int64  
 15  Day            694 non-null    int64  
 16  Month          694 non-null    int64  
 17  Hour           694 non-null    int64  
 18  Minute      

In [None]:
scaler = StandardScaler()
data['Quantity'] = scaler.fit_transform(data[['Quantity']])

In [None]:
# Selecting categorical features
categorical_features = ['Shift', 'Machines_ID', 'Machines_Zone', 'Capability']

# One-hot encode the categorical features
data = pd.get_dummies(data, columns=categorical_features)

In [None]:
features = ['Quantity', 'Shift', 'ProdProgress', 'Machines_ID', 'Machines_Zone', 'Capability', 'isFilAvail', 'S_number_id', 'ypn1_ID', 'ypn2_ID', 'Day', 'Month', 'Hour', 'Minute', 'step_rank']
categorical_features = ['Shift', 'ProdProgress', 'Machines_ID', 'Machines_Zone', 'Capability', 'isFilAvail', 'S_number_id', 'ypn1_ID', 'ypn2_ID']

# Apply one-hot encoding to categorical features
X = pd.get_dummies(data[features], columns=categorical_features, drop_first=True)
y = data['Seq']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# **RandomForest**

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Initialize the model
model = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
print(classification_report(y_test, y_pred))
print("Accuracy:", accuracy_score(y_test, y_pred))

              precision    recall  f1-score   support

           1       0.89      0.83      0.86      9374
           2       0.41      0.53      0.47      5272
           3       0.32      0.24      0.27      3309

    accuracy                           0.63     17955
   macro avg       0.54      0.53      0.53     17955
weighted avg       0.65      0.63      0.64     17955

Accuracy: 0.6337510442773601


# **GridSearchCV**

In [None]:
from sklearn.model_selection import RandomizedSearchCV

param_dist = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

random_search = RandomizedSearchCV(estimator=model, param_distributions=param_dist, n_iter=10, cv=3, n_jobs=-1, verbose=2, random_state=42)
random_search.fit(X_train, y_train)

best_model = random_search.best_estimator_

# Make predictions with the best model
y_pred_best = best_model.predict(X_test)

# Evaluate the best model
from sklearn.metrics import classification_report, accuracy_score
print(classification_report(y_test, y_pred_best))
print("Accuracy:", accuracy_score(y_test, y_pred_best))


Fitting 3 folds for each of 10 candidates, totalling 30 fits
              precision    recall  f1-score   support

           1       1.00      0.82      0.90      9374
           2       0.52      0.93      0.67      5272
           3       0.56      0.15      0.23      3309

    accuracy                           0.73     17955
   macro avg       0.69      0.63      0.60     17955
weighted avg       0.78      0.73      0.71     17955

Accuracy: 0.7274296853244222


In [None]:
from sklearn.tree import DecisionTreeClassifier

# Initialize and train the model
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)
# Evaluate the model
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(classification_report(y_test, y_pred))
print(f'Accuracy: {accuracy * 100:.2f}%')

              precision    recall  f1-score   support

           1       0.87      0.86      0.86      7214
           2       0.49      0.50      0.50      3999
           3       0.39      0.39      0.39      2418

    accuracy                           0.67     13631
   macro avg       0.58      0.58      0.58     13631
weighted avg       0.67      0.67      0.67     13631

Accuracy: 66.97%


# **GradientBoost**

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report
# Initialize and train the model
model = GradientBoostingClassifier()
model.fit(X_train, y_train)

# Predict on the test set
y_pred = model.predict(X_test)
# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')
print(classification_report(y_test, y_pred))


Accuracy: 0.73
              precision    recall  f1-score   support

           1       0.99      0.82      0.90      9374
           2       0.52      0.93      0.67      5272
           3       0.57      0.15      0.24      3309

    accuracy                           0.73     17955
   macro avg       0.70      0.63      0.60     17955
weighted avg       0.78      0.73      0.71     17955



# **MLPC**

In [None]:
from sklearn.neural_network import MLPClassifier
model_1 = MLPClassifier(hidden_layer_sizes=(100,), max_iter=500)
model_1.fit(X_train, y_train)
# Predict on the test set
y_pred = model_1.predict(X_test)
# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')
print(classification_report(y_test, y_pred))


Accuracy: 0.52
              precision    recall  f1-score   support

           1       0.52      1.00      0.69      9374
           2       0.00      0.00      0.00      5272
           3       0.00      0.00      0.00      3309

    accuracy                           0.52     17955
   macro avg       0.17      0.33      0.23     17955
weighted avg       0.27      0.52      0.36     17955



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


##**Smote**

In [None]:
pip install imblearn



In [None]:
from imblearn.over_sampling import SMOTE

smote = SMOTE(random_state=42)
X_res, y_res = smote.fit_resample(X_train, y_train)


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Initialize the model
model = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model
model.fit(X_res, y_res)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
print(classification_report(y_test, y_pred))
print("Accuracy:", accuracy_score(y_test, y_pred))

              precision    recall  f1-score   support

           1       0.95      0.82      0.88      9576
           2       0.78      0.82      0.80      9434
           3       0.86      0.93      0.89      9453

    accuracy                           0.86     28463
   macro avg       0.86      0.86      0.86     28463
weighted avg       0.86      0.86      0.86     28463

Accuracy: 0.8573235428450972


In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report
# Initialize and train the model
model = GradientBoostingClassifier()
model.fit(X_res, y_res)

# Predict on the test set
y_pred = model.predict(X_test)
# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')
print(classification_report(y_test, y_pred))


KeyboardInterrupt: 

# **Test_cat**

In [None]:
data = pd.read_csv('/content/final_ymt_2.csv')
data = data[data['Quantity'] >= 5]
data = data[data['Quantity'] <=100000 ]
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 89771 entries, 0 to 89770
Data columns (total 20 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Seq            89771 non-null  int64  
 1   Urgent         89771 non-null  int64  
 2   CAONO          89771 non-null  int64  
 3   Quantity       89771 non-null  float64
 4   Shift          89771 non-null  int64  
 5   ProdProgress   89771 non-null  int64  
 6   Machines_ID    89771 non-null  int64  
 7   Machines_Zone  89771 non-null  int64  
 8   Capability     89771 non-null  int64  
 9   OrderDate      89771 non-null  object 
 10  isFilAvail     89771 non-null  int64  
 11  S_number_id    89771 non-null  int64  
 12  ypn1_ID        89771 non-null  float64
 13  ypn2_ID        89771 non-null  float64
 14  Year           89771 non-null  int64  
 15  Day            89771 non-null  int64  
 16  Month          89771 non-null  int64  
 17  Hour           89771 non-null  int64  
 18  Minute

In [None]:
target_column = 'Seq'
# Separate features and target
X = data.drop(columns=['Seq','OrderDate'])
y = data[target_column]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [None]:

data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 89771 entries, 0 to 89770
Data columns (total 20 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Seq            89771 non-null  int64  
 1   Urgent         89771 non-null  int64  
 2   CAONO          89771 non-null  int64  
 3   Quantity       89771 non-null  float64
 4   Shift          89771 non-null  int64  
 5   ProdProgress   89771 non-null  int64  
 6   Machines_ID    89771 non-null  int64  
 7   Machines_Zone  89771 non-null  int64  
 8   Capability     89771 non-null  int64  
 9   OrderDate      89771 non-null  object 
 10  isFilAvail     89771 non-null  int64  
 11  S_number_id    89771 non-null  int64  
 12  ypn1_ID        89771 non-null  float64
 13  ypn2_ID        89771 non-null  float64
 14  Year           89771 non-null  int64  
 15  Day            89771 non-null  int64  
 16  Month          89771 non-null  int64  
 17  Hour           89771 non-null  int64  
 18  Minute

##**randomForest**

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Initialize the model
model = RandomForestClassifier(n_estimators=200, random_state=42)

# Train the model
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
print(classification_report(y_test, y_pred))
print("Accuracy:", accuracy_score(y_test, y_pred))

              precision    recall  f1-score   support

           1       0.84      0.63      0.72        75
           2       0.35      0.51      0.42        43
           3       0.25      0.24      0.24        21

    accuracy                           0.53       139
   macro avg       0.48      0.46      0.46       139
weighted avg       0.60      0.53      0.55       139

Accuracy: 0.5323741007194245


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV


In [None]:
from sklearn.model_selection import GridSearchCV

param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

random_search = RandomizedSearchCV(estimator=model, param_distributions=param_grid, n_iter=10, cv=3, n_jobs=2, verbose=2, random_state=42)
random_search.fit(X_train, y_train)

best_model = random_search.best_estimator_

# Make predictions with the best model
y_pred_best = best_model.predict(X_test)

# Evaluate the best model
from sklearn.metrics import classification_report, accuracy_score
print(classification_report(y_test, y_pred_best))
print("Accuracy:", accuracy_score(y_test, y_pred_best))

Fitting 3 folds for each of 10 candidates, totalling 30 fits
              precision    recall  f1-score   support

           1       0.96      0.63      0.76        75
           2       0.44      0.74      0.55        43
           3       0.41      0.33      0.37        21

    accuracy                           0.62       139
   macro avg       0.60      0.57      0.56       139
weighted avg       0.72      0.62      0.64       139

Accuracy: 0.6187050359712231


##**XGBClassifier**

In [None]:
from xgboost import XGBClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report, accuracy_score

# Adjust target labels to start from 0
y_train_adjusted = y_train - 1
y_test_adjusted = y_test - 1

# Initialize the model
xgb_model = XGBClassifier(random_state=42)

# Train the model
xgb_model.fit(X_train, y_train_adjusted)

# Evaluate the model on the training set
y_train_pred_adjusted = xgb_model.predict(X_train)
print("Training Set Performance:")
print(classification_report(y_train_adjusted, y_train_pred_adjusted))
train_accuracy = accuracy_score(y_train_adjusted, y_train_pred_adjusted)
print(f"Training Accuracy: {train_accuracy}")

# Evaluate the model on the testing set
y_test_pred_adjusted = xgb_model.predict(X_test)
print("\nTesting Set Performance:")
print(classification_report(y_test_adjusted, y_test_pred_adjusted))
test_accuracy = accuracy_score(y_test_adjusted, y_test_pred_adjusted)
print(f"Testing Accuracy: {test_accuracy}")

Training Set Performance:
              precision    recall  f1-score   support

           0       0.99      0.85      0.91     38064
           1       0.58      0.91      0.71     20923
           2       0.73      0.35      0.48     12829

    accuracy                           0.78     71816
   macro avg       0.77      0.70      0.70     71816
weighted avg       0.82      0.78      0.78     71816

Training Accuracy: 0.778614793360811

Testing Set Performance:
              precision    recall  f1-score   support

           0       0.97      0.83      0.89      9374
           1       0.51      0.81      0.63      5272
           2       0.48      0.23      0.31      3309

    accuracy                           0.71     17955
   macro avg       0.65      0.62      0.61     17955
weighted avg       0.74      0.71      0.71     17955

Testing Accuracy: 0.7137287663603453


#**NormalData +XGB+RandomizesSearch**

In [None]:
from sklearn.model_selection import RandomizedSearchCV
import xgboost as xgb


# Adjust target labels to start from 0
y_train_adjusted = y_train - 1
y_test_adjusted = y_test - 1

# Define the parameter grid for RandomizedSearchCV
param_grid = {
    'n_estimators': [100, 200, 300],
    'learning_rate': [0.01, 0.05, 0.1, 0.2],
    'max_depth': [3, 5, 7, 10],
    'min_child_weight': [1, 3, 5],
    'gamma': [0, 0.1, 0.2, 0.3],
    'subsample': [0.6, 0.8, 1.0],
    'colsample_bytree': [0.6, 0.8, 1.0]
}

# Initialize the XGBClassifier
xgb_model = xgb.XGBClassifier(random_state=42)

# Initialize the RandomizedSearchCV
random_search = RandomizedSearchCV(estimator=xgb_model, param_distributions=param_grid, n_iter=50, cv=5, verbose=2, random_state=42, n_jobs=-1)

# Fit the random search model
random_search.fit(X_train, y_train_adjusted)

# Get the best model
best_xgb_model = random_search.best_estimator_

# Evaluate the model on the training set
y_train_pred_adjusted = best_xgb_model.predict(X_train)
print("Training Set Performance:")
print(classification_report(y_train_adjusted, y_train_pred_adjusted))
train_accuracy = accuracy_score(y_train_adjusted, y_train_pred_adjusted)
print(f"Training Accuracy: {train_accuracy}")

# Evaluate the model on the testing set
y_test_pred_adjusted = best_xgb_model.predict(X_test)
print("\nTesting Set Performance:")
print(classification_report(y_test_adjusted, y_test_pred_adjusted))
test_accuracy = accuracy_score(y_test_adjusted, y_test_pred_adjusted)
print(f"Testing Accuracy: {test_accuracy}")

# Cross-validation
cv_scores = cross_val_score(best_xgb_model, X, y - 1, cv=5, scoring='accuracy')
print(f"\nCross-validation scores: {cv_scores}")
print(f"Mean cross-validation score: {cv_scores.mean()}")

# Convert predictions back to the original labels for interpretation
y_train_pred = y_train_pred_adjusted + 1
y_test_pred = y_test_pred_adjusted + 1

print("\nConverted Testing Set Performance:")
print(classification_report(y_test, y_test_pred))
print(f"Converted Testing Accuracy: {accuracy_score(y_test, y_test_pred)}")

Fitting 5 folds for each of 50 candidates, totalling 250 fits
Training Set Performance:
              precision    recall  f1-score   support

           0       1.00      0.82      0.90     38064
           1       0.53      0.93      0.67     20923
           2       0.61      0.17      0.26     12829

    accuracy                           0.73     71816
   macro avg       0.71      0.64      0.61     71816
weighted avg       0.79      0.73      0.72     71816

Training Accuracy: 0.7339450818759051

Testing Set Performance:
              precision    recall  f1-score   support

           0       1.00      0.82      0.90      9374
           1       0.52      0.93      0.67      5272
           2       0.57      0.16      0.25      3309

    accuracy                           0.73     17955
   macro avg       0.70      0.64      0.61     17955
weighted avg       0.78      0.73      0.71     17955

Testing Accuracy: 0.7301030353661933

Cross-validation scores: [0.59949875 0.7275816  

In [None]:
from sklearn.model_selection import cross_val_score

# Cross-validation
cv_scores = cross_val_score(best_xgb_model, X, y - 1, cv=5, scoring='accuracy')
print(f"\nCross-validation scores: {cv_scores}")
print(f"Mean cross-validation score: {cv_scores.mean()}")

NameError: name 'best_xgb_model' is not defined

#**MLPClassifier**

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, accuracy_score
# Define the MLP model
mlp = MLPClassifier(hidden_layer_sizes=(100,), max_iter=300, alpha=0.0001, solver='adam', random_state=42)

# Train the MLP model
mlp.fit(X_train, y_train)
# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
print(classification_report(y_test, y_pred))
print("Accuracy:", accuracy_score(y_test, y_pred))

NameError: name 'model' is not defined

#**full_stack_model**


In [None]:
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.metrics import classification_report, accuracy_score

# Adjust target labels to start from 0
y_train_adjusted = y_train - 1
y_test_adjusted = y_test - 1

# Model 1: Random Forest with RandomizedSearchCV
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

random_forest = RandomForestClassifier(random_state=42)
random_search = RandomizedSearchCV(estimator=random_forest, param_distributions=param_grid, n_iter=10, cv=3, n_jobs=2, verbose=2, random_state=42)
random_search.fit(X_train, y_train_adjusted)
best_rf_model = random_search.best_estimator_

# Model 2: XGBoost
xgb_model = XGBClassifier(random_state=42)
xgb_model.fit(X_train, y_train_adjusted)

# Stacking model
estimators = [
    ('rf', best_rf_model),
    ('xgb', xgb_model)
]

stacked_model = StackingClassifier(estimators=estimators, final_estimator=RandomForestClassifier(random_state=42))
stacked_model.fit(X_train, y_train_adjusted)

# Evaluate the stacked model on the training set
y_train_pred = stacked_model.predict(X_train)
print("Training Set Performance:")
print(classification_report(y_train_adjusted, y_train_pred))
train_accuracy = accuracy_score(y_train_adjusted, y_train_pred)
print(f"Training Accuracy: {train_accuracy}")

# Evaluate the stacked model on the testing set
y_test_pred = stacked_model.predict(X_test)
print("\nTesting Set Performance:")
print(classification_report(y_test_adjusted, y_test_pred))
test_accuracy = accuracy_score(y_test_adjusted, y_test_pred)
print(f"Testing Accuracy: {test_accuracy}")

# Convert predictions back to the original labels for interpretation
y_train_pred_converted = y_train_pred + 1
y_test_pred_converted = y_test_pred + 1

print("\nConverted Testing Set Performance:")
print(classification_report(y_test, y_test_pred_converted))
print(f"Converted Testing Accuracy: {accuracy_score(y_test, y_test_pred_converted)}")


Fitting 3 folds for each of 10 candidates, totalling 30 fits
Training Set Performance:
              precision    recall  f1-score   support

           0       0.93      0.84      0.88     38064
           1       0.49      0.64      0.55     20923
           2       0.41      0.31      0.35     12829

    accuracy                           0.69     71816
   macro avg       0.61      0.60      0.60     71816
weighted avg       0.71      0.69      0.69     71816

Training Accuracy: 0.6873398685529687

Testing Set Performance:
              precision    recall  f1-score   support

           0       0.94      0.85      0.89      9374
           1       0.54      0.72      0.62      5272
           2       0.45      0.32      0.37      3309

    accuracy                           0.72     17955
   macro avg       0.64      0.63      0.63     17955
weighted avg       0.73      0.72      0.72     17955

Testing Accuracy: 0.7161236424394319

Converted Testing Set Performance:
              

#**NN**

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, accuracy_score
# Define the MLP model
mlp = MLPClassifier(hidden_layer_sizes=(100,), max_iter=300, alpha=0.0001, solver='adam', random_state=42)

# Train the MLP model
mlp.fit(X_train, y_train)
# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
print(classification_report(y_test, y_pred))
print("Accuracy:", accuracy_score(y_test, y_pred))


              precision    recall  f1-score   support

           1       0.89      0.83      0.86      9374
           2       0.41      0.53      0.46      5272
           3       0.31      0.23      0.27      3309

    accuracy                           0.63     17955
   macro avg       0.54      0.53      0.53     17955
weighted avg       0.64      0.63      0.63     17955

Accuracy: 0.6301865775549986


#**SMOTE**

In [None]:
from imblearn.over_sampling import SMOTE

smote = SMOTE(random_state=42)
X_res, y_res = smote.fit_resample(X_train, y_train)


ModuleNotFoundError: No module named 'imblearn'

##RandomForestClassifier

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Initialize the model
model = RandomForestClassifier(n_estimators=200, random_state=42)

# Train the model
model.fit(X_res, y_res)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
print(classification_report(y_test, y_pred))
print("Accuracy:", accuracy_score(y_test, y_pred))

              precision    recall  f1-score   support

           1       0.91      0.83      0.87      9374
           2       0.41      0.50      0.45      5272
           3       0.34      0.31      0.32      3309

    accuracy                           0.64     17955
   macro avg       0.55      0.55      0.55     17955
weighted avg       0.66      0.64      0.64     17955

Accuracy: 0.6357560568086884


#**gradien Boosting Classifier**

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report
# Initialize and train the model
model = GradientBoostingClassifier()
model.fit(X_res, y_res)

# Predict on the test set
y_pred = model.predict(X_test)
# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')
print(classification_report(y_test, y_pred))


Accuracy: 0.72
              precision    recall  f1-score   support

           1       1.00      0.82      0.90      9374
           2       0.52      0.73      0.61      5272
           3       0.47      0.40      0.43      3309

    accuracy                           0.72     17955
   macro avg       0.66      0.65      0.65     17955
weighted avg       0.76      0.72      0.73     17955



In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report
# Initialize and train the model
model = GradientBoostingClassifier()
model.fit(X_train, y_train)

# Predict on the test set
y_pred = model.predict(X_test)
# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')
print(classification_report(y_test, y_pred))


Accuracy: 0.73
              precision    recall  f1-score   support

           1       0.99      0.82      0.90      9374
           2       0.52      0.93      0.67      5272
           3       0.57      0.15      0.24      3309

    accuracy                           0.73     17955
   macro avg       0.70      0.63      0.60     17955
weighted avg       0.78      0.73      0.71     17955



In [None]:
pip install imblearn



#**RandomOverSampler:**

In [None]:
from imblearn.over_sampling import RandomOverSampler
# Apply RandomOverSampler
ros = RandomOverSampler()
X_resampled1, y_resampled1 = ros.fit_resample(X, y)

# Check the new class distribution
print(pd.Series(y_resampled1).value_counts())


Seq
3    47438
2    47438
1    47438
Name: count, dtype: int64


In [None]:
# Split the resampled data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_resampled1, y_resampled1, test_size=0.2, random_state=42)


##RandomForest

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Initialize the model
model = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
print(classification_report(y_test, y_pred))
print("Accuracy:", accuracy_score(y_test, y_pred))

              precision    recall  f1-score   support

           1       0.89      0.83      0.86      9374
           2       0.41      0.53      0.47      5272
           3       0.32      0.24      0.27      3309

    accuracy                           0.63     17955
   macro avg       0.54      0.53      0.53     17955
weighted avg       0.65      0.63      0.64     17955

Accuracy: 0.6339181286549708


In [None]:
# Evaluate the model on the training set
y_train_pred = model.predict(X_train)
print("Training Set Performance:")
print(classification_report(y_train, y_train_pred))
train_accuracy = accuracy_score(y_train, y_train_pred)
print(f"Training Accuracy: {train_accuracy}")

# Evaluate the model on the testing set
y_test_pred = model.predict(X_test)
print("\nTesting Set Performance:")
print(classification_report(y_test, y_test_pred))
test_accuracy = accuracy_score(y_test, y_test_pred)
print(f"Testing Accuracy: {test_accuracy}")

# Compare the accuracies
if train_accuracy > test_accuracy:
    print("\nThe model may be overfitting.")
else:
    print("\nThe model does not appear to be overfitting.")

Training Set Performance:
              precision    recall  f1-score   support

           1       1.00      1.00      1.00     37862
           2       1.00      1.00      1.00     38004
           3       1.00      1.00      1.00     37985

    accuracy                           1.00    113851
   macro avg       1.00      1.00      1.00    113851
weighted avg       1.00      1.00      1.00    113851

Training Accuracy: 0.9999912165901046

Testing Set Performance:
              precision    recall  f1-score   support

           1       0.95      0.82      0.88      9576
           2       0.77      0.82      0.80      9434
           3       0.86      0.93      0.89      9453

    accuracy                           0.86     28463
   macro avg       0.86      0.86      0.86     28463
weighted avg       0.86      0.86      0.86     28463

Testing Accuracy: 0.8564803429013105

The model may be overfitting.


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Initialize the model with regularization
model = RandomForestClassifier(
    n_estimators=200,
    random_state=42,
    max_depth=10,  # Limit the depth of the tree
    min_samples_split=10,  # Minimum number of samples required to split an internal node
    min_samples_leaf=5  # Minimum number of samples required to be at a leaf node
)

# Train the model
model.fit(X_train, y_train)

# Evaluate the model on the training set
y_train_pred = model.predict(X_train)
print("Training Set Performance:")
print(classification_report(y_train, y_train_pred))
train_accuracy = accuracy_score(y_train, y_train_pred)
print(f"Training Accuracy: {train_accuracy}")

# Evaluate the model on the testing set
y_test_pred = model.predict(X_test)
print("\nTesting Set Performance:")
print(classification_report(y_test, y_test_pred))
test_accuracy = accuracy_score(y_test, y_test_pred)
print(f"Testing Accuracy: {test_accuracy}")

# Compare the accuracies
if train_accuracy > test_accuracy:
    print("\nThe model may be overfitting.")
else:
    print("\nThe model does not appear to be overfitting.")


Training Set Performance:
              precision    recall  f1-score   support

           1       1.00      0.81      0.90     37862
           2       0.53      0.69      0.60     38004
           3       0.63      0.55      0.59     37985

    accuracy                           0.69    113851
   macro avg       0.72      0.69      0.69    113851
weighted avg       0.72      0.69      0.69    113851

Training Accuracy: 0.6850971884304925

Testing Set Performance:
              precision    recall  f1-score   support

           1       1.00      0.81      0.90      9576
           2       0.52      0.68      0.59      9434
           3       0.61      0.53      0.57      9453

    accuracy                           0.68     28463
   macro avg       0.71      0.68      0.68     28463
weighted avg       0.71      0.68      0.69     28463

Testing Accuracy: 0.6762112215859185

The model may be overfitting.


In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report
# Initialize and train the model
model = GradientBoostingClassifier()
model.fit(X_train, y_train)

# Predict on the test set
y_pred = model.predict(X_test)
# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')
print(classification_report(y_test, y_pred))


Accuracy: 0.67
              precision    recall  f1-score   support

           1       1.00      0.81      0.90      9576
           2       0.51      0.62      0.56      9434
           3       0.59      0.57      0.58      9453

    accuracy                           0.67     28463
   macro avg       0.70      0.67      0.68     28463
weighted avg       0.70      0.67      0.68     28463



In [None]:
from sklearn.model_selection import RandomizedSearchCV

param_dist = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

random_search = RandomizedSearchCV(estimator=model, param_distributions=param_dist, n_iter=10, cv=3, n_jobs=2, verbose=2, random_state=42)
random_search.fit(X_train, y_train)

best_model = random_search.best_estimator_

# Make predictions with the best model
y_pred_best = best_model.predict(X_test)

# Evaluate the best model
from sklearn.metrics import classification_report, accuracy_score
print(classification_report(y_test, y_pred_best))
print("Accuracy:", accuracy_score(y_test, y_pred_best))

Fitting 3 folds for each of 10 candidates, totalling 30 fits
              precision    recall  f1-score   support

           1       0.96      0.82      0.88      9576
           2       0.77      0.82      0.80      9434
           3       0.86      0.93      0.89      9453

    accuracy                           0.86     28463
   macro avg       0.86      0.86      0.86     28463
weighted avg       0.86      0.86      0.86     28463

Accuracy: 0.8562344095843727


In [None]:
# Evaluate the model on the training set
y_train_pred = best_model.predict(X_train)
print("Training Set Performance:")
print(classification_report(y_train, y_train_pred))
train_accuracy = accuracy_score(y_train, y_train_pred)
print(f"Training Accuracy: {train_accuracy}")

# Evaluate the model on the testing set
y_test_pred = best_model.predict(X_test)
print("\nTesting Set Performance:")
print(classification_report(y_test, y_test_pred))
test_accuracy = accuracy_score(y_test, y_test_pred)
print(f"Testing Accuracy: {test_accuracy}")


Training Set Performance:
              precision    recall  f1-score   support

           1       1.00      1.00      1.00     37862
           2       1.00      1.00      1.00     38004
           3       1.00      1.00      1.00     37985

    accuracy                           1.00    113851
   macro avg       1.00      1.00      1.00    113851
weighted avg       1.00      1.00      1.00    113851

Training Accuracy: 0.9984014193990391

Testing Set Performance:
              precision    recall  f1-score   support

           1       0.96      0.82      0.88      9576
           2       0.77      0.82      0.80      9434
           3       0.86      0.93      0.89      9453

    accuracy                           0.86     28463
   macro avg       0.86      0.86      0.86     28463
weighted avg       0.86      0.86      0.86     28463

Testing Accuracy: 0.8562344095843727

The model may be overfitting.


In [None]:
from sklearn.model_selection import cross_val_score

cv_scores = cross_val_score(best_model, X_train, y_train, cv=5, scoring='accuracy')
print(f"Cross-validation scores: {cv_scores}")
print(f"Mean cross-validation score: {cv_scores.mean()}")


Cross-validation scores: [0.82618243 0.82938076 0.82683355 0.83236715 0.82547211]
Mean cross-validation score: 0.8280472008570486


In [None]:
from sklearn.feature_selection import SelectFromModel

# Use the feature importances from the best model to select features
selector = SelectFromModel(best_model, threshold='median')
selector.fit(X_train, y_train)
X_train_selected = selector.transform(X_train)
X_test_selected = selector.transform(X_test)

# Train the model again with the selected features
best_model.fit(X_train_selected, y_train)
y_test_pred = best_model.predict(X_test_selected)
print("\nTesting Set Performance with Selected Features:")
print(classification_report(y_test, y_test_pred))
test_accuracy_selected = accuracy_score(y_test, y_test_pred)
print(f"Testing Accuracy with Selected Features: {test_accuracy_selected}")



Testing Set Performance with Selected Features:
              precision    recall  f1-score   support

           1       0.96      0.82      0.88      9576
           2       0.78      0.83      0.80      9434
           3       0.86      0.93      0.89      9453

    accuracy                           0.86     28463
   macro avg       0.87      0.86      0.86     28463
weighted avg       0.87      0.86      0.86     28463

Testing Accuracy with Selected Features: 0.8593964093735728


In [None]:
pip install xgboost



In [None]:
from xgboost import XGBClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report, accuracy_score

# Adjust target labels to start from 0
y_train_adjusted = y_train - 1
y_test_adjusted = y_test - 1

# Initialize the model
xgb_model = XGBClassifier(random_state=42)

# Train the model
xgb_model.fit(X_train_selected, y_train_adjusted)

# Evaluate the model on the training set
y_train_pred_adjusted = xgb_model.predict(X_train_selected)
print("Training Set Performance:")
print(classification_report(y_train_adjusted, y_train_pred_adjusted))
train_accuracy = accuracy_score(y_train_adjusted, y_train_pred_adjusted)
print(f"Training Accuracy: {train_accuracy}")

# Evaluate the model on the testing set
y_test_pred_adjusted = xgb_model.predict(X_test_selected)
print("\nTesting Set Performance:")
print(classification_report(y_test_adjusted, y_test_pred_adjusted))
test_accuracy = accuracy_score(y_test_adjusted, y_test_pred_adjusted)
print(f"Testing Accuracy: {test_accuracy}")

# Cross-validation
cv_scores = cross_val_score(xgb_model, X_train_selected, y_train_adjusted, cv=5, scoring='accuracy')
print(f"\nCross-validation scores: {cv_scores}")
print(f"Mean cross-validation score: {cv_scores.mean()}")

# Convert predictions back to the original labels for interpretation
y_train_pred = y_train_pred_adjusted + 1
y_test_pred = y_test_pred_adjusted + 1

print("\nConverted Testing Set Performance:")
print(classification_report(y_test, y_test_pred))
print(f"Converted Testing Accuracy: {accuracy_score(y_test, y_test_pred)}")


Training Set Performance:
              precision    recall  f1-score   support

           0       1.00      0.82      0.90     37862
           1       0.63      0.65      0.64     38004
           2       0.66      0.75      0.71     37985

    accuracy                           0.74    113851
   macro avg       0.76      0.74      0.75    113851
weighted avg       0.76      0.74      0.75    113851

Training Accuracy: 0.7433048458072393

Testing Set Performance:
              precision    recall  f1-score   support

           0       0.99      0.82      0.90      9576
           1       0.57      0.59      0.58      9434
           2       0.61      0.70      0.65      9453

    accuracy                           0.70     28463
   macro avg       0.72      0.70      0.71     28463
weighted avg       0.72      0.70      0.71     28463

Testing Accuracy: 0.7016828865544742

Cross-validation scores: [0.69289886 0.70382082 0.70320597 0.70281072 0.70136144]
Mean cross-validation score:

In [None]:
from sklearn.model_selection import RandomizedSearchCV
import xgboost as xgb

# Define the parameter grid
param_grid = {
    'n_estimators': [100, 200, 300],
    'learning_rate': [0.01, 0.05, 0.1, 0.2],
    'max_depth': [3, 5, 7, 10],
    'min_child_weight': [1, 3, 5],
    'gamma': [0, 0.1, 0.2, 0.3],
    'subsample': [0.6, 0.8, 1.0],
    'colsample_bytree': [0.6, 0.8, 1.0]
}

# Initialize the XGBClassifier
xgb_model = xgb.XGBClassifier(random_state=42)

# Initialize the RandomizedSearchCV
random_search = RandomizedSearchCV(estimator=xgb_model, param_distributions=param_grid, n_iter=50, cv=5, verbose=2, random_state=42, n_jobs=-1)

# Fit the random search model
random_search.fit(X_train_selected, y_train_adjusted)

# Get the best model
best_xgb_model = random_search.best_estimator_

# Evaluate the model on the training set
y_train_pred_adjusted = best_xgb_model.predict(X_train_selected)
print("Training Set Performance:")
print(classification_report(y_train_adjusted, y_train_pred_adjusted))
train_accuracy = accuracy_score(y_train_adjusted, y_train_pred_adjusted)
print(f"Training Accuracy: {train_accuracy}")

# Evaluate the model on the testing set
y_test_pred_adjusted = best_xgb_model.predict(X_test_selected)
print("\nTesting Set Performance:")
print(classification_report(y_test_adjusted, y_test_pred_adjusted))
test_accuracy = accuracy_score(y_test_adjusted, y_test_pred_adjusted)
print(f"Testing Accuracy: {test_accuracy}")

# Cross-validation
cv_scores = cross_val_score(best_xgb_model, X_train_selected, y_train_adjusted, cv=5, scoring='accuracy')
print(f"\nCross-validation scores: {cv_scores}")
print(f"Mean cross-validation score: {cv_scores.mean()}")

# Convert predictions back to the original labels for interpretation
y_train_pred = y_train_pred_adjusted + 1
y_test_pred = y_test_pred_adjusted + 1

print("\nConverted Testing Set Performance:")
print(classification_report(y_test, y_test_pred))
print(f"Converted Testing Accuracy: {accuracy_score(y_test, y_test_pred)}")


Fitting 5 folds for each of 50 candidates, totalling 250 fits
Training Set Performance:
              precision    recall  f1-score   support

           0       1.00      0.93      0.96     37862
           1       0.91      0.95      0.93     38004
           2       0.94      0.97      0.96     37985

    accuracy                           0.95    113851
   macro avg       0.95      0.95      0.95    113851
weighted avg       0.95      0.95      0.95    113851

Training Accuracy: 0.9499872640556517

Testing Set Performance:
              precision    recall  f1-score   support

           0       0.95      0.82      0.88      9576
           1       0.72      0.75      0.74      9434
           2       0.80      0.87      0.83      9453

    accuracy                           0.82     28463
   macro avg       0.82      0.82      0.82     28463
weighted avg       0.82      0.82      0.82     28463

Testing Accuracy: 0.8153040789797281

Cross-validation scores: [0.79197225 0.79552042 

#**MLPC**

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, accuracy_score
# Define the MLP model
mlp = MLPClassifier(hidden_layer_sizes=(100,), max_iter=300, alpha=0.0001, solver='adam', random_state=42)

# Train the MLP model
mlp.fit(X_train, y_train)
# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
print(classification_report(y_test, y_pred))
print("Accuracy:", accuracy_score(y_test, y_pred))


              precision    recall  f1-score   support

           1       0.95      0.95      0.95      9576
           2       0.79      0.87      0.83      9434
           3       0.90      0.81      0.86      9453

    accuracy                           0.88     28463
   macro avg       0.88      0.88      0.88     28463
weighted avg       0.88      0.88      0.88     28463

Accuracy: 0.8779819414678706


In [None]:
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report, accuracy_score

# Define the MLP model
mlp = MLPClassifier(hidden_layer_sizes=(100,), max_iter=300, alpha=0.0001, solver='adam', random_state=42)

# Perform cross-validation
cv_scores = cross_val_score(mlp, X_train, y_train, cv=5, scoring='accuracy')

# Output the cross-validation scores
print(f"Cross-validation scores: {cv_scores}")
print(f"Mean cross-validation score: {cv_scores.mean()}")
print(f"Standard deviation of cross-validation scores: {cv_scores.std()}")

# Fit the model on the entire training set
mlp.fit(X_train, y_train)

# Make predictions on the testing set
y_test_pred = mlp.predict(X_test)

# Evaluate the model on the testing set
print("\nTesting Set Performance:")
print(classification_report(y_test, y_test_pred))
test_accuracy = accuracy_score(y_test, y_test_pred)
print(f"Testing Accuracy: {test_accuracy}")

# Compare cross-validation mean score with test accuracy
print(f"\nMean cross-validation accuracy: {cv_scores.mean()}")
print(f"Testing accuracy: {test_accuracy}")

# Check for overfitting
if cv_scores.mean() > test_accuracy + 0.05:  # Adjust threshold as needed
    print("\nThe model may be overfitting.")
else:
    print("\nThe model does not seem to be overfitting.")


Cross-validation scores: [0.3336261  0.33381643 0.33254282 0.33381643 0.33377251]
Mean cross-validation score: 0.3335148561607081
Standard deviation of cross-validation scores: 0.0004910199991446109

Testing Set Performance:
              precision    recall  f1-score   support

           1       0.34      1.00      0.50      9576
           2       0.00      0.00      0.00      9434
           3       0.00      0.00      0.00      9453

    accuracy                           0.34     28463
   macro avg       0.11      0.33      0.17     28463
weighted avg       0.11      0.34      0.17     28463

Testing Accuracy: 0.3364367775708815

Mean cross-validation accuracy: 0.3335148561607081
Testing accuracy: 0.3364367775708815

The model does not seem to be overfitting.


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


#**Model_FINALE**

In [None]:
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
import numpy as np


In [None]:
# Define base models
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
xgb_model = XGBClassifier( eval_metric='mlogloss', random_state=42)

# Define the stacking classifier
stacked_model = StackingClassifier(
    estimators=[
        ('rf', rf_model),
        ('xgb', xgb_model)
    ],
    final_estimator=RandomForestClassifier(n_estimators=100, random_state=42),
    cv=5
)
# Train the stacked model
stacked_model.fit(X_train_selected, y_train_adjusted)

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



In [None]:
# Evaluate the model on the training set
y_train_pred = stacked_model.predict(X_train_selected)
print("Training Set Performance:")
print(classification_report(y_train_adjusted, y_train_pred))
train_accuracy = accuracy_score(y_train_adjusted, y_train_pred)
print(f"Training Accuracy: {train_accuracy}")

# Evaluate the model on the testing set
y_test_pred = stacked_model.predict(X_test_selected)
print("\nTesting Set Performance:")
print(classification_report(y_test_adjusted, y_test_pred))
test_accuracy = accuracy_score(y_test_adjusted, y_test_pred)
print(f"Testing Accuracy: {test_accuracy}")

# Convert predictions back to the original labels for interpretation
y_train_pred = y_train_pred + 1
y_test_pred = y_test_pred + 1

print("\nConverted Testing Set Performance:")
print(classification_report(y_test, y_test_pred))
print(f"Converted Testing Accuracy: {accuracy_score(y_test, y_test_pred)}")

Training Set Performance:
              precision    recall  f1-score   support

           0       0.99      0.81      0.89     37862
           1       0.84      0.99      0.91     38004
           2       1.00      0.99      1.00     37985

    accuracy                           0.93    113851
   macro avg       0.94      0.93      0.93    113851
weighted avg       0.94      0.93      0.93    113851

Training Accuracy: 0.9326312461023618

Testing Set Performance:
              precision    recall  f1-score   support

           0       0.95      0.85      0.90      9576
           1       0.79      0.89      0.84      9434
           2       0.91      0.90      0.91      9453

    accuracy                           0.88     28463
   macro avg       0.88      0.88      0.88     28463
weighted avg       0.88      0.88      0.88     28463

Testing Accuracy: 0.8785440747637283

Converted Testing Set Performance:
              precision    recall  f1-score   support

           1       0

#**NN & rf**

In [None]:
# Adjust target labels for MLP
y_train_adjusted = y_train - 1
y_test_adjusted = y_test - 1

# Define base models
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
mlp_model = MLPClassifier(hidden_layer_sizes=(100,), max_iter=300, alpha=0.0001, solver='adam', random_state=42)

# Train MLP model separately (for comparison)
mlp_model.fit(X_train, y_train_adjusted)
y_pred_mlp = mlp_model.predict(X_test)
print("MLP Classifier Performance:")
print(classification_report(y_test_adjusted, y_pred_mlp))
print("MLP Accuracy:", accuracy_score(y_test_adjusted, y_pred_mlp))

# Define the stacking classifier
stacked_model = StackingClassifier(
    estimators=[
        ('rf', rf_model),
        ('mlp', mlp_model)
    ],
    final_estimator=RandomForestClassifier(n_estimators=100, random_state=42),
    cv=5
)

# Train the stacked model
stacked_model.fit(X_train, y_train_adjusted)

# Evaluate the stacked model on the training set
y_train_pred = stacked_model.predict(X_train)
print("\nStacked Model Training Set Performance:")
print(classification_report(y_train_adjusted, y_train_pred))
train_accuracy = accuracy_score(y_train_adjusted, y_train_pred)
print(f"Stacked Model Training Accuracy: {train_accuracy}")

# Evaluate the stacked model on the testing set
y_test_pred = stacked_model.predict(X_test)
print("\nStacked Model Testing Set Performance:")
print(classification_report(y_test_adjusted, y_test_pred))
test_accuracy = accuracy_score(y_test_adjusted, y_test_pred)
print(f"Stacked Model Testing Accuracy: {test_accuracy}")

# Convert predictions back to the original labels for interpretation
y_train_pred_converted = y_train_pred + 1
y_test_pred_converted = y_test_pred + 1

print("\nConverted Testing Set Performance:")
print(classification_report(y_test, y_test_pred_converted))
print(f"Converted Testing Accuracy: {accuracy_score(y_test, y_test_pred_converted)}")

MLP Classifier Performance:
              precision    recall  f1-score   support

           0       0.34      1.00      0.50      9576
           1       0.00      0.00      0.00      9434
           2       0.00      0.00      0.00      9453

    accuracy                           0.34     28463
   macro avg       0.11      0.33      0.17     28463
weighted avg       0.11      0.34      0.17     28463

MLP Accuracy: 0.3364367775708815


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Stacked Model Training Set Performance:
              precision    recall  f1-score   support

           0       0.98      0.82      0.89     37862
           1       0.84      0.98      0.90     38004
           2       0.99      0.99      0.99     37985

    accuracy                           0.93    113851
   macro avg       0.94      0.93      0.93    113851
weighted avg       0.94      0.93      0.93    113851

Stacked Model Training Accuracy: 0.9303475595295606

Stacked Model Testing Set Performance:
              precision    recall  f1-score   support

           0       0.92      0.85      0.88      9576
           1       0.79      0.84      0.81      9434
           2       0.89      0.91      0.90      9453

    accuracy                           0.86     28463
   macro avg       0.87      0.86      0.86     28463
weighted avg       0.87      0.86      0.86     28463

Stacked Model Testing Accuracy: 0.8638232090784528

Converted Testing Set Performance:
              prec