#**UFC Fight Outcome Prediction Using multi models**

##**Step 1: Install and Import Libraries**

In [1]:
# Install necessary libraries (if not already installed)
!pip install pandas numpy scikit-learn seaborn matplotlib catboost tensorflow

Collecting catboost
  Downloading catboost-1.2.7-cp310-cp310-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.7-cp310-cp310-manylinux2014_x86_64.whl (98.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.7/98.7 MB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.7


In [2]:
# Import data manipulation libraries
import pandas as pd
import numpy as np

# Import visualization libraries
from IPython.display import display,  HTML
import seaborn as sns
import matplotlib.pyplot as plt

# Import machine learning libraries
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import  cross_val_score

# Import models
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from catboost import CatBoostClassifier
from xgboost import XGBClassifier
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.regularizers import l2

# Import load model
from tensorflow.keras.models import load_model

##**Step 2: Load and Prepare Data**

In [3]:
# Load from local directory if uploaded to Colab
original_df = pd.read_csv('ufc-master.csv')

# Preview the data
original_df.head()

Unnamed: 0,RedFighter,BlueFighter,RedOdds,BlueOdds,RedExpectedValue,BlueExpectedValue,Date,Location,Country,Winner,...,FinishDetails,FinishRound,FinishRoundTime,TotalFightTimeSecs,RedDecOdds,BlueDecOdds,RSubOdds,BSubOdds,RKOOdds,BKOOdds
0,Neil Magny,Carlos Prates,550.0,-800.0,550.0,12.5,2024-11-09,"Las Vegas, Nevada, USA",USA,Blue,...,Punch,1.0,4:50,290.0,1100.0,800.0,1600.0,800.0,2000.0,-400.0
1,Gerald Meerschaert,Reinier de Ridder,250.0,-310.0,250.0,32.2581,2024-11-09,"Las Vegas, Nevada, USA",USA,Blue,...,Arm Triangle,3.0,1:44,704.0,700.0,200.0,600.0,165.0,1400.0,450.0
2,Gaston Bolanos,Cortavious Romious,205.0,-250.0,205.0,40.0,2024-11-09,"Las Vegas, Nevada, USA",USA,Red,...,,3.0,5:00,900.0,550.0,275.0,3000.0,165.0,380.0,450.0
3,Luana Pinheiro,Gillian Robertson,360.0,-470.0,360.0,21.2766,2024-11-09,"Las Vegas, Nevada, USA",USA,Blue,...,,3.0,5:00,900.0,600.0,250.0,2200.0,110.0,900.0,500.0
4,Mansur Abdul-Malik,Dusko Todorovic,-485.0,370.0,20.6186,370.0,2024-11-09,"Las Vegas, Nevada, USA",USA,Red,...,Punches,1.0,2:44,164.0,600.0,,800.0,,-250.0,


##**Step 3: Data Cleaning and preparing**

In [4]:
# Load the original dataset again
df = pd.read_csv('ufc-master.csv')

###**3.1 Handle Missing Values**

In [5]:
# Calculate the percentage of missing values
missing_percentages = df.isnull().mean() * 100

# Create a DataFrame to display the results
missing_table = pd.DataFrame({
    'Column': missing_percentages.index,
    'Missing Percentage': missing_percentages.values
})

# Sort the table by missing percentage in descending order
missing_table = missing_table.sort_values(by='Missing Percentage', ascending=False)

# Display the table nicely in Jupyter Notebook
display(missing_table)

Unnamed: 0,Column,Missing Percentage
94,BWFeatherweightRank,99.984589
81,RWFeatherweightRank,99.861304
105,BPFPRank,98.967483
93,BWFlyweightRank,98.890430
80,RWFlyweightRank,98.520573
...,...,...
46,RedLosses,0.000000
45,RedLongestWinStreak,0.000000
39,RedDraws,0.000000
38,RedCurrentWinStreak,0.000000


In [6]:
# Identify numerical and categorical columns
numerical_cols = df.select_dtypes(include=['float64', 'int64']).columns
categorical_cols = df.select_dtypes(include=['object']).columns

# Impute numerical columns with mean
df[numerical_cols] = df[numerical_cols].fillna(df[numerical_cols].mean())

# Impute categorical columns with mode
for col in categorical_cols:
    df[col] = df[col].fillna(df[col].mode()[0])

In [7]:
# Verify Missing Values Are Handled
missing_values = df.isnull().sum()

# Convert to a DataFrame for better display
missing_values_df = pd.DataFrame({
    "Column": missing_values.index,
    "Missing Values": missing_values.values
})

# Display the missing values as a table
display(HTML("<h3>Missing values after handling:</h3>"))
display(missing_values_df)

Unnamed: 0,Column,Missing Values
0,RedFighter,0
1,BlueFighter,0
2,RedOdds,0
3,BlueOdds,0
4,RedExpectedValue,0
...,...,...
113,BlueDecOdds,0
114,RSubOdds,0
115,BSubOdds,0
116,RKOOdds,0


###**3.2 Encode Categorical Variables**

In [8]:
# Define categorical features to encode
categorical_features = ['RedFighter', 'BlueFighter', 'Location', 'Country','WeightClass', 'TitleBout', 'Gender',  'Winner',  'BlueStance',  'RedStance', 'BetterRank',  'Finish', 'FinishDetails', 'FinishRoundTime']

# Initialize LabelEncoder
le = LabelEncoder()

# Encode categorical features
for col in categorical_features:
    df[col] = le.fit_transform(df[col])

# Preview encode data
df.head()

Unnamed: 0,RedFighter,BlueFighter,RedOdds,BlueOdds,RedExpectedValue,BlueExpectedValue,Date,Location,Country,Winner,...,FinishDetails,FinishRound,FinishRoundTime,TotalFightTimeSecs,RedDecOdds,BlueDecOdds,RSubOdds,BSubOdds,RKOOdds,BKOOdds
0,1185,279,550.0,-800.0,550.0,12.5,2024-11-09,68,32,0,...,26,1.0,283,290.0,1100.0,800.0,1600.0,800.0,2000.0,-400.0
1,557,1510,250.0,-310.0,250.0,32.2581,2024-11-09,68,32,0,...,2,3.0,97,704.0,700.0,200.0,600.0,165.0,1400.0,450.0
2,548,386,205.0,-250.0,205.0,40.0,2024-11-09,68,32,1,...,26,3.0,293,900.0,550.0,275.0,3000.0,165.0,380.0,450.0
3,973,662,360.0,-470.0,360.0,21.2766,2024-11-09,68,32,0,...,26,3.0,293,900.0,600.0,250.0,2200.0,110.0,900.0,500.0
4,1017,531,-485.0,370.0,20.6186,370.0,2024-11-09,68,32,1,...,27,1.0,157,164.0,600.0,424.888372,800.0,1096.948168,-250.0,634.561879


###**3.3 Define Features and Targets**

In [9]:
# Define features (exclude target and unnecessary columns)
X = df.drop(['Date', 'Winner', 'Finish', 'FinishRound'], axis=1)

# Define targets
y_winner = df['Winner']
y_method = df['Finish']
y_round = pd.to_numeric(df['FinishRound'], errors='coerce').fillna(0).astype(int)

###**3.4 Map Data**

In [10]:
x_winner = original_df["Winner"]

# Create a mapping dictionary from x_winner (text) to y_winner (numeric)
mapping_dict = dict(zip(x_winner.unique(), y_winner.unique()))

# Convert the mapping dictionary to a DataFrame
mapping_df = pd.DataFrame(list(mapping_dict.items()), columns=["Winner (Text)", "Winner (Numeric)"])

# Display the mapping as a table
display(HTML("<h3>Mapping between 'Winner' text and numeric labels:</h3>"))
display(mapping_df)

Unnamed: 0,Winner (Text),Winner (Numeric)
0,Blue,0
1,Red,1


In [11]:
x_method = original_df['Finish']

# Create a mapping dictionary from x_method (text) to y_method (numeric)
mapping_dict = dict(zip(x_method.unique(), y_method.unique()))

# Convert the mapping dictionary to a DataFrame
mapping_df = pd.DataFrame(list(mapping_dict.items()), columns=["Finish (Text)", "Finish (Numeric)"])

# Display the mapping as a table
display(HTML("<h3>Mapping between 'Finish' text and numeric labels:</h3>"))
display(mapping_df)

Unnamed: 0,Finish (Text),Finish (Numeric)
0,KO/TKO,1
1,SUB,5
2,U-DEC,6
3,S-DEC,4
4,M-DEC,2
5,DQ,0
6,,3


In [12]:
x_round = original_df['FinishRound']

# Create a mapping dictionary from x_round to y_round
mapping_dict = dict(zip(x_round.unique(), y_round.unique()))

# Convert the mapping dictionary to a DataFrame
mapping_df = pd.DataFrame(list(mapping_dict.items()), columns=["Finish Round (Original)", "Finish Round (Mapped)"])

# Display the mapping as a table
display(HTML("<h3>Mapping between 'FinishRound' text and numeric labels:</h3>"))
display(mapping_df)

Unnamed: 0,Finish Round (Original),Finish Round (Mapped)
0,1.0,1
1,3.0,3
2,2.0,2
3,5.0,5
4,4.0,4


##**Step 4: Split Data Based on Date**

###**4.1 Define Cut-off Dates**

In [13]:
# Define the cut-off date for training and testing
train_end_date = pd.to_datetime('2024-03-31')
test_start_date = pd.to_datetime('2024-04-01')

###**4.2 Split the Data**

In [14]:
df['Date'] = pd.to_datetime(df['Date'])

# Create training data: fights up to March 31, 2024
df_train = df[df['Date'] <= train_end_date]

# Create testing data: fights from April 1, 2024 onwards
df_test = df[df['Date'] >= test_start_date]

# Create a summary DataFrame
data_summary = pd.DataFrame({
    'Dataset': ['Training', 'Testing'],
    'Number of Records': [len(df_train), len(df_test)]
})

# Display the summary as a table
display(data_summary)

Unnamed: 0,Dataset,Number of Records
0,Training,6163
1,Testing,326


###**4.3 Prepare Features and Targets for Training and Testing**

In [15]:
# Load the selected features CSV file
selected_features = pd.read_csv('selected_features_40.csv')

# Load the selected features
features_to_use = selected_features['Selected Features'].tolist()

# Features and targets for training data
X_train = df_train[features_to_use]
y_train_winner = df_train['Winner']
y_train_method = df_train['Finish']
y_train_round = pd.to_numeric(df_train['FinishRound'], errors='coerce').fillna(0).astype(int)

# Features and targets for testing data
X_test = df_test[features_to_use]
y_test_winner = df_test['Winner']
y_test_method = df_test['Finish']
y_test_round = pd.to_numeric(df_test['FinishRound'], errors='coerce').fillna(0).astype(int)

In [16]:
# Display the X training data
display(HTML("<h3>X Training Data:</h3>"))
X_train.head()

Unnamed: 0,RedFighter,BlueFighter,RedOdds,BlueOdds,RedExpectedValue,BlueExpectedValue,Location,BlueAvgSigStrLanded,BlueAvgSigStrPct,BlueAvgTDLanded,...,AvgSubAttDif,AvgTDDif,FinishRoundTime,TotalFightTimeSecs,RedDecOdds,BlueDecOdds,RSubOdds,BSubOdds,RKOOdds,BKOOdds
326,506,1175,-218.0,180.0,45.8716,180.0,7,6.26,0.41,1.17,...,-1.0,-0.88,293,1500.0,225.0,300.0,180.0,2800.0,800.0,500.0
327,1569,867,-148.0,124.0,67.5676,124.0,7,4.25,0.36,1.69,...,-0.6,0.69,190,497.0,500.0,300.0,450.0,2500.0,250.0,240.0
328,295,244,145.0,-175.0,145.0,57.1429,7,4.12,0.49,0.93,...,-0.8,-2.51,131,738.0,400.0,450.0,500.0,1600.0,1200.0,-110.0
329,1213,1636,-258.0,210.0,38.7597,210.0,7,2.15,0.48,1.31,...,0.3,0.54,191,198.0,800.0,550.0,150.0,1000.0,250.0,400.0
330,171,1076,-298.0,240.0,33.557,240.0,7,3.65,0.45,1.1,...,0.5,0.07,233,240.0,120.0,300.0,400.0,1800.0,450.0,1000.0


In [17]:
# Display the X test data
display(HTML("<h3>X Test Data:</h3>"))
X_test.head()

Unnamed: 0,RedFighter,BlueFighter,RedOdds,BlueOdds,RedExpectedValue,BlueExpectedValue,Location,BlueAvgSigStrLanded,BlueAvgSigStrPct,BlueAvgTDLanded,...,AvgSubAttDif,AvgTDDif,FinishRoundTime,TotalFightTimeSecs,RedDecOdds,BlueDecOdds,RSubOdds,BSubOdds,RKOOdds,BKOOdds
0,1185,279,550.0,-800.0,550.0,12.5,68,4.39,0.57,0.44,...,-0.2,-1.68,283,290.0,1100.0,800.0,1600.0,800.0,2000.0,-400.0
1,557,1510,250.0,-310.0,250.0,32.2581,68,3.24,0.57,6.39,...,-0.2,4.38,97,704.0,700.0,200.0,600.0,165.0,1400.0,450.0
2,548,386,205.0,-250.0,205.0,40.0,68,2.1,0.37,1.97,...,1.0,1.19,293,900.0,550.0,275.0,3000.0,165.0,380.0,450.0
3,973,662,360.0,-470.0,360.0,21.2766,68,2.89,0.48,2.68,...,0.8,0.88,293,900.0,600.0,250.0,2200.0,110.0,900.0,500.0
4,1017,531,-485.0,370.0,20.6186,370.0,68,5.35,0.57,0.92,...,0.0,0.92,157,164.0,600.0,424.888372,800.0,1096.948168,-250.0,634.561879


###**4.4 Scale Features**

In [18]:
# Initialize scaler
scaler = StandardScaler()

# Fit the scaler to the training data
scaler.fit(X_train)

# Scale the features using the previously fitted scaler
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [19]:
# Convert scaled training data back to DataFrames
X_train_scaled_df = pd.DataFrame(X_train_scaled, columns=X_train.columns)

# Display the scaled training DataFrames
display(HTML("<h3>Scaled Training Data:</h3>"))
display(X_train_scaled_df)

Unnamed: 0,RedFighter,BlueFighter,RedOdds,BlueOdds,RedExpectedValue,BlueExpectedValue,Location,BlueAvgSigStrLanded,BlueAvgSigStrPct,BlueAvgTDLanded,...,AvgSubAttDif,AvgTDDif,FinishRoundTime,TotalFightTimeSecs,RedDecOdds,BlueDecOdds,RSubOdds,BSubOdds,RKOOdds,BKOOdds
0,-0.654674,0.426359,-0.381414,0.479980,-0.599424,0.111045,-1.768143,-0.766533,-0.393829,-0.111905,...,-1.061476,-0.403440,0.712938,2.475694,-0.359016,-0.427773,-1.312647,2.898319,0.784097,-0.328489
1,1.588769,-0.130970,-0.121489,0.253714,-0.340326,-0.304753,-1.768143,-0.872587,-0.869645,0.300628,...,-0.605000,0.497409,-0.447625,-0.459508,0.855953,-0.427773,-0.802940,2.388854,-0.696937,-0.964648
2,-1.099985,-1.258295,0.966483,-0.954382,0.584389,-0.801163,-1.768143,-0.879446,0.367477,-0.302305,...,-0.833238,-1.338718,-1.112413,0.245760,0.414146,0.088148,-0.708550,0.860460,1.861212,-1.821015
3,0.837438,1.260544,-0.529942,0.601193,-0.684356,0.333793,-1.768143,-0.983389,0.272313,-0.000838,...,0.422072,0.411341,-0.436358,-1.334508,2.181374,0.432096,-1.369281,-0.158470,-0.696937,-0.573165
4,-1.361685,0.247218,-0.678471,0.722407,-0.746487,0.556542,-1.768143,-0.904244,-0.013176,-0.167439,...,0.650310,0.141660,0.036882,-1.211599,-0.822913,-0.427773,-0.897330,1.200103,-0.158379,0.894894
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6158,-0.755977,-0.865631,-0.147481,0.298159,-0.376767,-0.223078,-1.248788,-0.044063,0.015015,0.007380,...,0.079715,0.101494,-2.171568,-1.785178,0.008656,0.001777,0.016594,0.006169,0.004167,0.000753
6159,-0.089064,-0.914488,-0.351708,0.459777,-0.578556,0.073920,-1.248788,-0.044063,0.015015,0.007380,...,-1.061476,-2.576210,-1.303963,-1.559843,0.008656,0.001777,0.016594,0.006169,0.004167,0.000753
6160,-1.302592,-1.133438,-0.537369,0.641598,-0.687917,0.408043,-1.248788,-0.674726,-1.059972,-0.246772,...,1.220906,0.675284,-2.137765,-1.776398,0.008656,0.001777,0.016594,0.006169,0.004167,0.000753
6161,0.662268,0.080743,-1.131483,1.106251,-0.862894,1.261912,-1.248788,-0.044063,0.015015,0.007380,...,0.079715,-1.906769,0.712938,0.719840,0.008656,0.001777,0.016594,0.006169,0.004167,0.000753


In [20]:
# Convert scaled test data back to DataFrames
X_test_scaled_df = pd.DataFrame(X_test_scaled, columns=X_test.columns)

# Display the scaled test DataFrames
display(HTML("<h3>Scaled Test Data:</h3>"))
display(X_test_scaled_df)

Unnamed: 0,RedFighter,BlueFighter,RedOdds,BlueOdds,RedExpectedValue,BlueExpectedValue,Location,BlueAvgSigStrLanded,BlueAvgSigStrPct,BlueAvgTDLanded,...,AvgSubAttDif,AvgTDDif,FinishRoundTime,TotalFightTimeSecs,RedDecOdds,BlueDecOdds,RSubOdds,BSubOdds,RKOOdds,BKOOdds
0,0.778344,-1.194962,2.470334,-3.479668,5.420986,-1.132635,-0.100738,-0.865200,1.128782,-0.691039,...,-0.148524,-0.862472,0.600262,-1.065278,3.506795,1.291965,1.368034,-0.498113,4.015444,-2.530577
1,-0.547039,1.032545,1.356370,-1.499844,1.838321,-0.985932,-0.100738,-0.925877,1.128782,4.029297,...,-0.148524,2.614693,-1.495512,0.146261,1.739567,-0.771721,-0.519770,-1.576480,2.399770,-0.450827
2,-0.566033,-1.001344,1.189276,-1.257417,1.300922,-0.928448,-0.100738,-0.986027,-0.774482,0.522762,...,1.220906,0.784304,0.712938,0.719840,1.076857,-0.513760,4.010960,-1.576480,-0.346875,-0.450827
3,0.330922,-0.501919,1.764824,-2.146317,3.151965,-1.067469,-0.100738,-0.944344,0.272313,1.086029,...,0.992667,0.606429,0.712938,0.719840,1.297760,-0.599747,2.500717,-1.669882,1.053376,-0.328489
4,0.423783,-0.738965,-1.372842,1.247667,-0.901001,1.521785,-0.100738,-0.814547,1.128782,-0.310239,...,0.079715,0.629381,-0.819456,-1.434007,1.297760,0.001777,-0.142209,0.006169,-2.043332,0.000753
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
321,0.833217,-0.516395,0.001047,-0.671550,-0.108779,-0.518308,-0.100738,-0.961228,0.081987,-1.040106,...,0.536191,-0.994444,0.712938,0.719840,-0.867094,-0.634142,0.046572,1.200103,3.476886,-0.695503
322,0.919746,1.602541,0.910785,-0.873573,0.405256,-0.746418,-0.100738,-0.914269,0.557803,-0.508572,...,1.677382,-0.701811,0.712938,0.719840,0.414146,-0.599747,-0.614160,-1.347221,-0.293019,-0.450827
323,-0.950141,-0.221445,0.910785,-0.873573,0.405256,-0.746418,-0.100738,-0.845678,-0.679319,-0.088105,...,-1.061476,-1.763322,0.701670,-0.161013,0.008656,-1.012484,0.016594,-0.837756,0.004167,-0.083812
324,-0.732761,-1.175057,-0.188327,0.318361,-0.427821,-0.185953,-0.100738,-0.872587,1.509435,-1.040106,...,-0.719119,-1.837915,-1.168751,-0.646799,-0.624100,-0.513760,-0.802940,1.539746,-0.023740,-0.511996


##**Step 5: Load models training**

###**5.1 Deep Neural network Model**

In [21]:
def train_deep_nn_model(X_train, y_train, input_dim, output_dim, loss_function, activation='relu', final_activation='sigmoid', epochs=100, batch_size=64, class_weights=None):
    model = Sequential()

    # First layer
    model.add(Dense(128, activation=activation, input_shape=(input_dim,)))
    model.add(Dropout(0.3))

    # Second layer
    model.add(Dense(64, activation=activation, kernel_regularizer=l2(0.001)))
    model.add(Dropout(0.2))

    # Third layer
    model.add(Dense(32, activation=activation))

    # Output layer
    model.add(Dense(output_dim, activation=final_activation))

    # Compile the model
    model.compile(optimizer=Adam(learning_rate=0.00005), loss=loss_function, metrics=['accuracy'])

    # Define EarlyStopping callback
    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True, verbose=1)

    # Train the model
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=1, validation_split=0.2, class_weight=class_weights, callbacks=[early_stopping])

    return model

###**5.2 Machine learning Models**

In [22]:
# Define the models to train
model_classes = {
    "Random Forest": lambda: RandomForestClassifier(max_depth=10, random_state=42),
    "KNN": lambda: KNeighborsClassifier(n_neighbors=5, weights="distance"),
    "Decision Tree": lambda: DecisionTreeClassifier(max_depth=5, random_state=42),
    "CatBoost": lambda: CatBoostClassifier(depth=10, verbose=0),
    "XGB": lambda: XGBClassifier(max_depth=10, random_state=42)
}

##**Step 6: Training Models**

###**6.1 Training Winner deep neural network models**

In [23]:
# Train for y_train_winner
deep_nn_model_winner = train_deep_nn_model(
    X_train_scaled,
    y_train_winner,
    input_dim=X_train_scaled.shape[1],
    output_dim=1,
    loss_function='binary_crossentropy',
    final_activation='sigmoid',
    epochs=50,
    batch_size=16
)

# Save the models later for evaluation or inference
deep_nn_model_winner.save('deep_nn_model_winner.h5')

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m309/309[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.5456 - loss: 0.7920 - val_accuracy: 0.6212 - val_loss: 0.7323
Epoch 2/50
[1m309/309[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5890 - loss: 0.7414 - val_accuracy: 0.6448 - val_loss: 0.7104
Epoch 3/50
[1m309/309[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.6189 - loss: 0.7372 - val_accuracy: 0.6553 - val_loss: 0.7009
Epoch 4/50
[1m309/309[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6433 - loss: 0.7104 - val_accuracy: 0.6594 - val_loss: 0.6963
Epoch 5/50
[1m309/309[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6336 - loss: 0.7137 - val_accuracy: 0.6626 - val_loss: 0.6932
Epoch 6/50
[1m309/309[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.6331 - loss: 0.7159 - val_accuracy: 0.6610 - val_loss: 0.6926
Epoch 7/50
[1m309/309[0m 



###**6.2 Training Method deep neural network Models**

In [24]:
# Train for y_train_method
deep_nn_model_method = train_deep_nn_model(
    X_train_scaled,
    y_train_method,
    input_dim=X_train_scaled.shape[1],
    output_dim=len(np.unique(y_train_method)),
    loss_function='sparse_categorical_crossentropy',
    final_activation='softmax',
    epochs=50,
    batch_size=16
)

# Save the models later for evaluation or inference
deep_nn_model_method.save('deep_nn_model_method.h5')

Epoch 1/50
[1m309/309[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.1992 - loss: 1.9583 - val_accuracy: 0.3852 - val_loss: 1.7525
Epoch 2/50
[1m309/309[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.3782 - loss: 1.6720 - val_accuracy: 0.5320 - val_loss: 1.4853
Epoch 3/50
[1m309/309[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.4658 - loss: 1.4624 - val_accuracy: 0.5937 - val_loss: 1.3441
Epoch 4/50
[1m309/309[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5294 - loss: 1.3577 - val_accuracy: 0.6326 - val_loss: 1.2395
Epoch 5/50
[1m309/309[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5766 - loss: 1.2516 - val_accuracy: 0.6545 - val_loss: 1.1524
Epoch 6/50
[1m309/309[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5876 - loss: 1.2071 - val_accuracy: 0.6642 - val_loss: 1.0844
Epoch 7/50
[1m309/309[0m 



###**6.3 Training multi Models**

In [25]:
# Define a function to train models
def train_models(model_classes, X_train, y_train):
    models = {}
    for name, model_class in model_classes.items():
        model = model_class()
        model.fit(X_train, y_train)
        models[name] = model
    return models

In [26]:
# Training 'winner' task (binary classification)
models_winner = train_models(model_classes, X_train_scaled, y_train_winner)

In [27]:
# Training 'method' task (multi-class classification)
models_method = train_models(model_classes, X_train_scaled, y_train_method)

#**7. Testing models**

###**7.1 Evaluate trained DL model**

In [28]:
def evaluate_dl_model_on_test(model, X_test, y_test, is_binary_classification=True):
    # Predict probabilities
    y_pred_prob = model.predict(X_test)

    # Convert probabilities to class predictions
    if is_binary_classification:
        y_pred = (y_pred_prob > 0.5).astype(int)  # For binary classification
    else:
        y_pred = np.argmax(y_pred_prob, axis=1)  # For multi-class classification

    # Calculate accuracy
    accuracy = accuracy_score(y_test, y_pred)
    display(HTML(f'<h3>Deep learning model test accuracy: {accuracy:.4f}</h3>'))
    return accuracy

In [29]:
# Test DL model for 'winner' task (binary classification)
display(HTML('<h3>Evaluating Winner deep learning model...</h3>'))
dl_accuracy_winner = evaluate_dl_model_on_test(deep_nn_model_winner, X_test_scaled, y_test_winner, is_binary_classification=True)

[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step


In [30]:
# Test DL model for 'method' task (multi-class classification)
display(HTML('<h3>Evaluating Method deep learning model...</h3>'))
dl_accuracy_method = evaluate_dl_model_on_test(deep_nn_model_method, X_test_scaled, y_test_method, is_binary_classification=False)

[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step


###**7.2 Evaluate trained ML models**

In [31]:
# Define a function to evaluate models
def evaluate_models(models, X_test, y_test):
    results = []
    for name, model in models.items():
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        results.append({"Model": name, "Test Accuracy": accuracy})
    return pd.DataFrame(results)

In [32]:
# Test ML models for 'winner' task (binary classification)
ml_results_winner = evaluate_models(models_winner, X_test_scaled, y_test_winner)
display(ml_results_winner)

Unnamed: 0,Model,Test Accuracy
0,Random Forest,0.684049
1,KNN,0.613497
2,Decision Tree,0.687117
3,CatBoost,0.690184
4,XGB,0.671779


In [33]:
# Test ML models for 'method' task (multi-class classification)
ml_results_method = evaluate_models(models_method, X_test_scaled, y_test_method)
display(ml_results_method)

Unnamed: 0,Model,Test Accuracy
0,Random Forest,0.714724
1,KNN,0.56135
2,Decision Tree,0.720859
3,CatBoost,0.711656
4,XGB,0.733129


#**8. Compare Winner models accuracy**

In [34]:
# Ensure consistency in lengths and construct DataFrame for the winner task
if len(ml_results_winner['Model']) == len(ml_results_winner['Test Accuracy']):
    results_winner = pd.DataFrame({
        'Model': ml_results_winner['Model'].tolist() + ['Deep Learning'],
        'Test Accuracy': ml_results_winner['Test Accuracy'].tolist() + [dl_accuracy_winner]
    })
    # Sort by Test Accuracy in descending order
    results_winner = results_winner.sort_values(by='Test Accuracy', ascending=False).reset_index(drop=True)
else:
    print("Mismatch in lengths for winner task. Check the input data.")
    results_winner = pd.DataFrame(columns=['Model', 'Test Accuracy'])

# Ensure consistency in lengths and construct DataFrame for the method task
if len(ml_results_method['Model']) == len(ml_results_method['Test Accuracy']):
    results_method = pd.DataFrame({
        'Model': ml_results_method['Model'].tolist() + ['Deep Learning'],
        'Test Accuracy': ml_results_method['Test Accuracy'].tolist() + [dl_accuracy_method]
    })
    # Sort by Test Accuracy in descending order
    results_method = results_method.sort_values(by='Test Accuracy', ascending=False).reset_index(drop=True)
else:
    print("Mismatch in lengths for method task. Check the input data.")
    results_method = pd.DataFrame(columns=['Model', 'Test Accuracy'])

# Display the results
display(HTML('<h3>Winner Task Results</h3>'))
display(results_winner)

display(HTML('<h3>Method Task Results</h3>'))
display(results_method)

Unnamed: 0,Model,Test Accuracy
0,CatBoost,0.690184
1,Decision Tree,0.687117
2,Random Forest,0.684049
3,Deep Learning,0.677914
4,XGB,0.671779
5,KNN,0.613497


Unnamed: 0,Model,Test Accuracy
0,XGB,0.733129
1,Decision Tree,0.720859
2,Random Forest,0.714724
3,CatBoost,0.711656
4,Deep Learning,0.690184
5,KNN,0.56135
