In [1]:
!pip install -q hopsworks[python]

In [2]:
import os
from datetime import datetime
import pandas as pd
import matplotlib.pyplot as plt
from xgboost import XGBRegressor
from xgboost import plot_importance
from sklearn.metrics import mean_squared_error, r2_score
import hopsworks


import warnings
warnings.filterwarnings("ignore")

# 1. Read your Hopsworks API Key from a file or environment variable
#    For example, if stored in 'hopsworks-api-key.txt'
with open('../data/hopsworks-api-key.txt', 'r') as f:
    api_key = f.read().strip()

# 2. Set the environment variable for Hopsworks
os.environ["HOPSWORKS_API_KEY"] = api_key

project = hopsworks.login()
fs = project.get_feature_store()

# Retrieve feature groups for Clash Royale
player_stats_fg = fs.get_feature_group(
    name='clash_royale_onehotencoding',
    version=5,
)
game_events_fg = fs.get_feature_group(
    name='clash_royale_dataset_onehotencoding',
    version=5,
)

df = player_stats_fg.read()

2025-01-08 01:29:11,040 INFO: Initializing external client
2025-01-08 01:29:11,044 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-01-08 01:29:13,895 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1175700
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.69s) 


In [3]:
dfstats = player_stats_fg.read()

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.55s) 


In [4]:
dfdata = game_events_fg.read()
dfdata

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (119.30s) 


Unnamed: 0,player_name,player_name2,deck1,deck2,result
0,#PRG9UCG8C,#2URQP2CJ,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...",1
1,#QY8Q9LGY,#GG0QRG8Y,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1
2,#URQJURR0,#9U8U9PV92,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1
3,#2V8G9RPQ8,#LYPV9GPC,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1
4,#PUVYRQRC,#2JC9PQ2L,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1
...,...,...,...,...,...
399842,#22GC8P92,#22LJP8CYQ,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1
399843,#VCL2QYJG,#2Y0CJU9RQ,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1
399844,#PRQUQL9YY,#Y28JJ9G0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1
399845,#2YQ9RPJ20,#PCL8GG2G,"[0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1


In [5]:
dfdata

Unnamed: 0,player_name,player_name2,deck1,deck2,result
0,#PRG9UCG8C,#2URQP2CJ,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...",1
1,#QY8Q9LGY,#GG0QRG8Y,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1
2,#URQJURR0,#9U8U9PV92,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1
3,#2V8G9RPQ8,#LYPV9GPC,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1
4,#PUVYRQRC,#2JC9PQ2L,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1
...,...,...,...,...,...
399842,#22GC8P92,#22LJP8CYQ,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1
399843,#VCL2QYJG,#2Y0CJU9RQ,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1
399844,#PRQUQL9YY,#Y28JJ9G0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1
399845,#2YQ9RPJ20,#PCL8GG2G,"[0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1


In [6]:
df_1 = dfdata.iloc[:1000,:]
df_test = dfdata.iloc[1000:1100, :]

In [7]:
df_test['result']

1000    1
1001    1
1002    1
1003    1
1004    1
       ..
1095    1
1096    1
1097    1
1098    1
1099    1
Name: result, Length: 100, dtype: int64

In [8]:
# prompt: what are the different values for the column result

import pandas as pd

# Assuming dfdata is your DataFrame
# Replace with your actual DataFrame if it's named differently

# Check for unique values in the 'result' column
unique_results = dfdata['result'].unique()

# Print the unique values
unique_results

array([1], dtype=int64)

In [9]:
import pandas as pd

# Assuming the DataFrame is named df_1
# Create binary columns for deck1
deck1_columns = [f"deck1_{i}" for i in range(181)]
deck1_df = pd.DataFrame(df_1['deck1'].tolist(), columns=deck1_columns)

# Create binary columns for deck2
deck2_columns = [f"deck2_{i}" for i in range(181)]
deck2_df = pd.DataFrame(df_1['deck2'].tolist(), columns=deck2_columns)

# Combine all columns with the result column
result_column = df_1['result']
final_df = pd.concat([deck1_df, deck2_df, result_column], axis=1)

# Display or save the final DataFrame
final_df.head()  # To preview the DataFrame
# You can save the DataFrame to a CSV file if needed:


Unnamed: 0,deck1_0,deck1_1,deck1_2,deck1_3,deck1_4,deck1_5,deck1_6,deck1_7,deck1_8,deck1_9,...,deck2_172,deck2_173,deck2_174,deck2_175,deck2_176,deck2_177,deck2_178,deck2_179,deck2_180,result
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,1
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
4,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [10]:
# Count non-zero elements in each row for deck1 and deck2
deck1_non_zero_count = (deck1_df != 0).sum(axis=1)
deck2_non_zero_count = (deck2_df != 0).sum(axis=1)

print(deck1_non_zero_count)

0      8
1      8
2      8
3      8
4      7
      ..
995    8
996    7
997    8
998    8
999    7
Length: 1000, dtype: int64


In [11]:
'''
DOBBIAMO CREARE ANCHE I DATI OPPOSTI!
'''

'\nDOBBIAMO CREARE ANCHE I DATI OPPOSTI!\n'

In [12]:
# Create a new DataFrame with deck2 first, then deck1, and result column set to 0
result_column_zero = pd.Series([0] * len(df_1), name="result")
reversed_df = pd.concat([deck2_df, deck1_df, result_column_zero], axis=1)
reversed_df

Unnamed: 0,deck2_0,deck2_1,deck2_2,deck2_3,deck2_4,deck2_5,deck2_6,deck2_7,deck2_8,deck2_9,...,deck1_172,deck1_173,deck1_174,deck1_175,deck1_176,deck1_177,deck1_178,deck1_179,deck1_180,result
0,0,0,1,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
996,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
997,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
998,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [13]:
df_complete = pd.concat([final_df, reversed_df], ignore_index=True)
df_complete

Unnamed: 0,deck1_0,deck1_1,deck1_2,deck1_3,deck1_4,deck1_5,deck1_6,deck1_7,deck1_8,deck1_9,...,deck2_172,deck2_173,deck2_174,deck2_175,deck2_176,deck2_177,deck2_178,deck2_179,deck2_180,result
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,1
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
4,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1996,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1997,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1998,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [14]:
df_shuffled = df_complete.sample(frac=1, random_state=42).reset_index(drop=True)
final_df = df_shuffled.copy()
final_df

Unnamed: 0,deck1_0,deck1_1,deck1_2,deck1_3,deck1_4,deck1_5,deck1_6,deck1_7,deck1_8,deck1_9,...,deck2_172,deck2_173,deck2_174,deck2_175,deck2_176,deck2_177,deck2_178,deck2_179,deck2_180,result
0,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
4,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1996,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1997,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,1
1998,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [15]:
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Prepare data
X = final_df.iloc[:, :-1].values  # Features: all columns except the result
y = final_df.iloc[:, -1].values  # Target: the result column

In [16]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Create the neural network model
model = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1, activation='tanh')  # Output layer with tanh activation for -1 to 1 range
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2)

# Evaluate the model
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=2)
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

Epoch 1/20
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 10ms/step - accuracy: 0.4689 - loss: 0.4894 - val_accuracy: 0.3500 - val_loss: 0.3640
Epoch 2/20
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5572 - loss: 0.2619 - val_accuracy: 0.2406 - val_loss: 0.3974
Epoch 3/20
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6142 - loss: 0.2272 - val_accuracy: 0.2313 - val_loss: 0.4200
Epoch 4/20
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6118 - loss: 0.2217 - val_accuracy: 0.2094 - val_loss: 0.4488
Epoch 5/20
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6215 - loss: 0.2126 - val_accuracy: 0.2031 - val_loss: 0.4923
Epoch 6/20
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6468 - loss: 0.1966 - val_accuracy: 0.2000 - val_loss: 0.4986
Epoch 7/20
[1m40/40[0m [32m━━━━━━━━━

In [17]:
y = (y == 1).astype(int)  # Map -1 to 0 and keep 1 as 1
y

array([0, 1, 0, ..., 1, 0, 0])

In [18]:
print("Unique classes in y after transformation:", set(y))

Unique classes in y after transformation: {0, 1}


In [19]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [20]:
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report

# Initialize the XGBoost model
xgb_model = XGBClassifier(use_label_encoder=False, eval_metric='logloss')

# Train the model
xgb_model.fit(X_train, y_train)

# Predict outcomes for the test set
y_pred = xgb_model.predict(X_test)

# Invert predictions if necessary
y_pred_corrected = 1 - y_pred  # Flip the predictions

# Evaluate the model with corrected predictions
xgb_accuracy_corrected = accuracy_score(y_test, y_pred_corrected)
xgb_report_corrected = classification_report(y_test, y_pred_corrected)

# Print results
print(f"XGBoost Test Accuracy (Corrected): {xgb_accuracy_corrected:.4f}")
print("Classification Report (Corrected Predictions):\n", xgb_report_corrected)


XGBoost Test Accuracy (Corrected): 0.9000
Classification Report (Corrected Predictions):
               precision    recall  f1-score   support

           0       0.91      0.89      0.90       200
           1       0.89      0.92      0.90       200

    accuracy                           0.90       400
   macro avg       0.90      0.90      0.90       400
weighted avg       0.90      0.90      0.90       400



In [21]:
df_test

Unnamed: 0,player_name,player_name2,deck1,deck2,result
1000,#PPGQVRJ2V,#YVUQ2Q2RQ,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1
1001,#8JU2RCGU9,#2QRYGPC2R,"[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1
1002,#9LLRGCG90,#8LPL20J,"[0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, ...",1
1003,#289JPR29,#2YJJLYPPG,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1
1004,#8JYJCPVJV,#YYJJYC8RP,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, ...","[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1
...,...,...,...,...,...
1095,#9RL2LC8QG,#YJ09LUGG9,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1
1096,#U088GYV,#CV8UCGVQ,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...",1
1097,#2CGL8V9UY,#G20PPL0JG,"[0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1
1098,#JP9LVJJ,#2C2PJJYPG,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1


In [22]:
df_test['result']

1000    1
1001    1
1002    1
1003    1
1004    1
       ..
1095    1
1096    1
1097    1
1098    1
1099    1
Name: result, Length: 100, dtype: int64

In [23]:
# Creare le colonne binarie per deck1
deck1_test_df = pd.DataFrame(df_test['deck1'].tolist(), columns=deck1_columns).reset_index(drop=True)

# Creare le colonne binarie per deck2
deck2_test_df = pd.DataFrame(df_test['deck2'].tolist(), columns=deck2_columns).reset_index(drop=True)

# Combinare tutte le colonne con la colonna result
result_column_test = df_test['result'].reset_index(drop=True)
final_test_df = pd.concat([deck1_test_df, deck2_test_df, result_column_test], axis=1).reset_index(drop=True)

# Contare gli elementi non-zero per ogni riga in deck1 e deck2 (opzionale, se richiesto)
deck1_test_non_zero_count = (deck1_test_df != 0).sum(axis=1)
deck2_test_non_zero_count = (deck2_test_df != 0).sum(axis=1)
final_test_df

Unnamed: 0,deck1_0,deck1_1,deck1_2,deck1_3,deck1_4,deck1_5,deck1_6,deck1_7,deck1_8,deck1_9,...,deck2_172,deck2_173,deck2_174,deck2_175,deck2_176,deck2_177,deck2_178,deck2_179,deck2_180,result
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,0,0,1,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
3,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,1
4,0,0,0,0,0,0,1,0,0,1,...,0,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,1
96,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
97,0,0,0,1,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
98,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [24]:
X_test = final_test_df.iloc[:, :-1].values
y_test = final_test_df.iloc[:, -1].values

In [25]:
# Predict outcomes for the test set
y_pred = xgb_model.predict(X_test)

# Invert predictions if necessary
y_pred_corrected = 1 - y_pred  # Flip the predictions

# Evaluate the model with corrected predictions
xgb_accuracy_corrected = accuracy_score(y_test, y_pred)
xgb_report_corrected = classification_report(y_test, y_pred)

# Print results
print(f"XGBoost Test Accuracy (Corrected): {xgb_accuracy_corrected:.4f}")
print("Classification Report (Corrected Predictions):\n", xgb_report_corrected)

XGBoost Test Accuracy (Corrected): 0.3400
Classification Report (Corrected Predictions):
               precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       1.00      0.34      0.51       100

    accuracy                           0.34       100
   macro avg       0.50      0.17      0.25       100
weighted avg       1.00      0.34      0.51       100



In [29]:
# Predicting target values on the test set

# Calculating Mean Squared Error (MSE) using sklearn
mse = mean_squared_error(y_test, y_pred)
print("MSE:", mse)

# Calculating R squared using sklearn
r2 = r2_score(y_test, y_pred)
print("R squared:", r2)

MSE: 0.66
R squared: 0.0


In [30]:
# Print 10 examples of y_test and y_pred_corrected
print("\nExamples of y_test and Corrected Predictions:")
for i in range(100):
    print(f"{y_test[i]} {y_pred[i]}")


Examples of y_test and Corrected Predictions:
1 1
1 0
1 1
1 0
1 0
1 0
1 0
1 1
1 1
1 0
1 1
1 0
1 1
1 1
1 0
1 0
1 0
1 0
1 1
1 0
1 0
1 0
1 1
1 0
1 0
1 1
1 0
1 0
1 1
1 0
1 1
1 0
1 0
1 0
1 0
1 0
1 0
1 0
1 1
1 0
1 0
1 0
1 0
1 1
1 0
1 0
1 1
1 0
1 0
1 0
1 0
1 0
1 0
1 0
1 1
1 1
1 1
1 0
1 0
1 0
1 1
1 0
1 1
1 1
1 0
1 0
1 0
1 0
1 1
1 0
1 1
1 0
1 0
1 0
1 0
1 0
1 1
1 1
1 1
1 1
1 0
1 1
1 0
1 0
1 1
1 1
1 1
1 0
1 0
1 1
1 1
1 0
1 0
1 1
1 0
1 0
1 0
1 0
1 0
1 0


In [31]:
# Creating a directory for the model artifacts if it doesn't exist
model_dir = "clash_royale_model"
if not os.path.exists(model_dir):
    os.mkdir(model_dir)

In [32]:
from hsml.schema import Schema
from hsml.model_schema import ModelSchema

# Creating input and output schemas using the 'Schema' class for features (X) and target variable (y)
input_schema = Schema(X_train)
output_schema = Schema(y_train)

# Creating a model schema using 'ModelSchema' with the input and output schemas
model_schema = ModelSchema(input_schema=input_schema, output_schema=output_schema)

# Converting the model schema to a dictionary representation
schema_dict = model_schema.to_dict()

In [33]:
xgb_model.save_model(model_dir + "/model.json")

In [34]:
res_dict = { 
        "MSE": str(mse),
        "R squared": str(r2),
    }

In [36]:
mr = project.get_model_registry()

# Creating a Python model in the model registry named 'air_quality_xgboost_model'

# Convert X_test to a DataFrame to use the sample method
X_test_df = pd.DataFrame(X_test)

aq_model = mr.python.create_model(
    name="clash_royale_xgboost_model", 
    metrics= res_dict,
    model_schema=model_schema,
    input_example=X_test_df.sample().values, 
    description="Win Probability rate",
)

# Saving the model artifacts to the 'air_quality_model' directory in the model registry
aq_model.save(model_dir)

  0%|          | 0/6 [00:00<?, ?it/s]

Uploading: 0.000%|          | 0/322920 elapsed<00:00 remaining<?

Uploading: 0.000%|          | 0/1088 elapsed<00:00 remaining<?

Uploading: 0.000%|          | 0/224 elapsed<00:00 remaining<?

Model created, explore it at https://c.app.hopsworks.ai:443/p/1175700/models/clash_royale_xgboost_model/1


Model(name: 'clash_royale_xgboost_model', version: 1)