In [54]:
import os
from datetime import datetime
import pandas as pd
import matplotlib.pyplot as plt
from xgboost import XGBRegressor
from xgboost import plot_importance
from sklearn.metrics import mean_squared_error, r2_score
import hopsworks


import warnings
warnings.filterwarnings("ignore")

# Load the Hopsworks API key from a local text file kept outside the notebook,
# stripping the trailing newline / surrounding whitespace.
with open('../data/hopsworks-api-key.txt') as key_file:
    api_key = key_file.read().strip()

# Publish the key through the environment; login() below is called without
# arguments (presumably it picks the key up from HOPSWORKS_API_KEY).
os.environ["HOPSWORKS_API_KEY"] = api_key

# Open the project session and get a handle on its feature store.
project = hopsworks.login()
fs = project.get_feature_store()

# Clash Royale feature groups, both pinned to version 4.
player_stats_fg = fs.get_feature_group(name='clash_royale_onehotencoding', version=4)
game_events_fg = fs.get_feature_group(name='clash_royale_dataset_onehotencoding', version=4)


2025-01-05 23:07:47,498 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-01-05 23:07:47,504 INFO: Initializing external client
2025-01-05 23:07:47,506 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-01-05 23:07:48,965 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1175700


In [48]:
# Build one query per feature group that selects every column.
# No prefixes are applied here, and only `player_stats` is passed to the
# feature view below; `game_events` is prepared but not used in this cell.
player_stats = player_stats_fg.select_all()
game_events = game_events_fg.select_all()

# Fetch version 1 of the feature view, creating it on first use.
# The label column is 'player_name'; the features come from `player_stats`.
feature_view = fs.get_or_create_feature_view(
    name='clash_royale_fv',
    version=1,
    description="Clash Royale game prediction features",
    query=player_stats,
    labels=['player_name'],
)


In [66]:
from sklearn.preprocessing import LabelEncoder

# NOTE(review): X_train, X_test, y_train and y_test are not created by any cell
# shown above this one; they have to come from an earlier split cell. Confirm
# the notebook still works after "Restart Kernel -> Run All".

# Columns that are label-encoded to integer codes before training.
categorical_columns = ['deck', 'deck2', 'result2']  # Replace with actual columns

# Snapshot of train + test, taken before any encoding, so that the encoders and
# the category set below cover every value that occurs in either split.
combined_data = pd.concat([X_train, X_test])

# One fitted encoder per column. Refitting a single shared encoder would
# overwrite the mapping of every column except the last, making it impossible
# to inverse-transform the earlier ones.
# NOTE: the columns are overwritten in place, so re-running this cell encodes
# the already-encoded integers again (via their string form).
label_encoders = {}
for col in categorical_columns:
    encoder = LabelEncoder()
    encoder.fit(combined_data[col].astype(str))
    X_train[col] = encoder.transform(X_train[col].astype(str))
    X_test[col] = encoder.transform(X_test[col].astype(str))
    label_encoders[col] = encoder
# After the loop `encoder` is the one fitted on the last column, as before.

# Cast player_name2 with ONE categorical dtype shared by both splits.
# Casting each frame separately gives each its own category -> code mapping, so
# a model that reads the integer codes can see different codes for the same
# name in train and test.
player_name_dtype = combined_data['player_name2'].astype('category').dtype
X_train['player_name2'] = X_train['player_name2'].astype(player_name_dtype)
X_test['player_name2'] = X_test['player_name2'].astype(player_name_dtype)

# Train the model; enable_categorical lets XGBoost consume the category column.
xgb_regressor = XGBRegressor(enable_categorical=True)
xgb_regressor.fit(X_train, y_train)

# Predict and evaluate on the held-out split.
# NOTE(review): the recorded run printed MSE 0.0 / R2 1.0. A perfect score
# usually points to target leakage or a degenerate target — verify that no
# feature duplicates the label before trusting these numbers.
y_pred = xgb_regressor.predict(X_test)
print(f"MSE: {mean_squared_error(y_test, y_pred)}")
print(f"R2: {r2_score(y_test, y_pred)}")


MSE: 0.0
R2: 1.0


In [65]:
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Error metrics for the predictions produced by the training cell.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # RMSE is in the same units as the target

# One report line per metric, emitted by a single print call.
print(
    f"MAE: {mae}",
    f"RMSE: {rmse}",
    f"R²: {r2_score(y_test, y_pred)}",
    sep="\n",
)


MAE: 0.0
RMSE: 0.0
R²: 1.0
