In [None]:
# Win rate per side, computed from `df_objectives`, which is expected to have a
# 'side' column (e.g., 'blue' or 'red') and a 'result' column (1=win, 0=loss).

# Total number of games for each side
total_games = df_objectives.groupby('side').size()

# Number of wins for each side. Re-indexing against `total_games` keeps a side
# that never won in the result with a count of 0; without it that side would be
# missing here and its win percentage would come out as NaN after the division.
wins = (
    df_objectives[df_objectives['result'] == 1]
    .groupby('side')
    .size()
    .reindex(total_games.index, fill_value=0)
)

# Win percentage for each side
win_percentage = (wins / total_games) * 100

# Print the win percentage for each side
print(win_percentage)

## Clustering

In [None]:
# Pairwise correlation of every numeric column in the full frame.
corrmat_all = df.corr(numeric_only=True)

# Square-celled heatmap; the colour scale is capped at 0.8.
sns.heatmap(data=corrmat_all, square=True, vmax=0.8)

In [None]:
# Column whose correlation with every other numeric column we want to inspect.
column_name = 'result'
correlation_values = corrmat_all.loc[:, column_name]

# Rank from the most positive to the most negative correlation.
sorted_correlation_values = correlation_values.sort_values(ascending=False)

# Show the 15 highest-ranked entries.
print(sorted_correlation_values.iloc[:15])

In [None]:
# Correlation matrix of the numeric columns in `df_objectives`, drawn as a heatmap.
# NOTE(review): the notes previously attached to this cell talked about song
# features (energy, loudness, popularity, length) and were left over from a
# different analysis; an interpretation for this data is still TODO.
corrmat = df_objectives.corr(numeric_only=True)
sns.heatmap(data=corrmat, square=True, vmax=0.8);

In [None]:
sns.set_theme(style="ticks")

# Joint KDE of turret plates vs. gold difference at 15, one density per `result` value.
g = sns.jointplot(data=df_objectives, kind="kde", hue="result", x="turretplates", y="golddiffat15")

In [None]:
sns.set_theme(style="ticks")

# Joint KDE of kills vs. damage to champions, one density per `result` value.
g = sns.jointplot(data=df_objectives, kind="kde", hue="result", x="kills", y="damagetochampions")

In [None]:
from sklearn.cluster import KMeans

# Collect the within-cluster sum of squares (inertia) for 1 through 10 clusters;
# the values feed the elbow plot. The seed is fixed so the curve is repeatable.
wcss = []
for k in range(1, 11):
    # fit() returns the estimator itself, so `kmeans` is the fitted model.
    kmeans = KMeans(n_clusters=k, init='k-means++', n_init='auto', random_state=42).fit(X)
    wcss.append(kmeans.inertia_)

In [None]:
import seaborn as sns

# Elbow Method: plot WCSS against the number of clusters and look for the bend
# in the curve to pick how many clusters to build.
plt.figure(figsize=(10, 5))
sns.lineplot(x=range(1, 11), y=wcss, color='red', marker='o')
plt.ylabel('WCSS')
plt.xlabel('Number of clusters')
plt.title('Elbow')
plt.show()

In [None]:
from sklearn.cluster import KMeans

# Fit a 3-cluster model on X. The seed is fixed (same value as in the elbow
# cell) so that cluster ids and the colours below do not change between runs.
kmeans = KMeans(n_clusters=3, init='k-means++', n_init='auto', random_state=42)
# fit_predict fits the model and returns the cluster id of every training row.
labels = kmeans.fit_predict(X)

# Scatter of kills vs. damage to champions, coloured by assigned cluster.
ax = plt.axes()
ax.set_facecolor("lightblue")
plt.scatter(df_objectives['kills'], df_objectives['damagetochampions'], c=labels)
plt.xlabel('Kills')
plt.ylabel('Damage Dealt to Champions')
plt.show()

In [None]:
# Raw cluster ids from the most recently fitted KMeans model.
labels = kmeans.labels_

# KMeans numbers its clusters arbitrarily (and there may be more clusters than
# classes), so a cluster id cannot be compared with the class value in `y`
# directly. Map every cluster to the class that occurs most often inside it and
# score the mapped labels instead. The resulting figure is the cluster purity.
y_true = np.asarray(y).ravel()
cluster_to_class = pd.Series(y_true).groupby(labels).agg(lambda s: s.mode().iloc[0])
aligned_labels = pd.Series(labels).map(cluster_to_class).to_numpy()
correct_labels = int((y_true == aligned_labels).sum())

print("Result: %d out of %d samples were correctly labeled." % (correct_labels, y_true.size))
print('Accuracy score: {0:0.3f}'. format(correct_labels/float(y_true.size)))

In [None]:
from sklearn.cluster import KMeans

# Fit a 2-cluster model on X. The seed is fixed (same value as in the elbow
# cell) so that cluster ids and the colours below do not change between runs.
kmeans = KMeans(n_clusters=2, init='k-means++', n_init='auto', random_state=42)
# fit_predict fits the model and returns the cluster id of every training row.
labels = kmeans.fit_predict(X)

# Scatter of gold difference at 15 vs. turret plates, coloured by assigned
# cluster. The axis labels name the columns that are actually plotted.
ax = plt.axes()
ax.set_facecolor("lightblue")
plt.scatter(df_objectives['golddiffat15'], df_objectives['turretplates'], c=labels)
plt.xlabel('Gold Difference at 15')
plt.ylabel('Turret Plates')
plt.show()

In [None]:
# Raw cluster ids from the most recently fitted KMeans model.
labels = kmeans.labels_

# KMeans numbers its clusters arbitrarily, so cluster 0 is not necessarily
# class 0: a direct comparison with `y` can report the complement of the real
# agreement. Map every cluster to the class that occurs most often inside it
# and score the mapped labels instead. The resulting figure is the cluster purity.
y_true = np.asarray(y).ravel()
cluster_to_class = pd.Series(y_true).groupby(labels).agg(lambda s: s.mode().iloc[0])
aligned_labels = pd.Series(labels).map(cluster_to_class).to_numpy()
correct_labels = int((y_true == aligned_labels).sum())

print("Result: %d out of %d samples were correctly labeled." % (correct_labels, y_true.size))
print('Accuracy score: {0:0.3f}'. format(correct_labels/float(y_true.size)))

In [None]:
# Raw scatter of turret plates (x) against gold difference at 15 (y).
df_objectives.plot.scatter(x='turretplates', y='golddiffat15')

In [None]:
import os
import warnings
import datetime as dt
import math

from pandas.plotting import autocorrelation_plot
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.preprocessing import MinMaxScaler
#from common.utils import load_data, mape
from IPython.display import Image

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Show pandas floats with thousands separators and two decimals.
pd.options.display.float_format = '{:,.2f}'.format
# Print numpy arrays with two digits of precision.
np.set_printoptions(precision=2)
# NOTE(review): this suppresses every warning for the rest of the session, not
# only the ones raised while fitting models, so real problems may go unnoticed.
warnings.filterwarnings("ignore") # specify to ignore warning messages


In [None]:
# Plot the 'gamelength' column of df_times against the frame's index.
df_times.plot(y='gamelength', figsize=(15, 8), fontsize=12, subplots=True)
plt.ylabel('Game Length', fontsize=12)
plt.xlabel('Date', fontsize=12)
plt.show()

In [None]:
# Boundaries for the train/test split of df_times: the cells below treat rows
# from train_start_dt (inclusive) up to test_start_dt (exclusive) as training
# data and rows from test_start_dt onward as test data.
# NOTE(review): the original remark about a "strong seasonal component" is not
# demonstrated anywhere in view — confirm it applies to this series.
train_start_dt = '2023-10-01 19:59:21'
test_start_dt = '2023-11-10 00:00:00'


In [None]:
# Plot the training and test portions of 'gamelength' on one set of axes. The
# outer join keeps every timestamp, leaving NaN where a row belongs to the
# other portion.
(
    df_times.loc[
        (df_times.index >= train_start_dt) & (df_times.index < test_start_dt),
        ['gamelength'],
    ]
    .rename(columns={'gamelength': 'train'})
    .join(
        df_times[test_start_dt:][['gamelength']].rename(columns={'gamelength': 'test'}),
        how='outer',
    )
    .plot(y=['train', 'test'], figsize=(15, 8), fontsize=12)
)
plt.xlabel('timestamp', fontsize=12)
plt.ylabel('gamelength', fontsize=12)
plt.show()

In [None]:
# Training rows: train_start_dt <= timestamp < test_start_dt, 'gamelength' only.
train = df_times.loc[
    (df_times.index >= train_start_dt) & (df_times.index < test_start_dt),
    ['gamelength'],
].copy()
# Test rows: everything from test_start_dt onward, 'gamelength' only.
test = df_times.loc[df_times.index >= test_start_dt, ['gamelength']].copy()

print('Training data shape: ', train.shape)
print('Test data shape: ', test.shape)

In [None]:
# Fit a MinMaxScaler (default settings) on the training frame only and overwrite
# train['gamelength'] with the scaled values. The fitted `scaler` is reused by
# later cells to scale the test set and to inverse-transform the forecasts.
# NOTE(review): `train` is overwritten in place, so running this cell a second
# time re-fits the scaler on already-scaled values; re-run the split cell first.
scaler = MinMaxScaler()
train['gamelength'] = scaler.fit_transform(train)
train.head(10)

In [None]:
# Scale the test data with the scaler that was fitted on the training data
# (transform only, no re-fit), so both sets share the same scale.
# NOTE(review): `test` is overwritten in place; running this cell twice scales
# the values twice. Re-run the split cell first.
test['gamelength'] = scaler.transform(test)
test.head()

In [None]:
# Number of steps to forecast ahead.
# NOTE(review): the original comment called this "3 hours"; whether the rows of
# this series are hourly is not visible here — confirm.
HORIZON = 3

# Non-seasonal (p, d, q) order and seasonal (P, D, Q, s) order passed to SARIMAX.
# NOTE(review): the walk-forward cell further down uses order (2, 1, 0) rather
# than (4, 1, 0) — confirm which one is intended.
order = (4, 1, 0)
seasonal_order = (1, 1, 0, 24)

# Fit a single model on the whole `train` frame and print its fit summary.
model = SARIMAX(endog=train, order=order, seasonal_order=seasonal_order)
results = model.fit()

print(results.summary())

In [None]:
# Number of forecast steps; together with the base column this yields HORIZON
# value columns per row.
HORIZON = 3

# For each test row, add the values observed 1 .. HORIZON-1 rows later as extra
# columns ('gamelength+1', 'gamelength+2', ...).
test_shifted = test.copy()
for t in range(1, HORIZON):
    test_shifted[f'gamelength+{t}'] = test_shifted['gamelength'].shift(periods=-t)

# The last rows have no future values to look up; drop them.
test_shifted = test_shifted.dropna(how='any')
print(test_shifted.head(5))


In [None]:
# Walk-forward validation: for every row of the shifted test frame, fit SARIMAX
# on a fixed-length window of the most recent observations, forecast HORIZON
# steps ahead, then slide the window forward by one observed value.
#
# Author's notes (not verifiable from this cell): the run took about 2.8 minutes
# (169.315 seconds) and was expected to conclude in 46 iterations. %%time is
# skipped because it is not supported in VSCode, and the SARIMAX fit output is
# silenced to avoid printing the model fit details.

training_window = 720 # keep at most this many of the latest training observations (originally described as 30 days of hourly data — TODO confirm the series is hourly)
train_ts = train['gamelength']
test_ts = test_shifted

# Start the history with the last `training_window` training values.
history = [x for x in train_ts]
history = history[(-training_window):]

# One forecast array of length HORIZON is appended per test row.
predictions = list()

# NOTE(review): order differs from the (4, 1, 0) used for the one-off fit
# earlier in the notebook — confirm which one is intended.
order = (2, 1, 0)
seasonal_order = (1, 1, 0, 24)


for t in range(test_ts.shape[0]):
    # Re-fit from scratch on the current window.
    model = SARIMAX(endog=history, order=order, seasonal_order=seasonal_order)
    # disp=False keeps the optimizer from printing convergence output
    model_fit = model.fit(disp=False)
    yhat = model_fit.forecast(steps = HORIZON)
    predictions.append(yhat)
    # Row t of the shifted frame: the observed value followed by its look-ahead values.
    obs = list(test_ts.iloc[t])
    # move the training window: add the newest observed value, drop the oldest
    history.append(obs[0])
    history.pop(0)
    print(test_ts.index[t])
    print(t+1, ': predicted =', yhat, 'expected =', obs)


In [None]:
# Wide frame: one row per forecast origin, one column per step (t+1 ... t+HORIZON).
eval_df = pd.DataFrame(predictions, columns=['t+'+str(t) for t in range(1, HORIZON+1)])
# Forecast origins are the first len(test) - HORIZON + 1 test timestamps
# (presumably the same rows that survive in the shifted test frame — verify).
eval_df['timestamp'] = test.index[0:len(test.index)-HORIZON+1]
# Long format: one row per (timestamp, step 'h') pair holding its prediction;
# melt stacks the step columns one after another.
eval_df = pd.melt(eval_df, id_vars='timestamp', value_name='prediction', var_name='h')
# Transposing test_ts before ravel() flattens the actuals column by column,
# i.e. all values for the first step, then the second, ... matching the melted rows.
eval_df['actual'] = np.array(np.transpose(test_ts)).ravel()
# Undo the min-max scaling on both columns using the scaler fitted on the training data.
eval_df[['prediction', 'actual']] = scaler.inverse_transform(eval_df[['prediction', 'actual']])
eval_df.head()

In [None]:
from sklearn.metrics import mean_absolute_percentage_error

# Absolute percentage error of every forecast, averaged per horizon step.
if HORIZON > 1:
    eval_df['APE'] = (eval_df['prediction'] - eval_df['actual']).abs() / eval_df['actual']
    print(eval_df.groupby('h')['APE'].mean())

# One-step MAPE. scikit-learn's signature is (y_true, y_pred) and the error is
# divided by y_true, so the actual values must be passed first; passing the
# predictions first would normalise the error by the forecasts instead.
one_step = eval_df[eval_df['h'] == 't+1']
print('One step forecast MAPE: ', (mean_absolute_percentage_error(one_step['actual'], one_step['prediction']))*100, '%')


In [None]:
# Multi-step forecast MAPE over every (timestamp, horizon) pair.
# scikit-learn's signature is (y_true, y_pred) and the error is divided by
# y_true, so the actual values go first.

print('Multi-step forecast MAPE: ', mean_absolute_percentage_error(eval_df['actual'], eval_df['prediction'])*100, '%')

In [None]:
# Show the forecast accuracy visually: actual values against the predictions.

if HORIZON == 1:
    # Single-step forecast: actual and prediction share the same timestamps.
    eval_df.plot(x='timestamp', y=['actual', 'prediction'], style=['r', 'b'], figsize=(15, 8))

else:
    # Multi-step forecast: start from the actuals of the t+1 rows and add one
    # prediction column per horizon step. copy() makes plot_df an independent
    # frame before columns are added to it.
    plot_df = eval_df[(eval_df.h == 't+1')][['timestamp', 'actual']].copy()
    for t in range(1, HORIZON+1):
        plot_df['t+'+str(t)] = eval_df[(eval_df.h == 't+'+str(t))]['prediction'].values

    fig = plt.figure(figsize=(15, 8))
    ax = fig.add_subplot(111)
    ax.plot(plot_df['timestamp'], plot_df['actual'], color='red', linewidth=2, label='actual')
    for t in range(1, HORIZON+1):
        # Pair the k-th 't+t' prediction with the timestamp (t-1) rows later.
        # Dedicated names are used (not `x` / `y`) so the notebook-level `y`
        # that the clustering cells score against is not overwritten.
        horizon_x = plot_df['timestamp'].iloc[(t-1):]
        horizon_y = plot_df['t+'+str(t)].iloc[0:len(horizon_x)]
        # Further-ahead steps are drawn thinner and more transparent.
        ax.plot(horizon_x, horizon_y, color='blue', linewidth=2*math.pow(.9, t), alpha=math.pow(0.6, t), label='t+'+str(t))
    ax.legend(loc='best')

plt.xlabel('timestamp', fontsize=12)
# The plotted quantity is the game length, so the axis is labelled accordingly.
plt.ylabel('gamelength', fontsize=12)
plt.show()