In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report

# Importing All Data

In [2]:
# Team statistics table; missing values become 0 so that every statistic summed
# during feature extraction is numeric.
team_df = pd.read_json('data/team_data.json').fillna(0)
# Match records; the 'matches' entry is what feature extraction iterates over.
match_data = pd.read_json('data/match_data.json')


# Formatting All Data

In [3]:
# Per-team statistics that are summed per alliance, in feature-vector order.
_FEATURE_STATS = (
    "opr", "dpr", "ccwm", "w_pct",
    "ap_per_match", "awp_per_match", "wp_per_match", "trueskill",
)


def _alliance_totals(team_stats, teams):
    """Return one summed value per entry of ``_FEATURE_STATS`` for an alliance."""
    # Look each team up once instead of once per statistic.
    rows = [team_stats[entry["team"]["name"]] for entry in teams]
    return [sum(row[stat] for row in rows) for stat in _FEATURE_STATS]


def extract_regression_features(team_stats, matches):
    """Turn match records into model inputs and targets.

    Parameters
    ----------
    team_stats : mapping-like
        Indexed first by team name and then by statistic name (the notebook
        passes a DataFrame; a dict of dicts works too). Must support ``in``
        for team names.
    matches : iterable of dict
        Each match has an ``"alliances"`` sequence whose entries carry
        ``"teams"`` (each with ``["team"]["name"]``) and ``"score"``.
        Index 0 is treated as the blue alliance and index 1 as the red one.

    Returns
    -------
    X : list of list
        One 16-element row per kept match: the blue alliance totals for every
        statistic in ``_FEATURE_STATS`` followed by the red alliance totals.
    y : list of int
        1 when blue scored strictly more than red, otherwise 0 (a tie is 0).
    y1 : list
        Score margin ``blue_score - red_score``, used as the regression target.

    Matches that do not have exactly two teams per alliance, or that involve a
    team missing from ``team_stats``, are skipped; the number skipped is printed.
    """
    X = []   # feature rows
    y = []   # binary labels: did blue win?
    y1 = []  # score margins (blue - red)
    skipped = 0

    for match in matches:
        blue, red = match["alliances"][0], match["alliances"][1]
        blue_teams, red_teams = blue["teams"], red["teams"]

        # Only matches with exactly two teams per alliance are usable.
        if len(blue_teams) != 2 or len(red_teams) != 2:
            skipped += 1
            continue

        # Every participating team needs an entry in the statistics table.
        if any(
            entry["team"]["name"] not in team_stats
            for alliance in (blue_teams, red_teams)
            for entry in alliance
        ):
            skipped += 1
            continue

        features = _alliance_totals(team_stats, blue_teams) + _alliance_totals(team_stats, red_teams)

        blue_score = blue["score"]
        red_score = red["score"]

        X.append(features)
        y.append(1 if blue_score > red_score else 0)
        y1.append(blue_score - red_score)

    print(f"{skipped} entries skipped")
    return X, y, y1

# Build the feature rows and both targets from the loaded data: y holds the
# win/loss labels for the classifier, y_nn the score margins for the network.
X, y, y_nn = extract_regression_features(team_df, match_data['matches'])


915 entries skipped


# Training the Models

## Logistic Regression

In [4]:
# Hold out 20% of the matches for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# max_iter is raised far above scikit-learn's default of 100.
# NOTE(review): presumably needed because the features are unscaled — confirm.
model = LogisticRegression(max_iter=5000)
model.fit(X_train, y_train)

In [5]:
# Predict on the test set
y_pred = model.predict(X_test)

print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.80      0.83      0.81     10941
           1       0.77      0.74      0.75      8613

    accuracy                           0.79     19554
   macro avg       0.78      0.78      0.78     19554
weighted avg       0.79      0.79      0.79     19554



## Neural Network Score Predictor

In [17]:
# Convert the feature rows and score margins to NumPy arrays for the network.
X, y_nn = np.array(X), np.array(y_nn)
# Same 80/20 split settings as the classifier. Note that this rebinds
# X_train / y_train, which now hold the score-margin training data.
X_train, X_val, y_train, y_val = train_test_split(
    X, y_nn, test_size=0.2, random_state=42
)

In [18]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input

# Seed Python, NumPy and TensorFlow so weight initialisation and batch shuffling
# are repeatable, matching the fixed random_state used for the data splits.
tf.keras.utils.set_random_seed(42)

# Fully connected regressor: 64 -> 32 -> 16 ReLU units, then a single linear
# output for the predicted score margin.
# The input width is declared with an explicit Input layer; passing input_shape=
# to the first Dense layer is deprecated in Keras 3 and emits a warning.
nn_model = Sequential([
    Input(shape=(X.shape[1],)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(1, activation='linear'),  # Linear activation for regression
])

# Optimise mean squared error and also report mean absolute error.
nn_model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_absolute_error'])

In [19]:
# Train for 100 epochs in mini-batches of 10, tracking loss/MAE on the validation split.
try:
    history = nn_model.fit(X_train, y_train, epochs=100, validation_data=(X_val, y_val), batch_size=10, verbose=1)
    print("Training completed successfully.")
except Exception as e:
    print("Error during training:", e)
    # Re-raise so the notebook stops here: otherwise the following cells would
    # evaluate and save an untrained model, and `history` would be undefined.
    raise

Epoch 1/100
[1m7822/7822[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 2ms/step - loss: 1371.9496 - mean_absolute_error: 29.0726 - val_loss: 1328.8658 - val_mean_absolute_error: 28.7398
Epoch 2/100
[1m7822/7822[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 2ms/step - loss: 1290.1652 - mean_absolute_error: 28.2034 - val_loss: 1320.8254 - val_mean_absolute_error: 28.6297
Epoch 3/100
[1m7822/7822[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 2ms/step - loss: 1290.4941 - mean_absolute_error: 28.2302 - val_loss: 1296.7008 - val_mean_absolute_error: 28.3554
Epoch 4/100
[1m7822/7822[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 2ms/step - loss: 1275.2634 - mean_absolute_error: 28.0376 - val_loss: 1284.7983 - val_mean_absolute_error: 28.2180
Epoch 5/100
[1m7822/7822[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 2ms/step - loss: 1261.5514 - mean_absolute_error: 27.8887 - val_loss: 1315.8307 - val_mean_absolute_error: 28.5335
Epoch 6/100
[1m7822

In [20]:
# Evaluate the model
test_loss, test_mae = nn_model.evaluate(X_val, y_val, verbose=1)
print(f"Test Mean Absolute Error: {test_mae:.2f}")
print(f"Test Loss: {test_loss:.2f}")

[1m252/612[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1m0s[0m 3ms/step - loss: 1259.5850 - mean_absolute_error: 28.0076

[1m612/612[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 1262.3313 - mean_absolute_error: 27.9376
Test Mean Absolute Error: 27.95
Test Loss: 1268.10


# Saving the Models

In [31]:
import joblib
# Persist both models to the working directory: the Keras network to
# 'nn_model.h5' and the scikit-learn classifier to 'model.joblib'.
# NOTE(review): recent Keras versions treat the .h5 format as legacy and recommend
# '.keras'; switching would also require updating the load call — confirm.
nn_model.save('nn_model.h5')
joblib.dump(model, 'model.joblib')



['model.joblib']

# Making Predictions

In [33]:
# Reload both models from disk so predictions can be made without retraining.
# joblib.load unpickles the file, so only load model files from a trusted source.
nn_model = tf.keras.models.load_model('nn_model.h5')
model = joblib.load('model.joblib')

def predict(red_alliance, blue_alliance):
    """Predict the winner and score margin of a match between two alliances.

    Uses the module-level ``team_df`` (team statistics), ``model`` (win/loss
    classifier) and ``nn_model`` (score-margin regressor).

    Parameters
    ----------
    red_alliance, blue_alliance : iterable of str
        Team names of each alliance. Note the argument order: red first.
        Every name must be present in ``team_df``; an unknown name raises
        ``KeyError`` from the lookup.

    Returns
    -------
    str
        A sentence naming the predicted winner, the classifier's win
        probability and the predicted margin. If the score network predicts
        the opposite alliance ahead, the sentence says so instead of reporting
        a negative winning margin.
    """
    # Must stay in the same order as the features the models were trained on:
    # blue totals for each statistic, then red totals.
    stat_names = (
        "opr", "dpr", "ccwm", "w_pct",
        "ap_per_match", "awp_per_match", "wp_per_match", "trueskill",
    )

    def alliance_totals(alliance):
        # Look each team up once instead of once per statistic.
        team_rows = [team_df[team] for team in alliance]
        return [sum(row[stat] for row in team_rows) for stat in stat_names]

    features = alliance_totals(blue_alliance) + alliance_totals(red_alliance)

    # Probability of class 1, i.e. a blue win.
    res = model.predict_proba([features])[0][1]
    # Predicted score margin, blue minus red.
    dif = nn_model.predict(np.array(features, dtype=np.float32).reshape(1, -1))
    blue_margin = dif[0][0]

    if res > 0.5:
        winner, other, win_pct, margin = "Blue", "Red", res * 100, blue_margin
    else:
        winner, other, win_pct, margin = "Red", "Blue", 100 * (1 - res), -blue_margin

    if margin < 0:
        # The two models disagree; report that rather than "win by -X points".
        return (
            f"{winner} Alliance Will Win, with a probability of {win_pct:.2f}% "
            f"(the score model disagrees and predicts {other} ahead by {-margin:.2f} points)"
        )
    return f"{winner} Alliance Will Win, with a probability of {win_pct:.2f}% by {margin:.2f} points"

# Example match-up; note that predict() takes the red alliance first.
red = ["229V", "16868K"]
blue = ["100A", "2055X"]
print(predict(red, blue))



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 137ms/step
[[-44.780308]]
Red Alliance Will Win, with a probability of 91.69% by 44.78 points
