In [598]:
import requests
import json
import pandas as pd
import re
import numpy as np
from datetime import datetime
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score, f1_score, confusion_matrix
import random

pd.set_option("display.max_columns", None)  # Show all columns
pd.set_option("display.width", 1000)       # Set a wide display width
pd.set_option("display.max_rows", None)    # Show all rows (useful for large datasets)

In [599]:
file = open("matches.json", "r", encoding="utf-8")
data = json.load(file)

rows = []

for match in data:
    event_date = datetime.strptime(match["event"]["eventDate"], "%Y-%m-%dT%H:%M:%S.%fZ")
    rows.append(
        {
            "Event Name": match["event"]["eventName"],
            "Event Location": match["event"]["location"],
            "Event Date": match["event"]["eventDate"],
            "Weight Class": match["weightCategory"],
            "Arm": match["arm"],
            "Armwrestler A Name": f"{match['awA']['armwrestler']['firstName']} {match['awA']['armwrestler']['lastName']}",
            "Armwrestler A Country": match["awA"]["armwrestler"]["country"],
            "Armwrestler A Age": event_date.year - int(match["awA"]["armwrestler"]["dateOfBirth"][:4]) 
                                if match["awA"]["armwrestler"]["dateOfBirth"] else None,
            "Armwrestler A Weight": float(match["awA"]["matchMeasurements"]["weight"].replace("kg", "")) 
                                    if "weight" in match["awA"]["matchMeasurements"] and match["awA"]["matchMeasurements"]["weight"] 
                                    else float(re.search(r"\d+", match["weightCategory"]).group()),
            "Armwrestler A Bicep": match["awA"]["matchMeasurements"].get("bicep", np.nan),
            "Armwrestler A Forearm Circumference": match["awA"]["matchMeasurements"].get("foreArmCirc", np.nan),
            "Armwrestler A Height": match["awA"]["armwrestler"].get("height", np.nan),
            "Armwrestler B Name": f"{match['awB']['armwrestler']['firstName']} {match['awB']['armwrestler']['lastName']}",
            "Armwrestler B Country": match["awB"]["armwrestler"]["country"],
            "Armwrestler B Age": event_date.year - int(match["awB"]["armwrestler"]["dateOfBirth"][:4]) 
                                if match["awB"]["armwrestler"]["dateOfBirth"] else None,
            "Armwrestler B Weight": float(match["awB"]["matchMeasurements"]["weight"].replace("kg", "")) 
                                    if "weight" in match["awB"]["matchMeasurements"] and match["awB"]["matchMeasurements"]["weight"] 
                                    else float(re.search(r"\d+", match["weightCategory"]).group()),
            "Armwrestler B Bicep": match["awB"]["matchMeasurements"].get("bicep", np.nan),
            "Armwrestler B Forearm Circumference": match["awB"]["matchMeasurements"].get("foreArmCirc", np.nan),
            "Armwrestler B Height": match["awB"]["armwrestler"].get("height", np.nan),
            "Armwrestler A Rounds Won": match["awA"]["scoreCard"]["roundsWon"],
            "Armwrestler B Rounds Won": match["awB"]["scoreCard"]["roundsWon"],
        }
    )


matches = pd.DataFrame(rows)
# print(matches)
matches.shape

(294, 21)

In [600]:
all_armwrestlers = pd.concat([matches["Armwrestler A Name"], matches["Armwrestler B Name"]])
# print(all_armwrestlers.value_counts())

In [601]:
# matches.dtypes

In [602]:
def randomize_armwrestlers(row):
    if random.random() > 0.5:  # 50% chance to swap
        # Swap Armwrestler A and B
        row["Armwrestler A Name"], row["Armwrestler B Name"] = row["Armwrestler B Name"], row["Armwrestler A Name"]
        row["Armwrestler A Country"], row["Armwrestler B Country"] = row["Armwrestler B Country"], row["Armwrestler A Country"]
        row["Armwrestler A Age"], row["Armwrestler B Age"] = row["Armwrestler B Age"], row["Armwrestler A Age"]
        row["Armwrestler A Weight"], row["Armwrestler B Weight"] = row["Armwrestler B Weight"], row["Armwrestler A Weight"]
        row["Armwrestler A Bicep"], row["Armwrestler B Bicep"] = row["Armwrestler B Bicep"], row["Armwrestler A Bicep"]
        row["Armwrestler A Forearm Circumference"], row["Armwrestler B Forearm Circumference"] = row["Armwrestler B Forearm Circumference"], row["Armwrestler A Forearm Circumference"]
        row["Armwrestler A Height"], row["Armwrestler B Height"] = row["Armwrestler B Height"], row["Armwrestler A Height"]
        row["Armwrestler A Rounds Won"], row["Armwrestler B Rounds Won"] = row["Armwrestler B Rounds Won"], row["Armwrestler A Rounds Won"]
    return row

In [603]:
matches["Armwrestler A Bicep"] = matches["Armwrestler A Bicep"].apply(
    lambda x: float(re.match(r"(\d+\.?\d*)", x).group(1)) if isinstance(x, str) and re.match(r"(\d+\.?\d*)", x) else np.nan
)

matches["Armwrestler A Forearm Circumference"] = matches["Armwrestler A Forearm Circumference"].apply(
    lambda x: float(re.match(r"(\d+\.?\d*)", x).group(1)) if isinstance(x, str) and re.match(r"(\d+\.?\d*)", x) else np.nan
)

matches["Armwrestler A Height"] = matches["Armwrestler A Height"].apply(
    lambda x: float(re.match(r"(\d+\.?\d*)", x).group(1)) if isinstance(x, str) and re.match(r"(\d+\.?\d*)", x) else np.nan
)

matches["Armwrestler B Bicep"] = matches["Armwrestler B Bicep"].apply(
    lambda x: float(re.match(r"(\d+\.?\d*)", x).group(1)) if isinstance(x, str) and re.match(r"(\d+\.?\d*)", x) else np.nan
)

matches["Armwrestler B Forearm Circumference"] = matches["Armwrestler B Forearm Circumference"].apply(
    lambda x: float(re.match(r"(\d+\.?\d*)", x).group(1)) if isinstance(x, str) and re.match(r"(\d+\.?\d*)", x) else np.nan
)

matches["Armwrestler B Height"] = matches["Armwrestler B Height"].apply(
    lambda x: float(re.match(r"(\d+\.?\d*)", x).group(1)) if isinstance(x, str) and re.match(r"(\d+\.?\d*)", x) else np.nan
)

matches["Weight Ratio"] = matches["Armwrestler A Weight"] / matches["Armwrestler B Weight"]
matches["Bicep Ratio"] = matches["Armwrestler A Bicep"] / matches["Armwrestler B Bicep"]
matches["Forearm Circumference Ratio"] = matches["Armwrestler A Forearm Circumference"] / matches["Armwrestler B Forearm Circumference"]
matches["Height Ratio"] = matches["Armwrestler A Height"] / matches["Armwrestler B Height"]

matches["Weight Difference"] = matches["Armwrestler A Weight"] - matches["Armwrestler B Weight"]
matches["Bicep Difference"] = matches["Armwrestler A Bicep"] - matches["Armwrestler B Bicep"]
matches["Forearm Circumference Difference"] = matches["Armwrestler A Forearm Circumference"] - matches["Armwrestler B Forearm Circumference"]
matches["Height Difference"] = matches["Armwrestler A Height"] - matches["Armwrestler B Height"]
matches["Age Difference"] = matches["Armwrestler A Age"] - matches["Armwrestler B Age"]


matches["event_code"] = matches["Event Name"].astype("category").cat.codes
matches["location_code"] = matches["Event Location"].astype("category").cat.codes
matches["weight_class_code"] = matches["Weight Class"].astype("category").cat.codes
matches["arm_code"] = matches["Arm"].astype("category").cat.codes
matches["a_country_code"] = matches["Armwrestler A Country"].astype("category").cat.codes
matches["b_country_code"] = matches["Armwrestler B Country"].astype("category").cat.codes
matches["opponent_code"] = matches["Armwrestler B Name"].astype("category").cat.codes

matches["Event Date"] = pd.to_datetime(matches["Event Date"])
matches["Event Year"] = matches["Event Date"].dt.year
matches["Event Month"] = matches["Event Date"].dt.month

matches = matches.apply(randomize_armwrestlers, axis=1)
matches["target (W/L)"] = (matches["Armwrestler A Rounds Won"] > matches["Armwrestler B Rounds Won"]).astype(int)


win_rate_a = matches.groupby("Armwrestler A Name")["target (W/L)"].mean()
matches["Armwrestler A Win Rate"] = matches["Armwrestler A Name"].map(win_rate_a)
total_matches_a = matches.groupby("Armwrestler A Name").size()
matches["Armwrestler A Total Matches"] = matches["Armwrestler A Name"].map(total_matches_a)

win_rate_b = matches.groupby("Armwrestler B Name")["target (W/L)"].mean()
matches["Armwrestler B Win Rate"] = matches["Armwrestler B Name"].map(win_rate_b)
total_matches_b = matches.groupby("Armwrestler B Name").size()
matches["Armwrestler B Total Matches"] = matches["Armwrestler B Name"].map(total_matches_b)

matches["Event Country"] = matches["Event Location"].apply(lambda x: x.split(",")[-1].strip().lower())
matches["Is Home Country A"] = (matches["Event Country"] == matches["Armwrestler A Country"]).astype(int)
matches["Is Home Country B"] = (matches["Event Country"] == matches["Armwrestler B Country"]).astype(int)


print(matches)




               Event Name               Event Location                Event Date   Weight Class    Arm     Armwrestler A Name Armwrestler A Country  Armwrestler A Age  Armwrestler A Weight  Armwrestler A Bicep  Armwrestler A Forearm Circumference  Armwrestler A Height    Armwrestler B Name Armwrestler B Country  Armwrestler B Age  Armwrestler B Weight  Armwrestler B Bicep  Armwrestler B Forearm Circumference  Armwrestler B Height  Armwrestler A Rounds Won  Armwrestler B Rounds Won  Weight Ratio  Bicep Ratio  Forearm Circumference Ratio  Height Ratio  Weight Difference  Bicep Difference  Forearm Circumference Difference  Height Difference  Age Difference  event_code  location_code  weight_class_code  arm_code  a_country_code  b_country_code  opponent_code  Event Year  Event Month  target (W/L)  Armwrestler A Win Rate  Armwrestler A Total Matches  Armwrestler B Win Rate  Armwrestler B Total Matches         Event Country  Is Home Country A  Is Home Country B
0    King of the table 13    N

In [604]:
rf = RandomForestClassifier(n_estimators=50, min_samples_split=10)
split_year = 2023
train_data = matches[matches["Event Year"] <= split_year]
test_data = matches[matches["Event Year"] > split_year]
predictors = ["Armwrestler A Age", "Armwrestler A Weight",
               "Armwrestler A Bicep", "Armwrestler A Forearm Circumference",
                 "Armwrestler A Height", "Armwrestler B Age",
                   "Armwrestler B Weight", "Armwrestler B Bicep",
                     "Armwrestler B Forearm Circumference", "Armwrestler B Height",
                     "event_code", "location_code", "weight_class_code", "arm_code", "a_country_code",
                     "b_country_code", "opponent_code", "Event Year", "Event Month"]

rf.fit(train_data[predictors], train_data["target (W/L)"])

In [605]:
preds = rf.predict(test_data[predictors])
acc = accuracy_score(test_data["target (W/L)"], preds)
test_data["target (W/L)"].value_counts()


target (W/L)
1    66
0    49
Name: count, dtype: int64

In [606]:
combined = pd.DataFrame(dict(actual=test_data["target (W/L)"], prediction = preds))
pd.crosstab(index=combined["actual"], columns=combined["prediction"])

prediction,0,1
actual,Unnamed: 1_level_1,Unnamed: 2_level_1
0,35,14
1,30,36


In [607]:
print("Precision:", precision_score(test_data["target (W/L)"], preds))
print("Recall:", recall_score(test_data["target (W/L)"], preds))
print("F1 Score:", f1_score(test_data["target (W/L)"], preds))
print("Confusion Matrix:\n", confusion_matrix(test_data["target (W/L)"], preds))

Precision: 0.72
Recall: 0.5454545454545454
F1 Score: 0.6206896551724138
Confusion Matrix:
 [[35 14]
 [30 36]]


In [608]:
grouped_matches_A = matches.groupby("Armwrestler A Name")
grouped_matches_B = matches.groupby("Armwrestler B Name")
group_A = grouped_matches_A.get_group("Devon Larratt")
# group_A

In [609]:
group_B = grouped_matches_B.get_group("Devon Larratt")
# group_B

In [610]:
feature_importances = pd.DataFrame({
    "Feature": predictors,
    "Importance": rf.feature_importances_
}).sort_values(by="Importance", ascending=False)
print(feature_importances)

                                Feature  Importance
5                     Armwrestler B Age    0.092596
16                        opponent_code    0.075905
1                  Armwrestler A Weight    0.075741
4                  Armwrestler A Height    0.075608
2                   Armwrestler A Bicep    0.075336
8   Armwrestler B Forearm Circumference    0.075170
6                  Armwrestler B Weight    0.069994
3   Armwrestler A Forearm Circumference    0.065060
7                   Armwrestler B Bicep    0.056455
14                       a_country_code    0.056046
9                  Armwrestler B Height    0.055642
0                     Armwrestler A Age    0.048971
18                          Event Month    0.043521
15                       b_country_code    0.041160
10                           event_code    0.038596
12                    weight_class_code    0.026022
17                           Event Year    0.015337
13                             arm_code    0.006767
11          