In [444]:
import requests
import json
import pandas as pd
import re
import numpy as np
from datetime import datetime
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score

pd.set_option("display.max_columns", None)  # Show all columns
pd.set_option("display.width", 1000)       # Set a wide display width
pd.set_option("display.max_rows", None)    # Show all rows (useful for large datasets)

In [445]:
file = open("matches.json", "r", encoding="utf-8")
data = json.load(file)

rows = []

for match in data:
    event_date = datetime.strptime(match["event"]["eventDate"], "%Y-%m-%dT%H:%M:%S.%fZ")
    rows.append(
        {
            "Event Name": match["event"]["eventName"],
            "Event Location": match["event"]["location"],
            "Event Date": match["event"]["eventDate"],
            "Weight Class": match["weightCategory"],
            "Arm": match["arm"],
            "Armwrestler A Name": f"{match['awA']['armwrestler']['firstName']} {match['awA']['armwrestler']['lastName']}",
            "Armwrestler A Country": match["awA"]["armwrestler"]["country"],
            "Armwrestler A Age": event_date.year - int(match["awA"]["armwrestler"]["dateOfBirth"][:4]) 
                                if match["awA"]["armwrestler"]["dateOfBirth"] else None,
            "Armwrestler A Weight": float(match["awA"]["matchMeasurements"]["weight"].replace("kg", "")) 
                                    if "weight" in match["awA"]["matchMeasurements"] and match["awA"]["matchMeasurements"]["weight"] 
                                    else float(re.search(r"\d+", match["weightCategory"]).group()),
            "Armwrestler A Bicep": match["awA"]["matchMeasurements"].get("bicep", np.nan),
            "Armwrestler A Forearm Circumference": match["awA"]["matchMeasurements"].get("foreArmCirc", np.nan),
            "Armwrestler A Height": match["awA"]["armwrestler"].get("height", np.nan),
            "Armwrestler B Name": f"{match['awB']['armwrestler']['firstName']} {match['awB']['armwrestler']['lastName']}",
            "Armwrestler B Country": match["awB"]["armwrestler"]["country"],
            "Armwrestler B Age": event_date.year - int(match["awB"]["armwrestler"]["dateOfBirth"][:4]) 
                                if match["awB"]["armwrestler"]["dateOfBirth"] else None,
            "Armwrestler B Weight": float(match["awB"]["matchMeasurements"]["weight"].replace("kg", "")) 
                                    if "weight" in match["awB"]["matchMeasurements"] and match["awB"]["matchMeasurements"]["weight"] 
                                    else float(re.search(r"\d+", match["weightCategory"]).group()),
            "Armwrestler B Bicep": match["awB"]["matchMeasurements"].get("bicep", np.nan),
            "Armwrestler B Forearm Circumference": match["awB"]["matchMeasurements"].get("foreArmCirc", np.nan),
            "Armwrestler B Height": match["awB"]["armwrestler"].get("height", np.nan),
            "Armwrestler A Rounds Won": match["awA"]["scoreCard"]["roundsWon"],
            "Armwrestler B Rounds Won": match["awB"]["scoreCard"]["roundsWon"],
        }
    )


matches = pd.DataFrame(rows)
# print(matches)
matches.shape

(294, 21)

In [446]:
all_armwrestlers = pd.concat([matches["Armwrestler A Name"], matches["Armwrestler B Name"]])
# print(all_armwrestlers.value_counts())

In [447]:
# matches.dtypes

In [448]:
matches["Armwrestler A Bicep"] = matches["Armwrestler A Bicep"].apply(
    lambda x: float(re.match(r"(\d+\.?\d*)", x).group(1)) if isinstance(x, str) and re.match(r"(\d+\.?\d*)", x) else np.nan
)

matches["Armwrestler A Forearm Circumference"] = matches["Armwrestler A Forearm Circumference"].apply(
    lambda x: float(re.match(r"(\d+\.?\d*)", x).group(1)) if isinstance(x, str) and re.match(r"(\d+\.?\d*)", x) else np.nan
)

matches["Armwrestler A Height"] = matches["Armwrestler A Height"].apply(
    lambda x: float(re.match(r"(\d+\.?\d*)", x).group(1)) if isinstance(x, str) and re.match(r"(\d+\.?\d*)", x) else np.nan
)

matches["Armwrestler B Bicep"] = matches["Armwrestler B Bicep"].apply(
    lambda x: float(re.match(r"(\d+\.?\d*)", x).group(1)) if isinstance(x, str) and re.match(r"(\d+\.?\d*)", x) else np.nan
)

matches["Armwrestler B Forearm Circumference"] = matches["Armwrestler B Forearm Circumference"].apply(
    lambda x: float(re.match(r"(\d+\.?\d*)", x).group(1)) if isinstance(x, str) and re.match(r"(\d+\.?\d*)", x) else np.nan
)

matches["Armwrestler B Height"] = matches["Armwrestler B Height"].apply(
    lambda x: float(re.match(r"(\d+\.?\d*)", x).group(1)) if isinstance(x, str) and re.match(r"(\d+\.?\d*)", x) else np.nan
)


matches["event_code"] = matches["Event Name"].astype("category").cat.codes
matches["location_code"] = matches["Event Location"].astype("category").cat.codes
matches["weight_class_code"] = matches["Weight Class"].astype("category").cat.codes
matches["arm_code"] = matches["Arm"].astype("category").cat.codes
matches["a_country_code"] = matches["Armwrestler A Country"].astype("category").cat.codes
matches["b_country_code"] = matches["Armwrestler B Country"].astype("category").cat.codes
matches["opponent_code"] = matches["Armwrestler B Name"].astype("category").cat.codes

matches["Event Date"] = pd.to_datetime(matches["Event Date"])
matches["Event Year"] = matches["Event Date"].dt.year
matches["Event Month"] = matches["Event Date"].dt.month

matches["target (W/L)"] = (matches["Armwrestler A Rounds Won"] > matches["Armwrestler B Rounds Won"]).astype(int)

print(matches)




               Event Name               Event Location                Event Date   Weight Class    Arm    Armwrestler A Name Armwrestler A Country  Armwrestler A Age  Armwrestler A Weight  Armwrestler A Bicep  Armwrestler A Forearm Circumference  Armwrestler A Height     Armwrestler B Name Armwrestler B Country  Armwrestler B Age  Armwrestler B Weight  Armwrestler B Bicep  Armwrestler B Forearm Circumference  Armwrestler B Height  Armwrestler A Rounds Won  Armwrestler B Rounds Won  event_code  location_code  weight_class_code  arm_code  a_country_code  b_country_code  opponent_code  Event Year  Event Month  target (W/L)
0    King of the table 13    New Jersey, United States 2024-12-14 08:00:00+00:00        men_105  right         Lachlan Adair             australia               34.0                105.00                51.00                                43.00                185.00         Lars Rorbakken                norway                 45                105.00                48.

In [449]:
rf = RandomForestClassifier(n_estimators=50, min_samples_split=10, random_state=1)
split_year = 2023
train_data = matches[matches["Event Year"] <= split_year]
test_data = matches[matches["Event Year"] > split_year]
predictors = ["Armwrestler A Age", "Armwrestler A Weight",
               "Armwrestler A Bicep", "Armwrestler A Forearm Circumference",
                 "Armwrestler A Height", "Armwrestler B Age",
                   "Armwrestler B Weight", "Armwrestler B Bicep",
                     "Armwrestler B Forearm Circumference", "Armwrestler B Height",
                     "Armwrestler A Rounds Won", "Armwrestler B Rounds Won",
                     "event_code", "location_code", "weight_class_code", "arm_code", "a_country_code",
                     "b_country_code", "opponent_code", "Event Year", "Event Month"]

rf.fit(train_data[predictors], train_data["target (W/L)"])

In [450]:
preds = rf.predict(test_data[predictors])
acc = accuracy_score(test_data["target (W/L)"], preds)
acc
test_data["target (W/L)"].value_counts()


target (W/L)
1    110
0      5
Name: count, dtype: int64

In [451]:
combined = pd.DataFrame(dict(actual=test_data["target (W/L)"], prediction = preds))
pd.crosstab(index=combined["actual"], columns=combined["prediction"])

prediction,1
actual,Unnamed: 1_level_1
0,5
1,110


In [452]:
print("Precision:", precision_score(test_data["target (W/L)"], preds))

Precision: 0.9565217391304348


In [453]:
grouped_matches_A = matches.groupby("Armwrestler A Name")
grouped_matches_B = matches.groupby("Armwrestler B Name")
group_A = grouped_matches_A.get_group("Devon Larratt")
# group_A

In [454]:
group_B = grouped_matches_B.get_group("Devon Larratt")
# group_B