In [55]:
import requests
import json
import pandas as pd
import re
import numpy as np
from datetime import datetime
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score

pd.set_option("display.max_columns", None)  # Show all columns
pd.set_option("display.width", 1000)       # Set a wide display width
pd.set_option("display.max_rows", None)    # Show all rows (useful for large datasets)

In [56]:
file = open("matches.json", "r", encoding="utf-8")
data = json.load(file)

rows = []

for match in data:
    event_date = datetime.strptime(match["event"]["eventDate"], "%Y-%m-%dT%H:%M:%S.%fZ")
    rows.append(
        {
            "Event Name": match["event"]["eventName"],
            "Event Location": match["event"]["location"],
            "Event Date": match["event"]["eventDate"],
            "Weight Class": match["weightCategory"],
            "Arm": match["arm"],
            "Armwrestler A Name": f"{match['awA']['armwrestler']['firstName']} {match['awA']['armwrestler']['lastName']}",
            "Armwrestler A Country": match["awA"]["armwrestler"]["country"],
            "Armwrestler A Age": event_date.year - int(match["awA"]["armwrestler"]["dateOfBirth"][:4]) 
                                if match["awA"]["armwrestler"]["dateOfBirth"] else None,
            "Armwrestler A Weight": float(match["awA"]["matchMeasurements"]["weight"].replace("kg", "")) 
                                    if "weight" in match["awA"]["matchMeasurements"] and match["awA"]["matchMeasurements"]["weight"] 
                                    else float(re.search(r"\d+", match["weightCategory"]).group()),
            "Armwrestler A Bicep": match["awA"]["matchMeasurements"].get("bicep", np.nan),
            "Armwrestler A Forearm Circumference": match["awA"]["matchMeasurements"].get("foreArmCirc", np.nan),
            "Armwrestler A Height": match["awA"]["armwrestler"].get("height", np.nan),
            "Armwrestler B Name": f"{match['awB']['armwrestler']['firstName']} {match['awB']['armwrestler']['lastName']}",
            "Armwrestler B Country": match["awB"]["armwrestler"]["country"],
            "Armwrestler B Age": event_date.year - int(match["awB"]["armwrestler"]["dateOfBirth"][:4]) 
                                if match["awB"]["armwrestler"]["dateOfBirth"] else None,
            "Armwrestler B Weight": float(match["awB"]["matchMeasurements"]["weight"].replace("kg", "")) 
                                    if "weight" in match["awB"]["matchMeasurements"] and match["awB"]["matchMeasurements"]["weight"] 
                                    else float(re.search(r"\d+", match["weightCategory"]).group()),
            "Armwrestler B Bicep": match["awB"]["matchMeasurements"].get("bicep", np.nan),
            "Armwrestler B Forearm Circumference": match["awB"]["matchMeasurements"].get("foreArmCirc", np.nan),
            "Armwrestler B Height": match["awB"]["armwrestler"].get("height", np.nan),
            "Armwrestler A Rounds Won": match["awA"]["scoreCard"]["roundsWon"],
            "Armwrestler B Rounds Won": match["awB"]["scoreCard"]["roundsWon"],
        }
    )

# Create the DataFrame
matches = pd.DataFrame(rows)

# Duplicate the DataFrame for Armwrestler A and B
matches_A = matches.copy()
matches_B = matches.copy()

# Swap the Armwrestler A and Armwrestler B data
matches_A["opponent_code"] = matches["Armwrestler B Name"]  # Competitor for Armwrestler A
matches_B["opponent_code"] = matches["Armwrestler A Name"]  # Competitor for Armwrestler B

matches_A["Armwrestler A Name"] = matches["Armwrestler A Name"]
matches_A["Armwrestler A Weight"] = matches["Armwrestler A Weight"]
matches_A["Armwrestler A Age"] = matches["Armwrestler A Age"]
matches_A["Armwrestler A Bicep"] = matches["Armwrestler A Bicep"]
matches_A["Armwrestler A Forearm Circumference"] = matches["Armwrestler A Forearm Circumference"]
matches_A["Armwrestler A Height"] = matches["Armwrestler A Height"]

matches_B["Armwrestler A Name"] = matches["Armwrestler B Name"]
matches_B["Armwrestler A Weight"] = matches["Armwrestler B Weight"]
matches_B["Armwrestler A Age"] = matches["Armwrestler B Age"]
matches_B["Armwrestler A Bicep"] = matches["Armwrestler B Bicep"]
matches_B["Armwrestler A Forearm Circumference"] = matches["Armwrestler B Forearm Circumference"]
matches_B["Armwrestler A Height"] = matches["Armwrestler B Height"]

matches_A["Armwrestler B Name"] = matches["Armwrestler B Name"]
matches_A["Armwrestler B Weight"] = matches["Armwrestler B Weight"]
matches_A["Armwrestler B Age"] = matches["Armwrestler B Age"]
matches_A["Armwrestler B Bicep"] = matches["Armwrestler B Bicep"]
matches_A["Armwrestler B Forearm Circumference"] = matches["Armwrestler B Forearm Circumference"]
matches_A["Armwrestler B Height"] = matches["Armwrestler B Height"]

matches_B["Armwrestler B Name"] = matches["Armwrestler A Name"]
matches_B["Armwrestler B Weight"] = matches["Armwrestler A Weight"]
matches_B["Armwrestler B Age"] = matches["Armwrestler A Age"]
matches_B["Armwrestler B Bicep"] = matches["Armwrestler A Bicep"]
matches_B["Armwrestler B Forearm Circumference"] = matches["Armwrestler A Forearm Circumference"]
matches_B["Armwrestler B Height"] = matches["Armwrestler A Height"]

# Concatenate the original and swapped DataFrames
final_matches = pd.concat([matches_A, matches_B])

In [57]:
all_armwrestlers = pd.concat([final_matches["Armwrestler A Name"], final_matches["Armwrestler B Name"]])
# print(all_armwrestlers.value_counts())

In [58]:
# matches.dtypes

In [59]:
final_matches["Armwrestler A Bicep"] = final_matches["Armwrestler A Bicep"].apply(
    lambda x: float(re.match(r"(\d+\.?\d*)", x).group(1)) if isinstance(x, str) and re.match(r"(\d+\.?\d*)", x) else np.nan
)

final_matches["Armwrestler A Forearm Circumference"] = final_matches["Armwrestler A Forearm Circumference"].apply(
    lambda x: float(re.match(r"(\d+\.?\d*)", x).group(1)) if isinstance(x, str) and re.match(r"(\d+\.?\d*)", x) else np.nan
)

final_matches["Armwrestler A Height"] = final_matches["Armwrestler A Height"].apply(
    lambda x: float(re.match(r"(\d+\.?\d*)", x).group(1)) if isinstance(x, str) and re.match(r"(\d+\.?\d*)", x) else np.nan
)

final_matches["Armwrestler B Bicep"] = final_matches["Armwrestler B Bicep"].apply(
    lambda x: float(re.match(r"(\d+\.?\d*)", x).group(1)) if isinstance(x, str) and re.match(r"(\d+\.?\d*)", x) else np.nan
)

final_matches["Armwrestler B Forearm Circumference"] = final_matches["Armwrestler B Forearm Circumference"].apply(
    lambda x: float(re.match(r"(\d+\.?\d*)", x).group(1)) if isinstance(x, str) and re.match(r"(\d+\.?\d*)", x) else np.nan
)

final_matches["Armwrestler B Height"] = final_matches["Armwrestler B Height"].apply(
    lambda x: float(re.match(r"(\d+\.?\d*)", x).group(1)) if isinstance(x, str) and re.match(r"(\d+\.?\d*)", x) else np.nan
)


final_matches["event_code"] = final_matches["Event Name"].astype("category").cat.codes
final_matches["location_code"] = final_matches["Event Location"].astype("category").cat.codes
final_matches["weight_class_code"] = final_matches["Weight Class"].astype("category").cat.codes
final_matches["arm_code"] = final_matches["Arm"].astype("category").cat.codes
final_matches["a_country_code"] = final_matches["Armwrestler A Country"].astype("category").cat.codes
final_matches["b_country_code"] = final_matches["Armwrestler B Country"].astype("category").cat.codes
final_matches["opponent_code"] = final_matches["Armwrestler B Name"].astype("category").cat.codes

final_matches["Event Date"] = pd.to_datetime(final_matches["Event Date"])
final_matches["Event Year"] = final_matches["Event Date"].dt.year
final_matches["Event Month"] = final_matches["Event Date"].dt.month

final_matches["target (W/L)"] = (final_matches["Armwrestler A Rounds Won"] > final_matches["Armwrestler B Rounds Won"]).astype(int)

print(final_matches.head())




             Event Name             Event Location                Event Date  Weight Class    Arm Armwrestler A Name Armwrestler A Country  Armwrestler A Age  Armwrestler A Weight  Armwrestler A Bicep  Armwrestler A Forearm Circumference  Armwrestler A Height Armwrestler B Name Armwrestler B Country  Armwrestler B Age  Armwrestler B Weight  Armwrestler B Bicep  Armwrestler B Forearm Circumference  Armwrestler B Height  Armwrestler A Rounds Won  Armwrestler B Rounds Won  opponent_code  event_code  location_code  weight_class_code  arm_code  a_country_code  b_country_code  Event Year  Event Month  target (W/L)
0  King of the table 13  New Jersey, United States 2024-12-14 08:00:00+00:00       men_105  right      Lachlan Adair             australia               34.0                 105.0                 51.0                                 43.0                 185.0     Lars Rorbakken                norway               45.0                 105.0                 48.0                      

In [60]:
rf = RandomForestClassifier(n_estimators=50, min_samples_split=10, random_state=1)
split_year = 2023
train_data = final_matches[final_matches["Event Year"] <= split_year]
test_data = final_matches[final_matches["Event Year"] > split_year]
predictors = ["Armwrestler A Age", "Armwrestler A Weight",
               "Armwrestler A Bicep", "Armwrestler A Forearm Circumference",
                 "Armwrestler A Height", "Armwrestler B Age",
                   "Armwrestler B Weight", "Armwrestler B Bicep",
                     "Armwrestler B Forearm Circumference", "Armwrestler B Height",
                     "Armwrestler A Rounds Won", "Armwrestler B Rounds Won",
                     "event_code", "location_code", "weight_class_code", "arm_code", "a_country_code",
                     "b_country_code", "opponent_code", "Event Year", "Event Month"]

rf.fit(train_data[predictors], train_data["target (W/L)"])

In [61]:
preds = rf.predict(test_data[predictors])
acc = accuracy_score(test_data["target (W/L)"], preds)
acc
test_data["target (W/L)"].value_counts()


target (W/L)
1    220
0     10
Name: count, dtype: int64

In [62]:
combined = pd.DataFrame(dict(actual=test_data["target (W/L)"], prediction = preds))
pd.crosstab(index=combined["actual"], columns=combined["prediction"])

prediction,1
actual,Unnamed: 1_level_1
0,10
1,220


In [63]:
print(precision_score(test_data["target (W/L)"], preds))

0.9565217391304348


In [64]:
grouped_matches = final_matches.groupby("Armwrestler A Name")
group = grouped_matches.get_group("Devon Larratt")
group

Unnamed: 0,Event Name,Event Location,Event Date,Weight Class,Arm,Armwrestler A Name,Armwrestler A Country,Armwrestler A Age,Armwrestler A Weight,Armwrestler A Bicep,Armwrestler A Forearm Circumference,Armwrestler A Height,Armwrestler B Name,Armwrestler B Country,Armwrestler B Age,Armwrestler B Weight,Armwrestler B Bicep,Armwrestler B Forearm Circumference,Armwrestler B Height,Armwrestler A Rounds Won,Armwrestler B Rounds Won,opponent_code,event_code,location_code,weight_class_code,arm_code,a_country_code,b_country_code,Event Year,Event Month,target (W/L)
23,East vs West 15,"Istanbul, Turkey",2024-11-02 07:00:00+00:00,men_105,right,Devon Larratt,canada,49.0,105.0,43.0,36.8,196.0,Oleg Petrenko,ukraine,27.0,105.0,47.0,45.0,180.0,4,0,128,9,2,0,1,5,34,2024,11,1
28,King of the Table 12,"Dubai, United Arab Emirates",2024-09-28 07:00:00+00:00,men_115_plus,left,Devon Larratt,canada,49.0,118.0,45.0,40.0,196.0,Leonidas Arkona,germany,31.0,118.0,54.0,48.0,182.0,4,0,111,21,0,2,0,5,11,2024,9,1
42,East vs West 14,"Florida, United States",2024-08-10 07:00:00+00:00,men_115_plus,right,Devon Larratt,canada,49.0,105.0,47.0,42.0,196.0,Chance Shaw,united states,25.0,130.0,50.8,45.7,183.0,3,1,36,8,1,2,1,5,35,2024,8,1
120,King of the Table 9,"Dubai, United Arab Emirates",2023-12-09 08:00:00+00:00,men_115_plus,right,Devon Larratt,canada,48.0,125.0,45.0,40.0,196.0,Georgi Tsvetkov,bulgaria,34.0,165.0,55.0,49.0,190.0,6,0,74,29,0,2,1,5,3,2023,12,1
131,East vs West 10,"Istanbul, Turkey",2023-11-11 08:00:00+00:00,men_115_plus,right,Devon Larratt,canada,48.0,120.0,47.0,42.0,196.0,Denis Cyplenkov,russia,41.0,130.0,54.0,45.0,186.0,3,0,48,4,2,2,1,5,28,2023,11,1
149,East vs West 9,"Istanbul, Turkey",2023-08-26 07:00:00+00:00,men_115_plus,right,Devon Larratt,canada,48.0,120.0,47.0,42.0,196.0,Ermes Gasparini,italy,29.0,132.0,54.0,49.0,188.0,3,0,62,17,2,2,1,5,16,2023,8,1
167,King of the Table 7,"Dubai, United Arab Emirates",2023-06-24 07:00:00+00:00,men_115_plus,right,Devon Larratt,canada,48.0,128.5,45.0,40.0,196.0,Dave Chaffee,united states,46.0,127.5,50.8,46.0,188.0,6,0,44,27,0,2,1,5,35,2023,6,1
214,East vs West 5,"Istanbul, Turkey",2022-11-19 08:00:00+00:00,men_115,right,Devon Larratt,canada,47.0,114.0,46.0,40.0,196.0,Evgeny Prudnyk,ukraine,34.0,115.0,49.0,47.0,185.0,3,0,65,13,2,1,1,5,34,2022,11,1
279,King of the Table 2,"Dubai, United Arab Emirates",2021-12-11 08:00:00+00:00,men_115_plus,right,Devon Larratt,canada,46.0,129.6,46.99,38.73,196.0,John Brzenk,united states,56.0,114.6,44.45,40.64,186.0,4,0,91,22,0,2,1,5,35,2021,12,1
293,King of the Table 1,"Dubai, United Arab Emirates",2021-05-28 07:00:00+00:00,men_115_plus,right,Devon Larratt,canada,46.0,133.81,45.72,38.73,196.0,Michael Todd,united states,47.0,122.47,52.07,41.91,190.0,5,0,119,18,0,2,1,5,35,2021,5,1
