In [1]:
# Importing packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [78]:
# Read the match-results CSV from the working directory and preview the frame
raw_matches = pd.read_csv(filepath_or_buffer='d1_all_match_results.csv')
raw_matches

Unnamed: 0,Season,Date,Event,Weight Class,Result,Result Type,Score,Opponent,Opponent ID,Opponent Record,Opponent School,Wrestler,Wrestler ID
0,2025,02/01,Edinboro Open,133,W,FALL,3:45,Liam Dwyer,99363,Unlisted,Spartan Combat RTC,Jace Schafer,73377
1,2025,02/01,Edinboro Open,133,L,FALL,2:51,Aiden Allen,92968,6 - 6,Virginia,Jace Schafer,73377
2,2025,02/01,Edinboro Open,133,W,DEC,6 - 0,Jack Kazalas,93763,5 - 10,Binghamton,Jace Schafer,73377
3,2025,02/01,Edinboro Open,133,L,MD,15 - 4,Conor Collins,74086,9 - 5,Army West Point,Jace Schafer,73377
4,2025,12/21,West Virginia - Campbell Dual,125,W,FALL,3:29,Anthony Molton,57207,18 - 8,Campbell,Jace Schafer,73377
...,...,...,...,...,...,...,...,...,...,...,...,...,...
82918,2022,12/11,Cleveland State Open,285,L,MD,9 - 0,Jacob Cover,57237,11 - 18,Kent State,Mason Cover,72597
82919,2022,11/06,Michigan State Open,285,W,MD,8 - 0,Brendan Mahar,74307,Unlisted,Cornerstone,Mason Cover,72597
82920,2022,11/06,Michigan State Open,285,W,DEC,4 - 3,Jacob Christiansen,73347,6 - 5,Northern Illinois,Mason Cover,72597
82921,2022,11/06,Michigan State Open,285,W,DEC,6 - 3,Keegan Nugent,71974,5 - 8,Michigan,Mason Cover,72597


In [79]:
# Print a summary of the frame: column names, non-null counts, dtypes, memory usage
raw_matches.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 82923 entries, 0 to 82922
Data columns (total 13 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Season           82923 non-null  int64 
 1   Date             82923 non-null  object
 2   Event            82923 non-null  object
 3   Weight Class     82923 non-null  int64 
 4   Result           82923 non-null  object
 5   Result Type      82923 non-null  object
 6   Score            82923 non-null  object
 7   Opponent         82923 non-null  object
 8   Opponent ID      82923 non-null  int64 
 9   Opponent Record  82923 non-null  object
 10  Opponent School  82923 non-null  object
 11  Wrestler         82923 non-null  object
 12  Wrestler ID      82923 non-null  int64 
dtypes: int64(4), object(9)
memory usage: 8.2+ MB


In [80]:
# Count rows that are exact copies of another row; keep=False flags every
# member of a duplicate group, not just the repeats
int(raw_matches.duplicated(keep=False).sum())

0

In [81]:
# Build a full calendar date for every match.
#
# The CSV stores 'Date' as a month/day string and 'Season' as a single year.
# The adjustment below assumes 'Season' is the calendar year in which the season
# ends, so matches held in August-December belong to the previous calendar year
# (TODO confirm this assumption against the data source).
SEASON_START_MONTH = 8  # first month treated as part of the previous calendar year

season_year = raw_matches['Season'].astype(int)
match_month = raw_matches['Date'].str.split('/').str[0].astype(int)

# Vectorized year shift: subtract one year for August-December matches.
# This replaces a row-by-row DataFrame.apply, which is slow on ~83k rows.
calendar_year = season_year - (match_month >= SEASON_START_MONTH).astype(int)

match_dates = pd.to_datetime(
    calendar_year.astype(str) + '/' + raw_matches['Date'],
    format='%Y/%m/%d',
)

# Swap the month/day strings for full datetimes. assign() keeps 'Date' in its
# original column position, so no temporary columns are needed, and no variable
# named `datetime` (which would shadow the standard-library module) is created.
raw_matches = raw_matches.assign(Date=match_dates)

In [82]:
# Order matches newest-first; rows sharing a date are ordered by descending wrestler ID
sort_keys = ['Date', 'Wrestler ID']
raw_matches = raw_matches.sort_values(by=sort_keys, ascending=False).copy()
raw_matches['Date']

60024   2025-03-20
60025   2025-03-20
60026   2025-03-20
60027   2025-03-20
13742   2025-03-20
           ...    
68416   2018-12-22
11973   2018-11-03
11974   2018-11-03
11975   2018-11-03
11976   2018-11-03
Name: Date, Length: 82923, dtype: datetime64[ns]

In [83]:
# Rewrite Season from a single year into a "previous/current" label, e.g. 2025 -> "2024/2025"
season_end = raw_matches['Season']
season_start = season_end - 1
raw_matches['Season'] = season_start.astype('str') + '/' + season_end.astype('str')
raw_matches['Season']

60024    2024/2025
60025    2024/2025
60026    2024/2025
60027    2024/2025
13742    2024/2025
           ...    
68416    2018/2019
11973    2018/2019
11974    2018/2019
11975    2018/2019
11976    2018/2019
Name: Season, Length: 82923, dtype: object

In [84]:
# Preview the frame after the Date and Season columns were rewritten and the rows sorted
raw_matches

Unnamed: 0,Season,Date,Event,Weight Class,Result,Result Type,Score,Opponent,Opponent ID,Opponent Record,Opponent School,Wrestler,Wrestler ID
60024,2024/2025,2025-03-20,NCAA Championships,184,L,TF5,19 - 4 2:41,Donnell Washington,57738,17 - 11,Indiana,Eddie Neitenbach,92977
60025,2024/2025,2025-03-20,NCAA Championships,184,W,SV-1,9 - 6,Nick Fine,71734,15 - 9,Columbia,Eddie Neitenbach,92977
60026,2024/2025,2025-03-20,NCAA Championships,184,W,FALL,1:08,TJ McDonnell,79786,8 - 15,Oregon State,Eddie Neitenbach,92977
60027,2024/2025,2025-03-20,NCAA Championships,184,L,MD,10 - 0,Deanthony Parker,71644,20 - 8,Oklahoma,Eddie Neitenbach,92977
13742,2024/2025,2025-03-20,NCAA Championships,174,L,MD,11 - 2,Luca Augustine,68730,20 - 10,Pittsburgh,Branson John,92961
...,...,...,...,...,...,...,...,...,...,...,...,...,...
68416,2018/2019,2018-12-22,Wilkes Open,174,W,DEC,6 - 3,Conner Allshouse,44877,8 - 12,American,Edmond Ruth,51277
11973,2018/2019,2018-11-03,Cowboy Open,125,W,TF5,23 - 5 5:00,Cole Bernstein,53434,Unlisted,Colorado Mesa,Julian Tagg,58261
11974,2018/2019,2018-11-03,Cowboy Open,125,W,DEC,11 - 10,Isaiah Delacerda,52435,Unlisted,Adams State,Julian Tagg,58261
11975,2018/2019,2018-11-03,Cowboy Open,125,L,DEC,5 - 2,Jace Koelzer,50912,2 - 2,Northern Colorado,Julian Tagg,58261
