In [69]:
# Importing packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [70]:
# Importing dataset
raw = pd.read_csv('d1_all_match_results.csv')
raw

Unnamed: 0,Season,Date,Event,Weight Class,Result,Result Type,Score,Opponent,Opponent ID,Opponent Record,Opponent School,Wrestler,Wrestler ID
0,2025,02/01,Edinboro Open,133,W,FALL,3:45,Liam Dwyer,99363,Unlisted,Spartan Combat RTC,Jace Schafer,73377
1,2025,02/01,Edinboro Open,133,L,FALL,2:51,Aiden Allen,92968,6 - 6,Virginia,Jace Schafer,73377
2,2025,02/01,Edinboro Open,133,W,DEC,6 - 0,Jack Kazalas,93763,5 - 10,Binghamton,Jace Schafer,73377
3,2025,02/01,Edinboro Open,133,L,MD,15 - 4,Conor Collins,74086,9 - 5,Army West Point,Jace Schafer,73377
4,2025,12/21,West Virginia - Campbell Dual,125,W,FALL,3:29,Anthony Molton,57207,18 - 8,Campbell,Jace Schafer,73377
...,...,...,...,...,...,...,...,...,...,...,...,...,...
82918,2022,12/11,Cleveland State Open,285,L,MD,9 - 0,Jacob Cover,57237,11 - 18,Kent State,Mason Cover,72597
82919,2022,11/06,Michigan State Open,285,W,MD,8 - 0,Brendan Mahar,74307,Unlisted,Cornerstone,Mason Cover,72597
82920,2022,11/06,Michigan State Open,285,W,DEC,4 - 3,Jacob Christiansen,73347,6 - 5,Northern Illinois,Mason Cover,72597
82921,2022,11/06,Michigan State Open,285,W,DEC,6 - 3,Keegan Nugent,71974,5 - 8,Michigan,Mason Cover,72597


In [71]:
raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 82923 entries, 0 to 82922
Data columns (total 13 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Season           82923 non-null  int64 
 1   Date             82923 non-null  object
 2   Event            82923 non-null  object
 3   Weight Class     82923 non-null  int64 
 4   Result           82923 non-null  object
 5   Result Type      82923 non-null  object
 6   Score            82923 non-null  object
 7   Opponent         82923 non-null  object
 8   Opponent ID      82923 non-null  int64 
 9   Opponent Record  82923 non-null  object
 10  Opponent School  82923 non-null  object
 11  Wrestler         82923 non-null  object
 12  Wrestler ID      82923 non-null  int64 
dtypes: int64(4), object(9)
memory usage: 8.2+ MB


In [72]:
# Checking for any duplicated matches
len(raw[raw.duplicated(keep=False)])

0

In [73]:
# Adding datetime column that captures full date
raw['temp_date'] = pd.to_datetime(raw['Season'].astype('str') + '/' + raw['Date'], format='%Y/%m/%d')
    
# Extract month to identify which dates need adjustment
raw['month'] = raw['temp_date'].dt.month

# Adjust year for months that are typically in the previous calendar year
# Assuming August-December (months 8-12) belong to the previous calendar year
raw['datetime'] = raw.apply(lambda row: 
    row['temp_date'].replace(year=int(row['Season']) - 1) 
    if row['month'] >= 8  # August through December
    else row['temp_date'], axis=1)

# Clean up temporary columns and move datetime column
raw = raw.drop(['temp_date', 'month', 'Date'], axis=1)
datetime = raw.pop('datetime')
raw.insert(1, 'Date', datetime)

In [74]:
# Sorting the data by date and wrestler ID
raw = raw.sort_values(['Date', 'Wrestler ID']).copy()
raw['Date']

11973   2018-11-03
11974   2018-11-03
11975   2018-11-03
11976   2018-11-03
68414   2018-12-22
           ...    
13743   2025-03-20
60024   2025-03-20
60025   2025-03-20
60026   2025-03-20
60027   2025-03-20
Name: Date, Length: 82923, dtype: datetime64[ns]

In [75]:
# Editing Season column to reflect full span
raw['Season'] = (raw['Season'] - 1).astype('str') + '/' + raw['Season'].astype('str')
raw['Season']

11973    2018/2019
11974    2018/2019
11975    2018/2019
11976    2018/2019
68414    2018/2019
           ...    
13743    2024/2025
60024    2024/2025
60025    2024/2025
60026    2024/2025
60027    2024/2025
Name: Season, Length: 82923, dtype: object

In [76]:
raw = raw.reset_index(drop=True)
raw

Unnamed: 0,Season,Date,Event,Weight Class,Result,Result Type,Score,Opponent,Opponent ID,Opponent Record,Opponent School,Wrestler,Wrestler ID
0,2018/2019,2018-11-03,Cowboy Open,125,W,TF5,23 - 5 5:00,Cole Bernstein,53434,Unlisted,Colorado Mesa,Julian Tagg,58261
1,2018/2019,2018-11-03,Cowboy Open,125,W,DEC,11 - 10,Isaiah Delacerda,52435,Unlisted,Adams State,Julian Tagg,58261
2,2018/2019,2018-11-03,Cowboy Open,125,L,DEC,5 - 2,Jace Koelzer,50912,2 - 2,Northern Colorado,Julian Tagg,58261
3,2018/2019,2018-11-03,Cowboy Open,125,W,DEC,5 - 4,William Edelblute,57059,2 - 0,Utah Valley,Julian Tagg,58261
4,2018/2019,2018-12-22,Wilkes Open,174,L,DEC,6 - 5,Gino Sita,52064,Unlisted,Alderson-Broaddus University,Edmond Ruth,51277
...,...,...,...,...,...,...,...,...,...,...,...,...,...
82918,2024/2025,2025-03-20,NCAA Championships,174,L,FALL,2:46,Levi Haines,72657,25 - 2,Penn State,Branson John,92961
82919,2024/2025,2025-03-20,NCAA Championships,184,L,TF5,19 - 4 2:41,Donnell Washington,57738,17 - 11,Indiana,Eddie Neitenbach,92977
82920,2024/2025,2025-03-20,NCAA Championships,184,W,SV-1,9 - 6,Nick Fine,71734,15 - 9,Columbia,Eddie Neitenbach,92977
82921,2024/2025,2025-03-20,NCAA Championships,184,W,FALL,1:08,TJ McDonnell,79786,8 - 15,Oregon State,Eddie Neitenbach,92977


In [77]:
levi_matches = raw[raw['Wrestler'] == 'Levi Haines']
levi_matches

Unnamed: 0,Season,Date,Event,Weight Class,Result,Result Type,Score,Opponent,Opponent ID,Opponent Record,Opponent School,Wrestler,Wrestler ID
5311,2021/2022,2022-01-07,David Lehman Open,157,W,DEC,6 - 2,Jon Errico,45265,12 - 3,Virginia,Levi Haines,72657
5312,2021/2022,2022-01-07,David Lehman Open,157,W,INJ,5:17,Andrew Cerniglia,62439,27 - 8,Navy,Levi Haines,72657
5313,2021/2022,2022-01-07,David Lehman Open,157,W,MD,10 - 1,Jack Bokina,56798,11 - 9,Brown,Levi Haines,72657
5314,2021/2022,2022-01-07,David Lehman Open,157,W,MFOR,0 - 0,Markus Hartman,50585,16 - 10,Army West Point,Levi Haines,72657
6692,2021/2022,2022-02-06,Edinboro Open,157,W,DEC,7 - 2,Jared Hill,72164,6 - 1,Oklahoma,Levi Haines,72657
...,...,...,...,...,...,...,...,...,...,...,...,...,...
82400,2024/2025,2025-03-20,NCAA Championships,174,W,DEC,4 - 1,Simon Ruiz,78145,27 - 7,Cornell,Levi Haines,72657
82401,2024/2025,2025-03-20,NCAA Championships,174,L,DEC,4 - 2,Dean Hamiti,62894,27 - 1,Oklahoma State,Levi Haines,72657
82402,2024/2025,2025-03-20,NCAA Championships,174,W,DEC,7 - 2,Danny Wask,72981,23 - 9,Navy,Levi Haines,72657
82403,2024/2025,2025-03-20,NCAA Championships,174,W,DEC,4 - 0,Gaven Sax,65396,15 - 7,Oklahoma,Levi Haines,72657


In [78]:
wrestler_id = levi_matches['Wrestler ID'].iloc[0]
match_date = levi_matches['Date'].iloc[-1]

In [79]:
levi_historical_data = levi_matches[
            (levi_matches['Wrestler ID'] == wrestler_id) & 
            (levi_matches['Date'] < match_date)
        ].copy()

In [80]:
levi_historical_data.tail(15)

Unnamed: 0,Season,Date,Event,Weight Class,Result,Result Type,Score,Opponent,Opponent ID,Opponent Record,Opponent School,Wrestler,Wrestler ID
64844,2024/2025,2024-12-22,Penn State - Missouri Dual,174,L,SV-1,4 - 1,Keegan O'Toole,61119,20 - 1,Missouri,Levi Haines,72657
64845,2024/2025,2024-12-22,Penn State - Binghamton Dual,174,W,TF5,18 - 0 5:00,Roberto Padilla,81108,3 - 10,Binghamton,Levi Haines,72657
64846,2024/2025,2024-12-22,Little Rock - Penn State Dual,174,W,TF5,18 - 2 5:33,Kodiak Cannedy,73048,5 - 5,Little Rock,Levi Haines,72657
68789,2024/2025,2025-01-10,Michigan State - Penn State Dual,174,W,FALL,4:13,Ceasar Garza,73104,13 - 15,Michigan State,Levi Haines,72657
70858,2024/2025,2025-01-17,Penn State - Nebraska Dual,174,W,DEC,9 - 2,Lenny Pinto,63137,24 - 7,Nebraska,Levi Haines,72657
72521,2024/2025,2025-01-24,Penn State - Rutgers Dual,174,W,DEC,5 - 2,Jackson Turley,57636,17 - 12,Rutgers,Levi Haines,72657
73856,2024/2025,2025-01-31,Iowa - Penn State Dual,174,W,DEC,10 - 3,Patrick Kennedy,56760,18 - 7,Iowa,Levi Haines,72657
75891,2024/2025,2025-02-07,Michigan - Penn State Dual,174,W,FALL,2:37,Joseph Walker,63717,10 - 10,Michigan,Levi Haines,72657
76487,2024/2025,2025-02-09,Maryland - Penn State Dual,174,W,DEC,8 - 1,Branson John,92961,14 - 19,Maryland,Levi Haines,72657
77113,2024/2025,2025-02-14,Penn State - Ohio State Dual,174,W,DEC,6 - 4,Carson Kharchla,57703,14 - 5,Ohio State,Levi Haines,72657


In [81]:
def calculate_streaks(historical_results):
    if not historical_results:
        return 0, 0
    
    win_streak = 0
    for result in reversed(historical_results):
        if result == 'W':
            win_streak += 1
        else:
            break
            
    # Calculate current loss streak
    loss_streak = 0
    for result in reversed(historical_results):
        if result == 'L':
            loss_streak += 1
        else:
            break
            
    return win_streak, loss_streak

In [82]:
print(calculate_streaks(levi_historical_data['Result'].tolist()))

(14, 0)


In [83]:
def calculate_win_rate(results):
    """Simple win percentage"""
    if not results:
        return 0.5  # Neutral baseline for no history
    return sum(1 for result in results if result == 'W') / len(results)

In [84]:
print(calculate_win_rate(levi_historical_data['Result'].tolist()))

0.9634146341463414


In [85]:
def calculate_weighted_win_rate(results, weights=None):
    """More recent matches weighted higher"""
    if not results:
        return 0.5
        
    if weights is None:
        # Exponential weights favoring recent matches
        weights = np.exp(np.linspace(0, 1, len(results)))
    
    wins = [1 if result == 'W' else 0 for result in results]
    return np.average(wins, weights=weights)

In [86]:
print(calculate_weighted_win_rate(levi_historical_data['Result'].tolist()))

0.9651086721428607


In [87]:
def calculate_bonus_rate(result_types):
    """Rate of bonus point victories"""
    if not result_types:
        return 0.0
    bonus_types = ['FALL', 'TF5', 'MD']
    return sum(1 for rt in result_types if rt in bonus_types) / len(result_types)

In [88]:
levi_historical_data['Result Type'].value_counts()

Result Type
DEC     32
FALL    20
MD      12
TF5     10
SV-1     5
INJ      1
MFOR     1
TB-2     1
Name: count, dtype: int64

In [89]:
print(calculate_bonus_rate(levi_historical_data['Result Type'].tolist()))

0.5121951219512195


In [90]:
def get_recent_performance(historical_data, n_matches=5):
    """Returns recent win rate, bonus rate, etc."""
    if len(historical_data) == 0:
        return {'win_rate': 0.5, 'bonus_rate': 0.0, 'match_count': 0}
    
    recent_data = historical_data.tail(n_matches)
    return {
        'win_rate': calculate_win_rate(recent_data['Result'].tolist()),
        'bonus_rate': calculate_bonus_rate(recent_data['Result Type'].tolist()),
        'match_count': len(recent_data)
    }


In [91]:
print(get_recent_performance(levi_historical_data, n_matches=5))

{'win_rate': 1.0, 'bonus_rate': 0.6, 'match_count': 5}


In [92]:
def get_opponent_average_strength(opponents_list, all_match_data, before_date):
    """Average win rate of recent opponents"""
    if not opponents_list:
        return 0.5
    
    opponent_win_rates = []
    for opponent in opponents_list:
        # Get opponent's record before the match date
        opponent_data = all_match_data[
            (all_match_data['Wrestler ID'] == opponent) & 
            (all_match_data['Date'] < before_date)
        ]
        if len(opponent_data) > 0:
            win_rate = calculate_win_rate(opponent_data['Result'].tolist())
            opponent_win_rates.append(win_rate)
    
    return np.mean(opponent_win_rates) if opponent_win_rates else 0.5

def calculate_strength_of_schedule(all_opponents, all_match_data, before_date):
    """Overall opponent strength"""
    return get_opponent_average_strength(all_opponents, all_match_data, before_date)

def get_performance_vs_strong_opponents(matches_data, all_match_data, before_date, strong_threshold=0.4):
    """How well against tough competition"""
    if len(matches_data) == 0:
        return 0.5
    
    strong_opponent_results = []
    
    for _, match in matches_data.iterrows():
        opponent = match['Opponent ID']
        match_date = match['Date']
        
        # Get opponent's win rate before this match
        opponent_data = all_match_data[
            (all_match_data['Wrestler ID'] == opponent) & 
            (all_match_data['Date'] < match_date)
        ]
        
        if len(opponent_data) > 0:
            opponent_win_rate = calculate_win_rate(opponent_data['Result'].tolist())
            if opponent_win_rate >= strong_threshold:
                strong_opponent_results.append((match['Result'], match['Opponent']))
    
    print(strong_opponent_results)
    
    return calculate_win_rate(strong_opponent_results) if strong_opponent_results else 0.5

In [93]:
print(calculate_strength_of_schedule(levi_historical_data['Opponent ID'].tolist(), raw, match_date))

0.6000057898423874


In [99]:
print(get_performance_vs_strong_opponents(levi_historical_data, raw, match_date, strong_threshold=0.8))

[('W', 'Jared Hill'), ('W', 'Paddy Gallagher'), ('L', 'Vince Zerban'), ('W', 'Teague Travis'), ('W', 'Carson Kharchla')]
0.0


In [95]:
keegan = raw[raw['Wrestler ID'] == 61119].copy()
print(keegan)

#print(calculate_win_rate(raw[raw['Wrestler ID'] == 61119]['Result'].tolist()))

Empty DataFrame
Columns: [Season, Date, Event, Weight Class, Result, Result Type, Score, Opponent, Opponent ID, Opponent Record, Opponent School, Wrestler, Wrestler ID]
Index: []
