In [1]:
# `display` and `HTML` are imported from the public `IPython.display` module;
# importing them from `IPython.core.display` is deprecated and emits a
# DeprecationWarning on current IPython releases.
from IPython.display import display, HTML
# Inject CSS so the notebook's `.container` element spans the full page width.
display(HTML("<style>.container { width:100% !important; }</style>"))
import pandas as pd
import numpy as np
import io
import random
import math
import scipy.stats
import matplotlib.pylab as plt
import seaborn as sns

  from IPython.core.display import display, HTML


In [20]:
# Read the league history workbook into a DataFrame (path is relative to the
# notebook's working directory).
history_workbook = 'data/FFN_History_data.xlsx'
data = pd.read_excel(history_workbook)

In [21]:
# Show the freshly loaded DataFrame (pandas elides the middle rows of long frames).
data

Unnamed: 0,ID,Name,League,Year,League Year #,Rank,Sport,Minigame?,Dynasty?,Bestball?,Orphaned?
0,112.0,Grant,FFN Alumni,2021,1,1.0,Football,0,1,0,0
1,109.0,Ari,FFN Alumni,2021,1,2.0,Football,0,1,0,0
2,103.0,Cam,FFN Alumni,2021,1,3.0,Football,0,1,0,0
3,106.0,Kyle,FFN Alumni,2021,1,4.0,Football,0,1,0,0
4,127.0,DMac,FFN Alumni,2021,1,5.0,Football,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...
443,162.0,Chad,CFB Fantasy Redraft,2023,1,5.0,Football,0,0,0,0
444,163.0,Josh,CFB Fantasy Redraft,2023,1,7.0,Football,0,0,0,0
445,124.0,Mitch,CFB Fantasy Redraft,2023,1,10.0,Football,0,0,0,0
446,104.0,Double,CFB Fantasy Redraft,2023,1,11.0,Football,0,0,0,0


## Compute Total Number of Participants in Each Contest

In [22]:
# Count the distinct participant IDs in every league-year combination.
league_year_keys = ['League', 'Year']
total_players = (
    data.groupby(league_year_keys)['ID']
    .nunique()
    .reset_index(name='Total Players')
)

# Attach the count to every row of the original frame.
# A 'Total Players' column left on `data` by an earlier run of the later cells
# is dropped first: without this, re-executing the cell makes the merge emit
# 'Total Players_x' / 'Total Players_y', and any later lookup of
# data_merged['Total Players'] raises a KeyError. On a fresh kernel the drop is
# a no-op, so the result is unchanged.
data_merged = pd.merge(
    data.drop(columns=['Total Players'], errors='ignore'),
    total_players,
    how='left',
    on=league_year_keys,
)

data_merged.tail()

Unnamed: 0,ID,Name,League,Year,League Year #,Rank,Sport,Minigame?,Dynasty?,Bestball?,Orphaned?,Total Players
443,162.0,Chad,CFB Fantasy Redraft,2023,1,5.0,Football,0,0,0,0,6
444,163.0,Josh,CFB Fantasy Redraft,2023,1,7.0,Football,0,0,0,0,6
445,124.0,Mitch,CFB Fantasy Redraft,2023,1,10.0,Football,0,0,0,0,6
446,104.0,Double,CFB Fantasy Redraft,2023,1,11.0,Football,0,0,0,0,6
447,164.0,Noah,CFB Fantasy Redraft,2023,1,12.0,Football,0,0,0,0,6


In [23]:
# Update the Data DataFrame to reflect the total players results
data['Total Players'] = data_merged['Total Players']

# Find the largest Rank in each league-year combination
largest_rank = data.groupby(['League', 'Year'])['Rank'].max().reset_index(name='Largest Rank')

# Merge this back with the original DataFrame
data_with_largest_rank = pd.merge(data, largest_rank, how='left', on=['League', 'Year'])

# Raise 'Total Players' to the largest recorded rank wherever that rank exceeds
# the unique-ID count. A column-wise max is used instead of a row-wise
# apply(max(...)): it is vectorized, it ignores a NaN in either column rather
# than depending on argument order the way the builtin max() does, and it
# still returns a Series when the frame is empty.
data_with_largest_rank['Total Players'] = (
    data_with_largest_rank[['Total Players', 'Largest Rank']].max(axis=1)
)

# Drop the helper column by rebinding rather than mutating in place
data_with_largest_rank = data_with_largest_rank.drop(columns=['Largest Rank'])

data_with_largest_rank.tail()  # Displaying the last few rows to verify the update

Unnamed: 0,ID,Name,League,Year,League Year #,Rank,Sport,Minigame?,Dynasty?,Bestball?,Orphaned?,Total Players
443,162.0,Chad,CFB Fantasy Redraft,2023,1,5.0,Football,0,0,0,0,12.0
444,163.0,Josh,CFB Fantasy Redraft,2023,1,7.0,Football,0,0,0,0,12.0
445,124.0,Mitch,CFB Fantasy Redraft,2023,1,10.0,Football,0,0,0,0,12.0
446,104.0,Double,CFB Fantasy Redraft,2023,1,11.0,Football,0,0,0,0,12.0
447,164.0,Noah,CFB Fantasy Redraft,2023,1,12.0,Football,0,0,0,0,12.0


In [24]:
# Copy the rank-adjusted league sizes back onto `data`, replacing the
# unique-ID counts stored there (the assignment aligns on the row index).
adjusted_total_players = data_with_largest_rank['Total Players']
data['Total Players'] = adjusted_total_players

In [25]:
# Show `data` again, now carrying the 'Total Players' column assigned above.
data

Unnamed: 0,ID,Name,League,Year,League Year #,Rank,Sport,Minigame?,Dynasty?,Bestball?,Orphaned?,Total Players
0,112.0,Grant,FFN Alumni,2021,1,1.0,Football,0,1,0,0,12.0
1,109.0,Ari,FFN Alumni,2021,1,2.0,Football,0,1,0,0,12.0
2,103.0,Cam,FFN Alumni,2021,1,3.0,Football,0,1,0,0,12.0
3,106.0,Kyle,FFN Alumni,2021,1,4.0,Football,0,1,0,0,12.0
4,127.0,DMac,FFN Alumni,2021,1,5.0,Football,0,1,0,0,12.0
...,...,...,...,...,...,...,...,...,...,...,...,...
443,162.0,Chad,CFB Fantasy Redraft,2023,1,5.0,Football,0,0,0,0,12.0
444,163.0,Josh,CFB Fantasy Redraft,2023,1,7.0,Football,0,0,0,0,12.0
445,124.0,Mitch,CFB Fantasy Redraft,2023,1,10.0,Football,0,0,0,0,12.0
446,104.0,Double,CFB Fantasy Redraft,2023,1,11.0,Football,0,0,0,0,12.0


## Compute Adjusted Percentile Rank

### To account for differences in league size, each finish is scored with an adjusted percentile rank, computed with the following formula:

$$
\text{Adjusted Percentile} = \left( \frac{n - \text{rank}}{n - 1} \right) \cdot \ln(1 + n)
$$

where:

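- $\text{rank}$ is the individual's finishing position within the league, with 1 being the best position.
- $n$ is the total number of participants in that league for that year.

The first factor rescales the rank to run from 1 (first place) down to 0 (rank $n$); multiplying by $\ln(1 + n)$ gives finishes in larger leagues more weight. For $n = 1$ the fraction is undefined, so $\ln 2$ is used as a placeholder.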


In [26]:
# Define the function to calculate Adjusted Percentile
def calculate_adjusted_percentile(row):
    """Return the size-adjusted percentile score for one result row.

    The score is ``((n - rank) / (n - 1)) * ln(1 + n)``, where ``n`` is the
    row's 'Total Players' and ``rank`` is its 'Rank'.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide the keys 'Total Players' and 'Rank'.

    Returns
    -------
    float
        The adjusted percentile when ``n > 1``; ``ln(2)`` as a placeholder
        when ``n <= 1`` (the fraction is undefined for a single player);
        NaN when 'Total Players' is missing. A missing 'Rank' with ``n > 1``
        propagates as NaN through the arithmetic.
    """
    n = row['Total Players']
    rank = row['Rank']

    # A missing league size is not a one-player league: NaN fails `n > 1`, so
    # without this guard such rows would silently receive the ln(2) placeholder.
    if pd.isna(n):
        return np.nan

    if n > 1:
        return ((n - rank) / (n - 1)) * np.log(1 + n)

    # When there's only one player, use log(2) as a placeholder
    return np.log(2)

In [30]:
# Score every row with calculate_adjusted_percentile and store the result
# as a new 'Adjusted Percentile' column on `data`.
adjusted_percentile = data.apply(calculate_adjusted_percentile, axis=1)
data['Adjusted Percentile'] = adjusted_percentile

data.head()

Unnamed: 0,ID,Name,League,Year,League Year #,Rank,Sport,Minigame?,Dynasty?,Bestball?,Orphaned?,Total Players,Adjusted Percentile
0,112.0,Grant,FFN Alumni,2021,1,1.0,Football,0,1,0,0,12.0,2.564949
1,109.0,Ari,FFN Alumni,2021,1,2.0,Football,0,1,0,0,12.0,2.331772
2,103.0,Cam,FFN Alumni,2021,1,3.0,Football,0,1,0,0,12.0,2.098595
3,106.0,Kyle,FFN Alumni,2021,1,4.0,Football,0,1,0,0,12.0,1.865418
4,127.0,DMac,FFN Alumni,2021,1,5.0,Football,0,1,0,0,12.0,1.632241
