**Shared Data Folder:**
https://drive.google.com/drive/folders/1iOAHBlJYE5Jqxbf_FFDpMXpMNpKdpdhB?usp=sharing

**Mount Google Drive (the shared folder above must be stored in it)**

In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive; the data-loading
# cell below reads its CSV files from '/content/drive/My Drive/Fantasy Football/'.
from google.colab import drive
# NOTE(review): force_remount=True presumably remounts the drive when this cell is
# re-run while a mount already exists -- confirm against the google.colab docs.
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


**Run this block before any models to load and preprocess the data**

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from collections import defaultdict

# Folder (inside the mounted Drive) that holds every FantasyPros CSV export.
DATA_DIR = '/content/drive/My Drive/Fantasy Football'
# Seasons used for training: 2002 through 2023 inclusive.
TRAINING_SEASONS = range(2002, 2024)


def _season_file(position, season):
    """Return the path of the FantasyPros statistics export for a position and season label."""
    return f'{DATA_DIR}/FantasyPros_Fantasy_Football_Statistics_{position}_{season}.csv'


def _read_stats_csv(path):
    """Read one FantasyPros export into a DataFrame.

    thousands=',' makes pandas parse totals written as "1,234" as the number 1234.
    NOTE(review): this assumes the exports format four-digit totals with a comma
    (confirm against the CSVs). Without it such cells are read as text and the
    later pd.to_numeric(..., errors='coerce') silently turns them into NaN.
    The option has no effect on files that contain no such values.
    """
    return pd.read_csv(path, thousands=',')


# Load all of WR data from 2002-2023
WR_training_files = [_season_file('WR', season) for season in TRAINING_SEASONS]
df_training_list_WR = [_read_stats_csv(file) for file in WR_training_files]
df_2002_to_2023_WR = pd.concat(df_training_list_WR, ignore_index=True)

# Load all of RB data from 2002-2023
RB_training_files = [_season_file('RB', season) for season in TRAINING_SEASONS]
df_training_list_RB = [_read_stats_csv(file) for file in RB_training_files]
df_2002_to_2023_RB = pd.concat(df_training_list_RB, ignore_index=True)

# Load WR and RB data from 2024 season through week 6
df_WR_2024 = _read_stats_csv(_season_file('WR', '2024WK6'))
df_RB_2024 = _read_stats_csv(_season_file('RB', '2024WK6'))


# pandas renames a repeated CSV header X to 'X.1'. The exports appear to contain two
# YDS and two TD columns per position (rushing and receiving).
# NOTE(review): inferred from the original column lists, which named YDS and TD twice
# in the FantasyPros column order -- confirm against the CSV headers.
#   RB files: 'YDS'/'TD' = rushing,   'YDS.1'/'TD.1' = receiving
#   WR files: 'YDS'/'TD' = receiving, 'YDS.1'/'TD.1' = rushing
columns_to_convert_rb = ['ATT', 'YDS', 'Y/A', 'LG', '20+', 'TD', 'REC', 'TGT', 'YDS.1', 'Y/R', 'TD.1', 'G']
columns_to_convert_WR = ['REC', 'TGT', 'YDS', 'TD', 'ATT', 'YDS.1', 'TD.1', 'G', 'Y/R', 'LG', '20+']

# New per-game feature -> season-total column it is derived from (divided by games played).
WR_per_game_sources = {
    'REC_per_game': 'REC',
    'TGT_per_game': 'TGT',
    'YDS_per_game': 'YDS',
    'TD_per_game': 'TD',
    'ATT_per_game': 'ATT',
    'RUSH_YDS_per_game': 'YDS.1',
    'RUSH_TD_per_game': 'TD.1',
}
RB_per_game_sources = {
    'ATT_per_game': 'ATT',
    'YDS_per_game': 'YDS',
    'TD_per_game': 'TD',
    'REC_per_game': 'REC',
    'TGT_per_game': 'TGT',
    'REC_YDS_per_game': 'YDS.1',
    'REC_TD_per_game': 'TD.1',
}


def _resolve_stat_column(df, column):
    """Return `column` if `df` has it; otherwise drop a trailing '.1' and return the base name.

    The fallback keeps the previous behaviour (reading the first YDS/TD column) for a
    frame that does not contain the pandas-renamed duplicate column.
    """
    if column in df.columns or not column.endswith('.1'):
        return column
    return column[:-2]


def _add_per_game_features(df, numeric_columns, per_game_sources):
    """Return a cleaned copy of `df` with per-game feature columns added.

    Steps: coerce `numeric_columns` to numbers (unparseable cells become NaN), drop rows
    whose games-played value 'G' is missing, then add one column per entry of
    `per_game_sources`, computed as source column / 'G'.

    An explicit copy is taken after dropna so the new columns are written to an
    independent frame; assigning to the dropna result directly is what raised the
    SettingWithCopyWarning seen in this cell's output.
    """
    cleaned = df.copy()
    resolved = [_resolve_stat_column(cleaned, col) for col in numeric_columns]
    # dict.fromkeys de-duplicates while keeping order (the fallback can repeat a name).
    for col in dict.fromkeys(resolved):
        cleaned[col] = pd.to_numeric(cleaned[col], errors='coerce')

    cleaned = cleaned.dropna(subset=['G']).copy()

    #Convert data to per-game statistics, creating new features
    for feature, source in per_game_sources.items():
        cleaned[feature] = cleaned[_resolve_stat_column(cleaned, source)] / cleaned['G']
    return cleaned


df_2002_to_2023_WR = _add_per_game_features(df_2002_to_2023_WR, columns_to_convert_WR, WR_per_game_sources)
df_2002_to_2023_RB = _add_per_game_features(df_2002_to_2023_RB, columns_to_convert_rb, RB_per_game_sources)
df_WR_2024 = _add_per_game_features(df_WR_2024, columns_to_convert_WR, WR_per_game_sources)
df_RB_2024 = _add_per_game_features(df_RB_2024, columns_to_convert_rb, RB_per_game_sources)

#Function for defining and assigning the tiers based on player rank for RBs
def assign_tier_rb(rank):
    """Map a running back's rank to a tier label.

    rank <= 12 -> 'RB1', <= 24 -> 'RB2', <= 36 -> 'Flex', <= 60 -> 'Bench'.
    Any rank that satisfies none of these comparisons (larger ranks, or NaN, which
    compares false against every cutoff) -> 'Cut'.
    """
    tier_cutoffs = ((12, 'RB1'), (24, 'RB2'), (36, 'Flex'), (60, 'Bench'))
    for upper_bound, tier in tier_cutoffs:
        if rank <= upper_bound:
            return tier
    return 'Cut'

# Derive the RB training target: each row's tier label is computed from its 'Rank' value.
df_2002_to_2023_RB['Tier'] = df_2002_to_2023_RB['Rank'].map(assign_tier_rb)

#Function for defining and assigning the tiers based on player rank for WRs
def assign_tier_wr(rank):
    """Map a wide receiver's rank to a tier label.

    The first cutoff that `rank` does not exceed decides the tier:
    12 -> 'WR1', 24 -> 'WR2', 36 -> 'Flex', 60 -> 'Bench'. When no cutoff matches
    (rank above 60, or NaN, which compares false everywhere) the tier is 'Cut'.
    """
    ladder = [('WR1', 12), ('WR2', 24), ('Flex', 36), ('Bench', 60)]
    return next((label for label, cutoff in ladder if rank <= cutoff), 'Cut')

# Tier labels for the WR training data and for both 2024 frames.
df_2002_to_2023_WR['Tier'] = df_2002_to_2023_WR['Rank'].map(assign_tier_wr)
df_WR_2024['Tier'] = df_WR_2024['Rank'].map(assign_tier_wr)
df_RB_2024['Tier'] = df_RB_2024['Rank'].map(assign_tier_rb)

# Model inputs per position. The target for every model is the 'Tier' column.
# NOTE(review): 'LG', '20+', 'Y/R' and 'Y/A' are used as exported (not divided by G),
# while the 2024 frames only cover the season through week 6 -- check that the
# count-like columns are comparable between full seasons and the partial season.
WR_features = ['REC_per_game', 'TGT_per_game', 'YDS_per_game', 'Y/R', 'LG', '20+', 'TD_per_game', 'ATT_per_game', 'RUSH_YDS_per_game', 'RUSH_TD_per_game']
RB_features = ['ATT_per_game', 'YDS_per_game', 'TD_per_game', 'REC_per_game', 'TGT_per_game', 'REC_YDS_per_game', 'REC_TD_per_game', 'LG', '20+','Y/A']

# Feature matrices (missing values replaced by 0) and tier targets for 2002-2023.
X_train_RB = df_2002_to_2023_RB[RB_features].fillna(0)
y_train_RB = df_2002_to_2023_RB['Tier']
X_train_WR = df_2002_to_2023_WR[WR_features].fillna(0)
y_train_WR = df_2002_to_2023_WR['Tier']

# The same for the 2024 season data.
X_WR_2024 = df_WR_2024[WR_features].fillna(0)
X_RB_2024 = df_RB_2024[RB_features].fillna(0)
y_WR_2024 = df_WR_2024['Tier']
y_RB_2024 = df_RB_2024['Tier']

# 80/20 train test split on training data (fixed seed so the split is repeatable)
X_train_RB, X_test_RB, y_train_RB, y_test_RB = train_test_split(
    X_train_RB, y_train_RB, random_state=42, test_size=0.2
)
X_train_WR, X_test_WR, y_train_WR, y_test_WR = train_test_split(
    X_train_WR, y_train_WR, random_state=42, test_size=0.2
)






A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_2002_to_2023_WR['REC_per_game'] = df_2002_to_2023_WR['REC'] / df_2002_to_2023_WR['G']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_2002_to_2023_WR['TGT_per_game'] = df_2002_to_2023_WR['TGT'] / df_2002_to_2023_WR['G']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_2002_to_2023_WR['YDS_per

**Run this block for the Decision Tree model. The large commented-out section builds trees of depth 5–12 and plots training and testing accuracy vs. tree depth; uncommenting it increases the run time to around 12 minutes.**

In [None]:
#Block For Decision Tree

#Decision Tree class
class CustomDecisionTree:
    """Classification tree grown by exhaustive, entropy-minimising threshold search.

    The fitted tree is stored in `self.tree` as nested dicts: an internal node is
    {"feature", "threshold", "left", "right"} and a leaf is {"leaf": label}.
    Samples with feature value < threshold go left, all others go right.
    """

    def __init__(self, max_depth=5, min_samples_split=2):
        """Store the hyper-parameters.

        max_depth: depth at which growth stops and a leaf is created.
        min_samples_split: a node with fewer samples becomes a leaf; a candidate split
            is also rejected when either side would hold fewer samples than this.
        """
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.tree = None

    def entropy(self, y):
        """Return the entropy (base 2) of the label array `y`.

        A small constant (1e-9) is added inside the logarithm, exactly as before, so
        split scores are unchanged.
        """
        classes, counts = np.unique(y, return_counts=True)
        proportions = counts / len(y)
        return -np.sum(proportions * np.log2(proportions + 1e-9))

    def best_split(self, X, y):
        """Find the (feature, threshold) pair with the lowest weighted child entropy.

        Every unique value of every feature is tried as a threshold. Returns a dict with
        "feature", "threshold", "left_idx" and "right_idx" (boolean row masks), or None
        when no candidate leaves at least `min_samples_split` rows on both sides.
        Ties keep the first candidate found.
        """
        best_entropy = float("inf")
        best_split = None
        n_samples, n_features = X.shape

        for feature in range(n_features):
            column = X[:, feature]
            for threshold in np.unique(column):
                left_idx = column < threshold
                right_idx = column >= threshold
                # Count with numpy: the builtin sum() iterates the mask element by
                # element in Python, which dominated the run time of this loop.
                n_left = np.count_nonzero(left_idx)
                n_right = np.count_nonzero(right_idx)
                if n_left < self.min_samples_split or n_right < self.min_samples_split:
                    continue

                split_entropy = (
                    n_left * self.entropy(y[left_idx]) + n_right * self.entropy(y[right_idx])
                ) / len(y)

                if split_entropy < best_entropy:
                    best_entropy = split_entropy
                    best_split = {
                        "feature": feature,
                        "threshold": threshold,
                        "left_idx": left_idx,
                        "right_idx": right_idx
                    }
        return best_split

    def _majority_leaf(self, y):
        """Return a leaf node holding the most frequent label in `y`."""
        unique_classes, y_integers = np.unique(y, return_inverse=True)
        return {"leaf": unique_classes[np.bincount(y_integers).argmax()]}

    def build_tree(self, X, y, depth=0):
        """Recursively build and return the node dict for the rows in `X` / `y`.

        A leaf is produced when `depth` reaches max_depth, when the node has fewer than
        min_samples_split rows, when all labels are equal, or when no valid split exists.
        """
        n_samples, n_features = X.shape
        num_classes = len(set(y))

        if depth >= self.max_depth or n_samples < self.min_samples_split or num_classes == 1:
            return self._majority_leaf(y)

        best_split = self.best_split(X, y)
        if best_split is None:
            return self._majority_leaf(y)

        left_rows, right_rows = best_split["left_idx"], best_split["right_idx"]
        return {
            "feature": best_split["feature"],
            "threshold": best_split["threshold"],
            "left": self.build_tree(X[left_rows], y[left_rows], depth + 1),
            "right": self.build_tree(X[right_rows], y[right_rows], depth + 1)
        }

    def fit(self, X, y):
        """Fit the tree to the training data (array-likes; converted with np.array).

        Raises ValueError when the training data is empty.
        """
        features, labels = np.array(X), np.array(y)
        if len(labels) == 0:
            raise ValueError("CustomDecisionTree.fit requires at least one training sample")
        self.tree = self.build_tree(features, labels)

    def predict_sample(self, x, tree):
        """Return the label for one sample `x` by walking down from the node `tree`."""
        node = tree
        while "leaf" not in node:
            go_left = x[node["feature"]] < node["threshold"]
            node = node["left"] if go_left else node["right"]
        return node["leaf"]

    def predict(self, X):
        """Return a list with one predicted label per row of `X`.

        Raises RuntimeError when called before fit().
        """
        if self.tree is None:
            raise RuntimeError("CustomDecisionTree must be fitted before calling predict")
        return [self.predict_sample(x, self.tree) for x in np.array(X)]

# # Training and plotting accuracies for different depths
# def plot_accuracy_vs_depth(X_train, y_train, X_test, y_test, max_depths, label):
#     train_accuracies = []
#     test_accuracies = []

      # Build trees from depth 5-max depths
#     for depth in range(5, max_depths + 1):
#         tree = CustomDecisionTree(max_depth=depth, min_samples_split=10)
#         tree.fit(X_train, y_train)

#         # Training accuracy calculation
#         y_train_pred = tree.predict(X_train)
#         train_accuracy = accuracy_score(y_train, y_train_pred)
#         train_accuracies.append(train_accuracy)

#         # Testing accuracy calculation
#         y_test_pred = tree.predict(X_test)
#         test_accuracy = accuracy_score(y_test, y_test_pred)
#         test_accuracies.append(test_accuracy)

      # Plot training and testing accuracy vs. Tree depth
#     plt.plot(range(5, max_depths + 1), train_accuracies, label=f'{label} Training')
#     plt.plot(range(5, max_depths + 1), test_accuracies, label=f'{label} Testing')
#     plt.xlabel("Tree Depth")
#     plt.ylabel("Accuracy")
#     plt.title(f"Training and Testing Accuracy vs. Tree Depth for {label}")
#     plt.legend()
#     plt.grid()

# max_depths = 12

# plt.figure(figsize=(12, 5))

# # WR Data Plot
# plt.subplot(1, 2, 1)
# plot_accuracy_vs_depth(X_train_WR, y_train_WR, X_test_WR, y_test_WR, max_depths, label="WR")

# # RB Data Plot
# plt.subplot(1, 2, 2)
# plot_accuracy_vs_depth(X_train_RB, y_train_RB, X_test_RB, y_test_RB, max_depths, label="RB")

# plt.tight_layout()
# plt.show()

# Tree depths used for the final models: 9 for wide receivers, 8 for running backs.
wr_depth, rb_depth = 9, 8

# Build the WR tree at its chosen depth and fit it on the WR training split.
best_tree_WR = CustomDecisionTree(max_depth=wr_depth)
best_tree_WR.fit(X_train_WR, y_train_WR)

# Function for applying evaluation metrics to tree
def evaluate_model(model, X_test, y_test):
    """Score a fitted model on held-out data.

    Calls model.predict(X_test) once and returns the tuple
    (accuracy, precision, recall, f1); precision, recall and f1 are computed with
    average='weighted'.
    """
    predictions = model.predict(X_test)
    scores = [accuracy_score(y_test, predictions)]
    for weighted_metric in (precision_score, recall_score, f1_score):
        scores.append(weighted_metric(y_test, predictions, average='weighted'))
    return tuple(scores)

# Score the WR tree on its held-out split and report the metrics.
accuracy_wr, precision_wr, recall_wr, f1_wr = evaluate_model(best_tree_WR, X_test_WR, y_test_WR)
print("\n".join([
    f"WR Tree Depth: {wr_depth}",
    f"WR Testing Accuracy: {accuracy_wr:.4f}",
    f"WR Precision: {precision_wr:.4f}",
    f"WR Recall: {recall_wr:.4f}",
    f"WR f1: {f1_wr:.4f}",
]))

# Build and fit the RB tree, then score and report it the same way.
best_tree_RB = CustomDecisionTree(max_depth=rb_depth)
best_tree_RB.fit(X_train_RB, y_train_RB)
accuracy_rb, precision_rb, recall_rb, f1_rb = evaluate_model(best_tree_RB, X_test_RB, y_test_RB)
print("\n".join([
    f"\nRB Tree Depth: {rb_depth}",
    f"RB Testing Accuracy: {accuracy_rb:.4f}",
    f"RB Precision: {precision_rb:.4f}",
    f"RB Recall: {recall_rb:.4f}",
    f"RB f1: {f1_rb:.4f}",
]))

# Tier predictions for the 2024 data from the two fitted trees.
y_2024_WR_pred = best_tree_WR.predict(X_WR_2024)
y_2024_RB_pred = best_tree_RB.predict(X_RB_2024)

# Copies of the 2024 frames with the prediction next to the rank-derived 'Tier'.
df_WR_predictions = df_WR_2024.assign(Predicted_Tier=y_2024_WR_pred)
df_RB_predictions = df_RB_2024.assign(Predicted_Tier=y_2024_RB_pred)

# Players the trees place in the top two tiers of each position.
WR1_players = df_WR_predictions.loc[df_WR_predictions['Predicted_Tier'] == 'WR1']
WR2_players = df_WR_predictions.loc[df_WR_predictions['Predicted_Tier'] == 'WR2']
RB1_players = df_RB_predictions.loc[df_RB_predictions['Predicted_Tier'] == 'RB1']
RB2_players = df_RB_predictions.loc[df_RB_predictions['Predicted_Tier'] == 'RB2']

# Print each group under its heading.
tier_report_columns = ['Player', 'Rank', 'Predicted_Tier', 'Tier']
for tier_heading, tier_players in (
    ("WR1 Players:", WR1_players),
    ("\nWR2 Players:", WR2_players),
    ("\nRB1 Players:", RB1_players),
    ("\nRB2 Players:", RB2_players),
):
    print(tier_heading)
    print(tier_players[tier_report_columns])


WR Tree Depth: 9
WR Testing Accuracy: 0.9058
WR Precision: 0.9040
WR Recall: 0.9058
WR f1: 0.9045

RB Tree Depth: 8
RB Testing Accuracy: 0.8434
RB Precision: 0.8381
RB Recall: 0.8434
RB f1: 0.8392
WR1 Players:
                     Player  Rank Predicted_Tier Tier
0          A.J. Brown (PHI)   1.0            WR1  WR1
1        Malik Nabers (NYG)   2.0            WR1  WR1
2        Nico Collins (HOU)   3.0            WR1  WR1
3         Chris Godwin (TB)   4.0            WR1  WR1
4       Ja'Marr Chase (CIN)   5.0            WR1  WR1
5         Cooper Kupp (LAR)   6.0            WR1  WR1
8        Drake London (ATL)   9.0            WR1  WR1
9        Stefon Diggs (HOU)  10.0            WR1  WR1
10      DeVonta Smith (PHI)  11.0            WR1  WR1
11  Amon-Ra St. Brown (DET)  12.0            WR1  WR1
12     Garrett Wilson (NYJ)  13.0            WR1  WR2
13         Rashee Rice (KC)  13.0            WR1  WR2
14        Tee Higgins (CIN)  15.0            WR1  WR2
15         Josh Downs (IND)  15.0 

**Run this block for Naive Bayes Classifier**

In [None]:
#Block For Naive Bayes
#Naive Bayes Classifier class
class CustomNaiveBayes:
    """Gaussian Naive Bayes classifier.

    For every class, fit() stores the class prior and a per-feature mean and variance.
    Prediction picks the class with the highest posterior. The comparison is done on
    log-posteriors: the plain product of densities underflows to 0.0 for samples far
    from every class mean (or for near-constant features), which previously made every
    class tie at zero and returned whichever class came first.
    """

    def __init__(self):
        """Create an unfitted model."""
        self.class_priors = {}   # class label -> fraction of training rows with that label
        self.feature_stats = {}  # class label -> list of {"mean", "var"}, one per feature

    def fit(self, X, y):
        """Estimate priors and per-feature Gaussian parameters from the training data.

        X: 2-D numeric array-like (DataFrame or ndarray), one row per sample.
        y: 1-D array-like of class labels, matched to the rows of X by position.
        Statistics from any previous fit are discarded.
        Raises ValueError when X has no rows.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("CustomNaiveBayes.fit requires at least one training sample")

        # Start from a clean slate so refitting does not keep classes from earlier data.
        self.class_priors = {}
        self.feature_stats = {}

        for cls in np.unique(y):
            X_class = X[y == cls]
            self.class_priors[cls] = len(X_class) / n_samples

            # 1e-9 keeps the variance strictly positive for constant features.
            means = X_class.mean(axis=0)
            variances = X_class.var(axis=0) + 1e-9
            self.feature_stats[cls] = [
                {"mean": mean, "var": var} for mean, var in zip(means, variances)
            ]

    def _log_likelihood(self, x, cls):
        """Return the sum over features of the log Gaussian density of `x` under class `cls`."""
        stats = self.feature_stats[cls]
        means = np.array([stat["mean"] for stat in stats], dtype=float)
        variances = np.array([stat["var"] for stat in stats], dtype=float)
        x = np.asarray(x, dtype=float)
        log_densities = -0.5 * np.log(2.0 * np.pi * variances) - (x - means) ** 2 / (2.0 * variances)
        return float(np.sum(log_densities))

    def _log_posterior(self, x, cls):
        """Return log(prior) + log-likelihood of `x` for class `cls` (unnormalised)."""
        return np.log(self.class_priors[cls]) + self._log_likelihood(x, cls)

    def calculate_likelihood(self, x, cls):
        """Return the likelihood of sample `x` given class `cls`.

        This is the product of the per-feature Gaussian densities; it may underflow to
        0.0, which is why predict_sample compares log-posteriors instead.
        """
        return np.exp(self._log_likelihood(x, cls))

    def calculate_posterior(self, x, cls):
        """Return the unnormalised posterior: prior probability times likelihood."""
        prior = self.class_priors[cls]
        likelihood = self.calculate_likelihood(x, cls)
        return prior * likelihood

    def predict_sample(self, x):
        """Return the class with the highest log-posterior for a single sample `x`."""
        log_posteriors = {cls: self._log_posterior(x, cls) for cls in self.class_priors}
        return max(log_posteriors, key=log_posteriors.get)

    def predict(self, X):
        """Return a numpy array with one predicted class per row of `X` (DataFrame or ndarray)."""
        return np.array([self.predict_sample(x) for x in np.asarray(X, dtype=float)])

# Define a function to evaluate model performance
def evaluate_model(model, X_test, y_test):
    """Return (accuracy, precision, recall, f1) of `model` on the given test data.

    The model's predictions for X_test are compared with y_test; precision, recall
    and f1 use average='weighted'.
    """
    y_hat = model.predict(X_test)
    weighted = {"average": "weighted"}
    return (
        accuracy_score(y_test, y_hat),
        precision_score(y_test, y_hat, **weighted),
        recall_score(y_test, y_hat, **weighted),
        f1_score(y_test, y_hat, **weighted),
    )


# Fit the Naive Bayes classifier on the WR training split, score it on the WR test split.
nb_model_WR = CustomNaiveBayes()
nb_model_WR.fit(X_train_WR, y_train_WR)
accuracy_wr, precision_wr, recall_wr, f1_wr = evaluate_model(nb_model_WR, X_test_WR, y_test_WR)

print("\n".join([
    "Custom Naive Bayes WR Model",
    f"Testing Accuracy: {accuracy_wr:.4f}",
    f"Precision: {precision_wr:.4f}",
    f"Recall: {recall_wr:.4f}",
    f"F1: {f1_wr:.4f}",
]))

# Same procedure for the RB data.
nb_model_RB = CustomNaiveBayes()
nb_model_RB.fit(X_train_RB, y_train_RB)
accuracy_rb, precision_rb, recall_rb, f1_rb = evaluate_model(nb_model_RB, X_test_RB, y_test_RB)

print("\n".join([
    "\nCustom Naive Bayes RB Model",
    f"Testing Accuracy: {accuracy_rb:.4f}",
    f"Precision: {precision_rb:.4f}",
    f"Recall: {recall_rb:.4f}",
    f"F1: {f1_rb:.4f}",
]))

# Tier predictions for the 2024 data from the two Naive Bayes models.
y_2024_WR_pred = nb_model_WR.predict(X_WR_2024)
y_2024_RB_pred = nb_model_RB.predict(X_RB_2024)

# Copies of the 2024 frames with the prediction next to the rank-derived 'Tier'.
df_WR_predictions = df_WR_2024.assign(Predicted_Tier=y_2024_WR_pred)
df_RB_predictions = df_RB_2024.assign(Predicted_Tier=y_2024_RB_pred)

# Players the models place in the top two tiers of each position.
WR1_players = df_WR_predictions.loc[df_WR_predictions['Predicted_Tier'] == 'WR1']
WR2_players = df_WR_predictions.loc[df_WR_predictions['Predicted_Tier'] == 'WR2']
RB1_players = df_RB_predictions.loc[df_RB_predictions['Predicted_Tier'] == 'RB1']
RB2_players = df_RB_predictions.loc[df_RB_predictions['Predicted_Tier'] == 'RB2']

# Print each group under its heading.
tier_report_columns = ['Player', 'Rank', 'Predicted_Tier', 'Tier']
for tier_heading, tier_players in (
    ("WR1 Players:", WR1_players),
    ("\nWR2 Players:", WR2_players),
    ("\nRB1 Players:", RB1_players),
    ("\nRB2 Players:", RB2_players),
):
    print(tier_heading)
    print(tier_players[tier_report_columns])


Custom Naive Bayes WR Model
Testing Accuracy: 0.8381
Precision: 0.8763
Recall: 0.8381
F1: 0.8464

Custom Naive Bayes RB Model
Testing Accuracy: 0.7959
Precision: 0.8358
Recall: 0.7959
F1: 0.8085
WR1 Players:
                     Player  Rank Predicted_Tier   Tier
0          A.J. Brown (PHI)   1.0            WR1    WR1
1        Malik Nabers (NYG)   2.0            WR1    WR1
3         Chris Godwin (TB)   4.0            WR1    WR1
4       Ja'Marr Chase (CIN)   5.0            WR1    WR1
5         Cooper Kupp (LAR)   6.0            WR1    WR1
6    Justin Jefferson (MIN)   7.0            WR1    WR1
8        Drake London (ATL)   9.0            WR1    WR1
17       Allen Lazard (NYJ)  18.0            WR1    WR2
22          Mike Evans (TB)  23.0            WR1    WR2
24    Deebo Samuel Sr. (SF)  25.0            WR1   Flex
53  Kristian Wilkerson (LV)  51.0            WR1  Bench

WR2 Players:
                  Player  Rank Predicted_Tier Tier
2     Nico Collins (HOU)   3.0            WR2  WR1
10  

**Run this block for Gradient Boosting Classifier**

In [None]:
#Block for Gradient Boosting
from sklearn.tree import DecisionTreeRegressor

#Gradient Boosting Classifier class
class CustomGradientBoostingClassifier:
    """Multi-class gradient boosting with one regression tree per class per round.

    The model keeps a raw score per class for every sample. In each round the scores
    are turned into probabilities with softmax, the log-loss gradient
    (one-hot target minus probability) is computed, one DecisionTreeRegressor per
    class is fitted to that gradient, and the raw scores are moved by
    learning_rate * tree prediction.

    Previously the gradient was taken against the raw scores themselves rather than
    their softmax, so training never used the probabilities that prediction reports.
    """

    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3, min_samples_split=2,
                 random_state=None):
        """Store the hyper-parameters.

        n_estimators: number of boosting rounds.
        learning_rate: factor applied to every tree's output.
        max_depth, min_samples_split: passed to each DecisionTreeRegressor.
        random_state: passed to each DecisionTreeRegressor; set it for repeatable
            trees when several candidate splits are equally good (default None keeps
            the previous behaviour).
        """
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.random_state = random_state
        self.trees = []      # one list of per-class trees for every boosting round
        self.classes = None  # sorted list of class labels, set by fit()

    def initialize_predictions(self, y):
        """Return the starting raw scores: a (len(y), n_classes) array filled with 1/n_classes.

        The same constant for every class means softmax maps it to equal probabilities.
        """
        n_samples = len(y)
        n_classes = len(self.classes)
        initial_predictions = np.full((n_samples, n_classes), 1 / n_classes)
        return initial_predictions

    def log_loss_gradient(self, y_encoded, probabilities):
        """Return the negative gradient of the log-loss w.r.t. the raw scores: target - probability."""
        return y_encoded - probabilities

    def softmax(self, raw_scores):
        """Turn raw scores into row-wise probabilities (row maximum subtracted for stability)."""
        exp_raw_scores = np.exp(raw_scores - np.max(raw_scores, axis=1, keepdims=True))
        return exp_raw_scores / np.sum(exp_raw_scores, axis=1, keepdims=True)

    def encode(self, y):
        """One-hot encode the labels in `y` using the column order of self.classes."""
        encoded = np.zeros((len(y), len(self.classes)))
        for i, cls in enumerate(y):
            encoded[i, self.classes.index(cls)] = 1
        return encoded

    def fit(self, X, y):
        """Fit the boosted trees to the training data.

        Trees from any previous fit are discarded first; before this, refitting kept
        appending rounds to the old ensemble.
        Raises ValueError when `y` is empty.
        """
        y = np.asarray(y)
        if len(y) == 0:
            raise ValueError("CustomGradientBoostingClassifier.fit requires at least one training sample")

        self.trees = []
        self.classes = np.unique(y).tolist()
        y_encoded = self.encode(y)
        n_samples, n_classes = y_encoded.shape
        F = self.initialize_predictions(y)

        for estimator in range(self.n_estimators):
            # The gradient must be taken against probabilities, not raw scores.
            probabilities = self.softmax(F)
            gradients = self.log_loss_gradient(y_encoded, probabilities)
            trees = []

            # Train a tree for each class
            for c in range(n_classes):
                tree = DecisionTreeRegressor(
                    max_depth=self.max_depth,
                    min_samples_split=self.min_samples_split,
                    random_state=self.random_state,
                )
                tree.fit(X, gradients[:, c])
                trees.append(tree)

            # Update the raw scores with the scaled outputs of this round's trees
            for c, tree in enumerate(trees):
                F[:, c] += self.learning_rate * tree.predict(X)

            self.trees.append(trees)

    def predict_probabilities(self, X):
        """Return an (n_samples, n_classes) array of class probabilities for `X`.

        The raw scores start from the same constant used in fit() and accumulate every
        round's scaled tree outputs before softmax is applied.
        Raises RuntimeError when called before fit().
        """
        if self.classes is None:
            raise RuntimeError("CustomGradientBoostingClassifier must be fitted before predicting")

        n_samples = X.shape[0]
        n_classes = len(self.classes)
        F = np.full((n_samples, n_classes), 1 / n_classes)

        for trees in self.trees:
            for c, tree in enumerate(trees):
                F[:, c] += self.learning_rate * tree.predict(X)

        return self.softmax(F)

    def predict(self, X):
        """Return a numpy array with the most probable class label for every row of `X`."""
        probabilities = self.predict_probabilities(X)
        class_indices = np.argmax(probabilities, axis=1)
        return np.array([self.classes[idx] for idx in class_indices])

# Define a function to evaluate model performance
def evaluate_model(model, X_test, y_test):
    """Evaluate `model` on a test set.

    Predicts once with model.predict(X_test), then returns
    (accuracy, precision, recall, f1), where the last three are weighted averages
    (average='weighted').
    """
    predicted = model.predict(X_test)

    def weighted(metric):
        # Apply one of the sklearn metrics with the weighted averaging used here.
        return metric(y_test, predicted, average='weighted')

    accuracy = accuracy_score(y_test, predicted)
    return (accuracy, weighted(precision_score), weighted(recall_score), weighted(f1_score))

# Fit the gradient boosting classifier on the WR training split, score it on the WR test split.
gb_classifier_WR = CustomGradientBoostingClassifier(n_estimators=50, learning_rate=0.1, max_depth=3)
gb_classifier_WR.fit(X_train_WR, y_train_WR)
accuracy_wr, precision_wr, recall_wr, f1_wr = evaluate_model(gb_classifier_WR, X_test_WR, y_test_WR)

print("Custom Gradient Boosting WR Model")
print(f"Testing Accuracy: {accuracy_wr:.4f}")
print(f"Precision: {precision_wr:.4f}")
print(f"Recall: {recall_wr:.4f}")
print(f"F1: {f1_wr:.4f}")

# Same procedure for the RB data.
gb_classifier_RB = CustomGradientBoostingClassifier(n_estimators=50, learning_rate=0.1, max_depth=3)
gb_classifier_RB.fit(X_train_RB, y_train_RB)
accuracy_rb, precision_rb, recall_rb, f1_rb = evaluate_model(gb_classifier_RB, X_test_RB, y_test_RB)

print("\nCustom Gradient Boosting RB Model")
print(f"Testing Accuracy: {accuracy_rb:.4f}")
print(f"Precision: {precision_rb:.4f}")
print(f"Recall: {recall_rb:.4f}")
# This line reports the RB model; it used to be labelled "WR f1", unlike the WR
# section above, which prints "F1".
print(f"F1: {f1_rb:.4f}")

# Tier predictions for the 2024 data from the two gradient boosting models.
y_2024_WR_pred = gb_classifier_WR.predict(X_WR_2024)
y_2024_RB_pred = gb_classifier_RB.predict(X_RB_2024)

# Copies of the 2024 frames with the prediction next to the rank-derived 'Tier'.
df_WR_predictions = df_WR_2024.copy()
df_WR_predictions['Predicted_Tier'] = y_2024_WR_pred

df_RB_predictions = df_RB_2024.copy()
df_RB_predictions['Predicted_Tier'] = y_2024_RB_pred

# Players the models place in the top two tiers of each position.
WR1_players = df_WR_predictions[df_WR_predictions['Predicted_Tier'] == 'WR1']
WR2_players = df_WR_predictions[df_WR_predictions['Predicted_Tier'] == 'WR2']
RB1_players = df_RB_predictions[df_RB_predictions['Predicted_Tier'] == 'RB1']
RB2_players = df_RB_predictions[df_RB_predictions['Predicted_Tier'] == 'RB2']

# Print each group under its heading.
tier_report_columns = ['Player', 'Rank', 'Predicted_Tier', 'Tier']
for tier_heading, tier_players in (
    ("WR1 Players:", WR1_players),
    ("\nWR2 Players:", WR2_players),
    ("\nRB1 Players:", RB1_players),
    ("\nRB2 Players:", RB2_players),
):
    print(tier_heading)
    print(tier_players[tier_report_columns])


Custom Gradient Boosting WR Model
Testing Accuracy: 0.9143
Precision: 0.9116
Recall: 0.9143
F1: 0.9120

Custom Gradient Boosting RB Model
Testing Accuracy: 0.8768
Precision: 0.8754
Recall: 0.8768
WR f1: 0.8757
WR1 Players:
                     Player  Rank Predicted_Tier Tier
0          A.J. Brown (PHI)   1.0            WR1  WR1
1        Malik Nabers (NYG)   2.0            WR1  WR1
2        Nico Collins (HOU)   3.0            WR1  WR1
3         Chris Godwin (TB)   4.0            WR1  WR1
4       Ja'Marr Chase (CIN)   5.0            WR1  WR1
5         Cooper Kupp (LAR)   6.0            WR1  WR1
6    Justin Jefferson (MIN)   7.0            WR1  WR1
8        Drake London (ATL)   9.0            WR1  WR1
9        Stefon Diggs (HOU)  10.0            WR1  WR1
10      DeVonta Smith (PHI)  11.0            WR1  WR1
11  Amon-Ra St. Brown (DET)  12.0            WR1  WR1
12     Garrett Wilson (NYJ)  13.0            WR1  WR2
13         Rashee Rice (KC)  13.0            WR1  WR2
14        Tee Higgins