In [1]:
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
from matplotlib import pyplot
from datetime import datetime
import warnings
warnings.filterwarnings("ignore")
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score


from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression

In [2]:
# Load the source CSV (path is relative to the notebook's working directory).
DataFrame = pd.read_csv('../Data.csv')

In [3]:
# Keep only the rows whose Position is 'WR'.
is_wr_row = DataFrame['Position'].eq('WR')
WR = DataFrame[is_wr_row]

In [4]:
# Drop the per-row bookkeeping columns that the rest of the notebook does not use.
bookkeeping_columns = ['Week', 'Position', 'Game', 'Unnamed: 0']
WR = WR.drop(bookkeeping_columns, axis=1)

In [5]:
# Drop the passing, rushing and turnover stat columns; only receiving stats are kept.
non_receiving_columns = [
    'PassingYards',
    'PassingTDs',
    'Ints',
    'Rush Attempts',
    'Rush Yards',
    'Fumbles',
    'RushTDs',
]
WR = WR.drop(non_receiving_columns, axis=1)

In [6]:
# Add empty placeholder label columns; they are filled in after the yearly aggregation.
WR = WR.assign(Style='', Skill='')

In [7]:
# Collapse the rows to one per (Name, Year) by summing the remaining columns.
WR = WR.groupby(['Name','Year']).sum()

In [8]:
# Flatten the (Name, Year) group index back into columns, then index the frame by Year.
WR = WR.reset_index().set_index('Year')

In [9]:
# Parse the Year index as a datetime ('%Y' format) so rows can later be
# filtered with `.index.year` and used as a time-series index.
WR.index = pd.to_datetime(WR.index, format='%Y')

In [10]:
# Use an underscore in the target column name, then confirm the index dtype.
column_renames = {'Fantasy Points': 'Fantasy_Points'}
WR = WR.rename(columns=column_renames)

index_dtype = WR.index.dtype
print("Index Data Type:", index_dtype)

Index Data Type: datetime64[ns]


In [11]:
# Label each player-season by playing style:
#   10+ receiving TDs                 -> 'Touchdown Machine'
#   fewer than 10 TDs, 85+ receptions -> 'Catch Monster'
#   everything else                   -> 'Regular'
season_tds = WR['Receiving TDs']
season_receptions = WR['Receptions']

WR.loc[season_tds.ge(10), 'Style'] = 'Touchdown Machine'
WR.loc[season_tds.lt(10) & season_receptions.ge(85), 'Style'] = 'Catch Monster'
WR.loc[season_tds.lt(10) & season_receptions.lt(85), 'Style'] = 'Regular'

In [12]:
# Label each player-season by receiving-yard tier:
#   1200+ yards     -> 'Elite'
#   700-1199 yards  -> 'Starter'
#   under 700 yards -> 'Back Up'
season_yards = WR['Receiving Yards']

WR.loc[season_yards.ge(1200), 'Skill'] = 'Elite'
WR.loc[season_yards.ge(700) & season_yards.lt(1200), 'Skill'] = 'Starter'
WR.loc[season_yards.lt(700), 'Skill'] = 'Back Up'

In [13]:
# Hold-out experiment: score the 2022 season using Receiving Yards as the only feature.
y = WR['Fantasy_Points']
X = WR.drop(columns=['Fantasy_Points'])

numeric_features = ['Receiving Yards']

# Random 80/20 split with a fixed seed. The season filters below are applied on
# top of this split, so each subset is a slice of its random partition.
# NOTE(review): this means only part of the 2022 rows is scored; confirm that is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Standardize the selected feature column(s), then fit a linear regression.
preprocessor = ColumnTransformer(
    transformers=[('num', StandardScaler(), numeric_features)]
)
pipeline = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('regressor', LinearRegression()),
    ]
)

# Fit on training-partition rows from seasons before 2022.
X_train_subset = X_train.loc[X_train.index.year < 2022]
y_train_subset = y_train.loc[y_train.index.year < 2022]
pipeline.fit(X_train_subset, y_train_subset)

# Predict for test-partition rows from the 2022 season.
X_test_subset = X_test.loc[X_test.index.year == 2022]
y_test_subset = y_test.loc[y_test.index.year == 2022]
y_pred = pipeline.predict(X_test_subset)

mse = mean_squared_error(y_test_subset, y_pred)
r2 = r2_score(y_test_subset, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R-squared: {r2}")

Mean Squared Error: 109.7148946034343
R-squared: 0.9877669301141293


In [14]:
# Same 2022 hold-out experiment, but with Receiving TDs as the only feature.
y = WR['Fantasy_Points']
X = WR.drop(columns=['Fantasy_Points'])

numeric_features = ['Receiving TDs']

# Random 80/20 split with a fixed seed; the season filters below are applied
# on top of this split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Standardize the selected feature column(s), then fit a linear regression.
preprocessor = ColumnTransformer(
    transformers=[('num', StandardScaler(), numeric_features)]
)
pipeline = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('regressor', LinearRegression()),
    ]
)

# Fit on training-partition rows from seasons before 2022.
X_train_subset = X_train.loc[X_train.index.year < 2022]
y_train_subset = y_train.loc[y_train.index.year < 2022]
pipeline.fit(X_train_subset, y_train_subset)

# Predict for test-partition rows from the 2022 season.
X_test_subset = X_test.loc[X_test.index.year == 2022]
y_test_subset = y_test.loc[y_test.index.year == 2022]
y_pred = pipeline.predict(X_test_subset)

mse = mean_squared_error(y_test_subset, y_pred)
r2 = r2_score(y_test_subset, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R-squared: {r2}")

Mean Squared Error: 1203.4137407350775
R-squared: 0.8658209129650073


In [15]:
# Hold-out experiment for the 2023 season with Receiving Yards as the only feature.
# The `pipeline` fitted here is the one reused later when it is called on the
# 2024 yardage, provided the cells are run top to bottom.
y = WR['Fantasy_Points']
X = WR.drop(columns=['Fantasy_Points'])

numeric_features = ['Receiving Yards']

# Random 80/20 split with a fixed seed; the season filters below are applied
# on top of this split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Standardize the selected feature column(s), then fit a linear regression.
preprocessor = ColumnTransformer(
    transformers=[('num', StandardScaler(), numeric_features)]
)
pipeline = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('regressor', LinearRegression()),
    ]
)

# Fit on training-partition rows from seasons before 2023.
X_train_subset = X_train.loc[X_train.index.year < 2023]
y_train_subset = y_train.loc[y_train.index.year < 2023]
pipeline.fit(X_train_subset, y_train_subset)

# Predict for test-partition rows from the 2023 season.
X_test_subset = X_test.loc[X_test.index.year == 2023]
y_test_subset = y_test.loc[y_test.index.year == 2023]
y_pred = pipeline.predict(X_test_subset)

mse = mean_squared_error(y_test_subset, y_pred)
r2 = r2_score(y_test_subset, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R-squared: {r2}")

Mean Squared Error: 253.38984955597175
R-squared: 0.9757928345150757


In [16]:
# ARIMA model: run a trend analysis that forecasts 2024 Receiving Yards, which are then fed into the regression model to make the final fantasy-football point prediction

In [17]:
# Column forecast by the ARIMA step, plus the companion stat columns.
target_variable = 'Receiving Yards'
features = [
    'Receiving TDs',
    'Receptions',
]

In [18]:
players = WR['Name'].unique()

# Forecast window (calendar year 2024). It is identical for every player, so
# it is built once instead of on every loop iteration.
start_date_2024 = datetime(2024, 1, 1)
end_date_2024 = datetime(2024, 12, 31)

# player name -> forecast 2024 Receiving Yards (only players whose fit succeeded)
predictions_2024 = {}
# player name -> repr of the exception that prevented a forecast.
# Failures used to be dropped silently; they are recorded here so skipped
# players can be inspected, while the loop stays best-effort.
arima_failures = {}

for player in players:
    # Yearly history of the target column for this player, on the datetime Year index.
    player_data = WR[WR['Name'] == player]
    yards_history = player_data[target_variable]

    try:
        # Fit an ARIMA model on the player's Receiving Yards history
        model = ARIMA(yards_history, order=(1, 1, 1))  # Adjust the order as needed
        arima_result = model.fit()

        # Forecast Receiving Yards (not fantasy points) over the 2024 window
        y_pred_2024 = arima_result.predict(start=start_date_2024, end=end_date_2024, dynamic=False)

        # Keep only the last value of the returned forecast
        predictions_2024[player] = y_pred_2024.values[-1]
    except Exception as e:
        # Best-effort: skip this player, but remember why.
        arima_failures[player] = repr(e)
        continue

print(
    f"ARIMA forecasts: {len(predictions_2024)} of {len(players)} players; "
    f"{len(arima_failures)} skipped (see arima_failures)"
)



        
        
        
 
        


In [19]:
# Inspect the per-player forecast dictionary.
predictions_2024

{'A.J. Brown': 1424.515994190584,
 'A.J. Green': 689.5532052288504,
 'Adam Humphries': 382.68673572578854,
 'Adam Thielen': 891.3236359646016,
 'Allen Lazard': 487.3780474943746,
 'Allen Robinson': 403.49803486350504,
 'Amari Cooper': 1121.9764981992953,
 'Amon-Ra St. Brown': 2230.1038060551564,
 'Andre Roberts': 34.296936562887566,
 'Andy Isabella': 82.25362161462314,
 'Anthony Miller': -461.8132952709001,
 'Antonio Brown': 495.8664154032123,
 'Ashton Dulin': 190.05818979384807,
 'Auden Tate': 75.2732143757205,
 'Bennett Skowronek': 83.87735051587651,
 'Brandin Cooks': 659.176588295759,
 'Brandon Aiyuk': 1975.0126522367666,
 'Brandon Zylstra': 246.32023244638435,
 'Braxton Berrios': 261.7672797089231,
 'Breshad Perriman': 168.312546914085,
 'Bryan Edwards': 544.0632381380533,
 'Byron Pringle': 239.57136261589764,
 'C.J. Board': 44.198407001989814,
 'Cam Sims': 221.43864338316513,
 'Cedrick Wilson': 256.26948205226597,
 'CeeDee Lamb': 2564.4861873493674,
 'Chase Claypool': -63.75287692

In [20]:
# Turn the {player: forecast yards} mapping into a two-column frame.
Predictions24 = pd.DataFrame({
    'Name': list(predictions_2024.keys()),
    'Receiving Yards': list(predictions_2024.values()),
})


In [21]:
# Inspect the forecast frame.
Predictions24

Unnamed: 0,Name,Receiving Yards
0,A.J. Brown,1424.515994
1,A.J. Green,689.553205
2,Adam Humphries,382.686736
3,Adam Thielen,891.323636
4,Allen Lazard,487.378047
...,...,...
167,Tyron Johnson,-20.840513
168,Van Jefferson,401.410604
169,Will Fuller,512.728720
170,Zach Pascal,-86.990912


In [22]:
# Tag every forecast row with the season it refers to.
Predictions24 = Predictions24.assign(Year=2024)

In [23]:
# Inspect the forecast frame with the new Year column.
Predictions24

Unnamed: 0,Name,Receiving Yards,Year
0,A.J. Brown,1424.515994,2024
1,A.J. Green,689.553205,2024
2,Adam Humphries,382.686736,2024
3,Adam Thielen,891.323636,2024
4,Allen Lazard,487.378047,2024
...,...,...,...
167,Tyron Johnson,-20.840513,2024
168,Van Jefferson,401.410604,2024
169,Will Fuller,512.728720,2024
170,Zach Pascal,-86.990912,2024


In [24]:
# Index the forecasts by Year and parse it as a datetime, mirroring the WR frame.
# reset_index() keeps the previous row index as an 'index' column.
Predictions24 = Predictions24.reset_index().set_index('Year')
Predictions24.index = pd.to_datetime(Predictions24.index, format='%Y')

In [25]:
# Move the datetime Year index back into a regular column.
Predictions24 = Predictions24.reset_index()

In [26]:
# Preview only: drop() returns a new frame and the result is not assigned,
# so Predictions24 itself still contains the 'index' column afterwards.
Predictions24.drop(columns = ['index'])

Unnamed: 0,Year,Name,Receiving Yards
0,2024-01-01,A.J. Brown,1424.515994
1,2024-01-01,A.J. Green,689.553205
2,2024-01-01,Adam Humphries,382.686736
3,2024-01-01,Adam Thielen,891.323636
4,2024-01-01,Allen Lazard,487.378047
...,...,...,...
167,2024-01-01,Tyron Johnson,-20.840513
168,2024-01-01,Van Jefferson,401.410604
169,2024-01-01,Will Fuller,512.728720
170,2024-01-01,Zach Pascal,-86.990912


In [27]:
# Inspect the forecast frame (the 'index' column is still present here).
Predictions24

Unnamed: 0,Year,index,Name,Receiving Yards
0,2024-01-01,0,A.J. Brown,1424.515994
1,2024-01-01,1,A.J. Green,689.553205
2,2024-01-01,2,Adam Humphries,382.686736
3,2024-01-01,3,Adam Thielen,891.323636
4,2024-01-01,4,Allen Lazard,487.378047
...,...,...,...,...
167,2024-01-01,167,Tyron Johnson,-20.840513
168,2024-01-01,168,Van Jefferson,401.410604
169,2024-01-01,169,Will Fuller,512.728720
170,2024-01-01,170,Zach Pascal,-86.990912


In [28]:
# Historical frame without the other receiving stats; Receiving Yards is kept.
unused_stat_columns = ['Receptions', 'Receiving TDs']
ReceivngYards = WR.drop(unused_stat_columns, axis=1)

In [29]:
# Turn the datetime Year index into a regular column so it lines up with Predictions24.
ReceivngYards = ReceivngYards.reset_index()

In [30]:
# Inspect the historical yardage frame.
ReceivngYards

Unnamed: 0,Year,Name,Fantasy_Points,Receiving Yards,Style,Skill
0,2019-01-01,A.J. Brown,210.0,1051.0,Regular,Starter
1,2020-01-01,A.J. Brown,237.0,1075.0,Touchdown Machine,Starter
2,2021-01-01,A.J. Brown,174.0,869.0,Regular,Starter
3,2022-01-01,A.J. Brown,293.0,1496.0,Touchdown Machine,Elite
4,2023-01-01,A.J. Brown,283.0,1456.0,Catch Monster,Elite
...,...,...,...,...,...,...
1086,2019-01-01,Zay Jones,43.0,216.0,Regular,Back Up
1087,2020-01-01,Zay Jones,32.0,154.0,Regular,Back Up
1088,2021-01-01,Zay Jones,101.0,546.0,Regular,Back Up
1089,2022-01-01,Zay Jones,190.0,823.0,Regular,Starter


In [31]:
# Stack the historical rows and the 2024 forecast rows. Columns that exist in
# only one of the two frames (e.g. Predictions24's leftover 'index') are
# filled with NaN for the rows of the other frame.
concatenated_df = pd.concat([ReceivngYards, Predictions24])

In [32]:
# Inspect the combined historical + forecast frame.
concatenated_df

Unnamed: 0,Year,Name,Fantasy_Points,Receiving Yards,Style,Skill,index
0,2019-01-01,A.J. Brown,210.0,1051.000000,Regular,Starter,
1,2020-01-01,A.J. Brown,237.0,1075.000000,Touchdown Machine,Starter,
2,2021-01-01,A.J. Brown,174.0,869.000000,Regular,Starter,
3,2022-01-01,A.J. Brown,293.0,1496.000000,Touchdown Machine,Elite,
4,2023-01-01,A.J. Brown,283.0,1456.000000,Catch Monster,Elite,
...,...,...,...,...,...,...,...
167,2024-01-01,Tyron Johnson,,-20.840513,,,167.0
168,2024-01-01,Van Jefferson,,401.410604,,,168.0
169,2024-01-01,Will Fuller,,512.728720,,,169.0
170,2024-01-01,Zach Pascal,,-86.990912,,,170.0


In [33]:
# Order rows by player and then chronologically, so each player's latest row comes last.
sort_keys = ['Name', 'Year']
ReceivingYards2 = concatenated_df.sort_values(sort_keys)

In [34]:
# Remove the leftover 'index' column carried over from Predictions24.
ReceivingYards2 = ReceivingYards2.drop('index', axis=1)

In [35]:
# Inspect the sorted historical + forecast frame.
ReceivingYards2

Unnamed: 0,Year,Name,Fantasy_Points,Receiving Yards,Style,Skill
0,2019-01-01,A.J. Brown,210.0,1051.000000,Regular,Starter
1,2020-01-01,A.J. Brown,237.0,1075.000000,Touchdown Machine,Starter
2,2021-01-01,A.J. Brown,174.0,869.000000,Regular,Starter
3,2022-01-01,A.J. Brown,293.0,1496.000000,Touchdown Machine,Elite
4,2023-01-01,A.J. Brown,283.0,1456.000000,Catch Monster,Elite
...,...,...,...,...,...,...
1087,2020-01-01,Zay Jones,32.0,154.000000,Regular,Back Up
1088,2021-01-01,Zay Jones,101.0,546.000000,Regular,Back Up
1089,2022-01-01,Zay Jones,190.0,823.000000,Regular,Starter
1090,2023-01-01,Zay Jones,74.0,321.000000,Regular,Back Up


In [36]:
# Inputs: `ReceivingYards2` (historical rows plus 2024 forecasts, sorted by
# Name and Year) and `pipeline` (the most recently fitted regression pipeline).

# Take the last Receiving Yards row per player. With the Name/Year sort this is
# the 2024 forecast when one exists, otherwise the player's latest recorded season.
latest_predictions = (
    ReceivingYards2
    .groupby('Name')['Receiving Yards']
    .last()
    .reset_index()
)

# The pipeline only consumes the Receiving Yards column.
X_2024 = latest_predictions.loc[:, ['Receiving Yards']]

# Convert yardage into predicted fantasy points.
y_pred_2024 = pipeline.predict(X_2024)

# Pair each player with the prediction, rounded to 2 decimal places.
player_predictions = pd.DataFrame({
    'Player': latest_predictions['Name'],
    'Predicted_Fantasy_Points_2024': np.round(y_pred_2024, 2),
})

print(player_predictions)

                Player  Predicted_Fantasy_Points_2024
0           A.J. Brown                         297.62
1           A.J. Green                         143.85
2           A.T. Perry                          51.05
3       Adam Humphries                          79.65
4         Adam Thielen                         186.07
..                 ...                            ...
415      Xavier Gipson                          47.50
416  Xavier Hutchinson                          18.41
417        Zach Pascal                         -18.62
418        Zay Flowers                         179.09
419          Zay Jones                          76.59

[420 rows x 2 columns]


In [37]:
# Inspect the predicted 2024 fantasy points per player.
player_predictions

Unnamed: 0,Player,Predicted_Fantasy_Points_2024
0,A.J. Brown,297.62
1,A.J. Green,143.85
2,A.T. Perry,51.05
3,Adam Humphries,79.65
4,Adam Thielen,186.07
...,...,...
415,Xavier Gipson,47.50
416,Xavier Hutchinson,18.41
417,Zach Pascal,-18.62
418,Zay Flowers,179.09


In [38]:
# Re-read the source CSV so the yearly totals below are rebuilt independently
# of the WR frame prepared earlier.
DataFrame = pd.read_csv('../Data.csv')

In [39]:
# Keep only the rows whose Position is 'WR'.
wr_rows = DataFrame['Position'].eq('WR')
WR2 = DataFrame[wr_rows]

In [40]:
# Drop the passing, rushing and turnover stat columns; only receiving stats are kept.
non_receiving_stats = [
    'PassingYards',
    'PassingTDs',
    'Ints',
    'Rush Attempts',
    'Rush Yards',
    'Fumbles',
    'RushTDs',
]
WR2 = WR2.drop(non_receiving_stats, axis=1)

In [41]:
# Drop the per-row bookkeeping columns before aggregating by season.
per_game_columns = ['Week', 'Position', 'Game', 'Unnamed: 0']
WR2 = WR2.drop(per_game_columns, axis=1)

In [42]:
# Collapse the rows to one per (Name, Year) by summing the stat columns.
WR2 = WR2.groupby(['Name','Year']).sum()

In [43]:
# Inspect the yearly totals (indexed by Name and Year).
WR2 

Unnamed: 0_level_0,Unnamed: 1_level_0,Fantasy Points,Receptions,Receiving Yards,Receiving TDs
Name,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
A.J. Brown,2019,210.0,52.0,1051.0,8.0
A.J. Brown,2020,237.0,70.0,1075.0,11.0
A.J. Brown,2021,174.0,63.0,869.0,5.0
A.J. Brown,2022,293.0,88.0,1496.0,11.0
A.J. Brown,2023,283.0,106.0,1456.0,7.0
...,...,...,...,...,...
Zay Jones,2019,43.0,27.0,216.0,0.0
Zay Jones,2020,32.0,14.0,154.0,1.0
Zay Jones,2021,101.0,47.0,546.0,1.0
Zay Jones,2022,190.0,82.0,823.0,5.0


In [44]:
# Flatten the (Name, Year) index back into ordinary columns.
WR2 = WR2.reset_index()

In [45]:
# Check column dtypes and non-null counts after flattening the index.
WR2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1091 entries, 0 to 1090
Data columns (total 6 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Name             1091 non-null   object 
 1   Year             1091 non-null   int64  
 2   Fantasy Points   1091 non-null   float64
 3   Receptions       1091 non-null   float64
 4   Receiving Yards  1091 non-null   float64
 5   Receiving TDs    1091 non-null   float64
dtypes: float64(4), int64(1), object(1)
memory usage: 51.3+ KB


In [46]:

# Seasons covered by each player's list, in order: slot 0 is FIRST_SEASON.
# These were previously magic numbers repeated in two places.
FIRST_SEASON = 2019
LAST_SEASON = 2023
N_SEASONS = LAST_SEASON - FIRST_SEASON + 1

# player name -> fantasy points per season (0 where the player has no row for that season)
name_fantasy_dict = {}

for name, year, fantasy_points in zip(WR2['Name'], WR2['Year'], WR2['Fantasy Points']):
    # Every player seen gets a full-length list, even if this row is skipped below.
    season_points = name_fantasy_dict.setdefault(name, [0] * N_SEASONS)

    # Guard the slot: a season before FIRST_SEASON would give a negative index
    # and silently overwrite a different season, and a season after
    # LAST_SEASON would raise IndexError. Such rows are skipped.
    slot = int(year) - FIRST_SEASON
    if 0 <= slot < N_SEASONS:
        # float() keeps plain Python floats so printing and JSON export are unchanged.
        season_points[slot] = float(fantasy_points)

# Print the resulting dictionary
print(name_fantasy_dict)


{'A.J. Brown': [210.0, 237.0, 174.0, 293.0, 283.0], 'A.J. Green': [0, 107.0, 150.0, 58.0, 0], 'A.T. Perry': [0, 0, 0, 0, 57.0], 'Adam Humphries': [83.0, 54.0, 73.0, 0, 0], 'Adam Thielen': [110.0, 246.0, 195.0, 171.0, 223.0], 'Albert Wilson': [81.0, 0, 43.0, 0, 0], 'Alec Pierce': [0, 0, 0, 107.0, 90.0], 'Alex Bachman': [0, 0, 0.0, 0, 0], 'Alex Erickson': [90.0, 19.0, 7.0, 0, 41.0], 'Alexander Hollins': [6.0, 0, 0, 0, 0], 'Allen Hurns': [79.0, 0, 0, 0, 0], 'Allen Lazard': [97.0, 92.0, 136.0, 167.0, 55.0], 'Allen Robinson': [248.0, 258.0, 82.0, 81.0, 56.0], 'Alshon Jeffery': [117.0, 22.0, 0, 0, 0], 'Amari Cooper': [238.0, 232.0, 195.0, 240.0, 219.0], 'Amari Rodgers': [0, 0, 7.0, 37.0, 0], 'Amon-Ra St. Brown': [0, 0, 219.0, 255.0, 324.0], 'Andre Baccellia': [0, 0, 0, 9.0, 0], 'Andre Patton': [9.0, 0, 0, 0, 0], 'Andre Roberts': [3.0, 4.0, 2.0, 0, 0], 'Andrei Iosivas': [0, 0, 0, 0, 46.0], 'Andy Isabella': [31.0, 51.0, 2.0, 4.0, 0], 'Anthony Miller': [123.0, 101.0, 14.0, 0, 0], 'Anthony Schwa

In [47]:
# Number of historical season entries each list holds before the 2024
# prediction is added (seasons 2019 through 2023).
N_HISTORICAL_SEASONS = 5

for player, predicted_points in zip(
    player_predictions['Player'],
    player_predictions['Predicted_Fantasy_Points_2024'],
):
    season_points = name_fantasy_dict.get(player)
    if season_points is None:
        # No historical entry for this player; nothing to extend.
        continue

    # Make the cell safe to re-run: remove any prediction added by an earlier
    # execution before appending, so the list never grows past one
    # prediction entry.
    del season_points[N_HISTORICAL_SEASONS:]
    # float() keeps a plain Python float for printing and JSON export.
    season_points.append(float(predicted_points))


print(name_fantasy_dict)

{'A.J. Brown': [210.0, 237.0, 174.0, 293.0, 283.0, 297.62], 'A.J. Green': [0, 107.0, 150.0, 58.0, 0, 143.85], 'A.T. Perry': [0, 0, 0, 0, 57.0, 51.05], 'Adam Humphries': [83.0, 54.0, 73.0, 0, 0, 79.65], 'Adam Thielen': [110.0, 246.0, 195.0, 171.0, 223.0, 186.07], 'Albert Wilson': [81.0, 0, 43.0, 0, 0, 44.15], 'Alec Pierce': [0, 0, 0, 107.0, 90.0, 107.12], 'Alex Bachman': [0, 0, 0.0, 0, 0, -0.41], 'Alex Erickson': [90.0, 19.0, 7.0, 0, 41.0, 48.12], 'Alexander Hollins': [6.0, 0, 0, 0, 0, 9.21], 'Allen Hurns': [79.0, 0, 0, 0, 0, 86.62], 'Allen Lazard': [97.0, 92.0, 136.0, 167.0, 55.0, 101.55], 'Allen Robinson': [248.0, 258.0, 82.0, 81.0, 56.0, 84.0], 'Alshon Jeffery': [117.0, 22.0, 0, 0, 0, 23.65], 'Amari Cooper': [238.0, 232.0, 195.0, 240.0, 219.0, 234.32], 'Amari Rodgers': [0, 0, 7.0, 37.0, 0, 42.27], 'Amon-Ra St. Brown': [0, 0, 219.0, 255.0, 324.0, 466.16], 'Andre Baccellia': [0, 0, 0, 9.0, 0, 9.0], 'Andre Patton': [9.0, 0, 0, 0, 0, 11.3], 'Andre Roberts': [3.0, 4.0, 2.0, 0, 0, 6.76], '

In [48]:
import json

# Serialize first (4-space indentation), so a serialization error happens
# before the output file is opened for writing.
json_data = json.dumps(name_fantasy_dict, indent=4)

# Write the yearly points per player under ../Flask/static.
with open('../Flask/static/WRYearlyPoints.json', mode='w') as json_file:
    json_file.write(json_data)

In [114]:
### Final Model We Used ####

Unnamed: 0,Player,Predicted_Fantasy_Points_2024
57,CeeDee Lamb,539.21
16,Amon-Ra St. Brown,468.82
404,Tyreek Hill,455.52
38,Brandon Aiyuk,415.12
326,Puka Nacua,312.18
...,...,...
330,Quintez Cephus,-116.01
325,Preston Williams,-129.79
205,Jarvis Landry,-146.00
288,Marvin Jones,-158.88


In [None]:
# Final model: Receiving Yards as the only feature, fitted on seasons before
# 2023 and scored on the 2023 season.
y = WR['Fantasy_Points']
X = WR.drop(columns=['Fantasy_Points'])

numeric_features = ['Receiving Yards']

# Random 80/20 split with a fixed seed; the season filters below are applied
# on top of this split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Standardize the selected feature column(s), then fit a linear regression.
preprocessor = ColumnTransformer(
    transformers=[('num', StandardScaler(), numeric_features)]
)
pipeline = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('regressor', LinearRegression()),
    ]
)

# Fit on training-partition rows from seasons before 2023.
X_train_subset = X_train.loc[X_train.index.year < 2023]
y_train_subset = y_train.loc[y_train.index.year < 2023]
pipeline.fit(X_train_subset, y_train_subset)

# Predict for test-partition rows from the 2023 season.
X_test_subset = X_test.loc[X_test.index.year == 2023]
y_test_subset = y_test.loc[y_test.index.year == 2023]
y_pred = pipeline.predict(X_test_subset)

mse = mean_squared_error(y_test_subset, y_pred)
r2 = r2_score(y_test_subset, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R-squared: {r2}")
