In [6]:
import pandas as pd
import hopsworks
import os
import requests

In [7]:
# Fantasy Premier League API endpoints used by this notebook.
# Season-wide bootstrap payload; its "events" list is read to resolve gameweeks.
bootstrap_url = "https://fantasy.premierleague.com/api/bootstrap-static/"
# Live stats template for a single gameweek; fill in with `.format(gw=<id>)`.
player_stats_gw_url = "https://fantasy.premierleague.com/api/event/{gw}/live/"

In [8]:
# Local runs: copy the API key from a file into the HOPSWORKS_API_KEY
# environment variable (presumably picked up by hopsworks.login() below).
# When the file cannot be opened or read, the environment is left untouched.
try:
    with open('./hopsworks/hopsworks-api-key.txt', 'r') as file:
        os.environ["HOPSWORKS_API_KEY"] = file.read().rstrip()
except OSError:
    # Only file-access failures (missing file, permissions, ...) mean "no local
    # key"; a bare `except:` would also hide KeyboardInterrupt and real bugs.
    print("In production mode")

project = hopsworks.login()
print(project.description)

2025-01-06 17:04:15,994 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-01-06 17:04:15,999 INFO: Initializing external client
2025-01-06 17:04:16,000 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-01-06 17:04:17,464 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1159321
Default project


In [9]:
# Download the bootstrap payload (its "events" list is used to resolve gameweeks).
# A timeout is set because requests otherwise waits indefinitely, and the HTTP
# status is checked so an error response fails here rather than during parsing.
bootstrap_response = requests.get(bootstrap_url, timeout=30)
bootstrap_response.raise_for_status()
general_info = bootstrap_response.json()

In [10]:
# Get previous (and current) gameweek and check if it's finished.
# The previous gameweek id is read from the first line of the state file.
with open('./state/prev_gameweek.txt', 'r') as file:
    # int() ignores the trailing newline; an empty or non-numeric line raises
    # ValueError right here instead of inside the lookup below.
    prev_gameweek_id = int(file.readline())

prev_gameweek = next(
    (gw for gw in general_info['events'] if gw['id'] == prev_gameweek_id), None
)
if prev_gameweek is None:
    # Fail with an explicit message instead of "'NoneType' is not subscriptable".
    raise ValueError(
        f"Gameweek {prev_gameweek_id} from ./state/prev_gameweek.txt "
        "was not found in the bootstrap 'events' list"
    )
prev_finished = prev_gameweek['finished']

# First event flagged `is_current`; stays None when no event carries the flag.
current_gameweek = next(
    (gw for gw in general_info['events'] if gw['is_current']), None
)

print(f"Previous gameweek: {prev_gameweek['id']}")

Previous gameweek: 19


In [11]:
# Names of the per-player columns that are required (as `prev_<name>`) when
# rows with missing values are dropped before insertion. Order is preserved.
lagged_features = [
    'minutes',
    'goals_scored',
    'assists',
    'clean_sheets',
    'goals_conceded',
    'own_goals',
    'penalties_saved',
    'penalties_missed',
    'yellow_cards',
    'red_cards',
    'saves',
    'bonus',
    'bps',
    'influence',
    'creativity',
    'threat',
    'ict_index',
    'starts',
    'expected_goals',
    'expected_assists',
    'expected_goal_involvements',
    'expected_goals_conceded',
    'in_dreamteam',
    'total_points',
    'selected',
    'transfers_balance',
    'value',
    'was_home',
]

In [12]:
# URL template for a single player's summary; fill in with
# `.format(element_id=<player id>)`. The ingestion cell reads the "history"
# list from the JSON response.
player_details_url = "https://fantasy.premierleague.com/api/element-summary/{element_id}"

In [None]:
# Ingest the previous gameweek's player stats as lagged ("prev_*") features
# tagged with the current gameweek id, then insert them into the
# "player_features" feature group. Runs only when the gameweek has advanced
# and the previous one is marked finished.
if prev_gameweek["id"] != current_gameweek["id"] and prev_finished:

    request_timeout = 30  # seconds; requests waits indefinitely without a timeout
    key_columns = ["id", "gameweek", "points"]  # never prefixed or cast to float
    history_columns = ["total_points", "selected", "transfers_balance", "value", "was_home"]

    # One session so the many per-player requests can reuse the connection.
    with requests.Session() as session:

        # Live stats of every player for the previous gameweek.
        live_response = session.get(
            player_stats_gw_url.format(gw=prev_gameweek["id"]),
            timeout=request_timeout,
        )
        live_response.raise_for_status()
        prev_stats = live_response.json()

        prev_stats_df = pd.DataFrame(prev_stats["elements"]).drop(columns=["explain", "modified"])
        # Expand the nested "stats" dict into one column per stat.
        prev_stats_df = pd.concat(
            [prev_stats_df.drop(columns=["stats"]), prev_stats_df["stats"].apply(pd.Series)],
            axis=1
        )
        prev_stats_df["gameweek"] = current_gameweek["id"]
        # The gameweek's own score is kept as "points"; "total_points" is
        # re-created below as the sum over the player's whole history.
        prev_stats_df = prev_stats_df.rename(columns={"total_points": "points"})

        # Collect the history-derived values per player. Iterate over the ids
        # actually present instead of assuming they are contiguous from 1.
        history_by_player = {}
        for player_id in prev_stats_df["id"].unique():

            details_response = session.get(
                player_details_url.format(element_id=int(player_id)),
                timeout=request_timeout,
            )
            details_response.raise_for_status()
            history_df = pd.DataFrame(details_response.json()["history"])

            # No history at all: nothing to derive. The player keeps NaN in the
            # history columns and is removed by the dropna further down.
            if history_df.empty:
                continue

            round_rows = history_df[history_df["round"] == prev_gameweek["id"]]
            # No entry for the previous gameweek: skip for the same reason.
            if round_rows.empty:
                continue

            # When several rows match the round, the first one is used.
            row = round_rows.iloc[0]
            history_by_player[player_id] = {
                "total_points": history_df["total_points"].sum(),
                "selected": row["selected"],
                "transfers_balance": row["transfers_balance"],
                "value": row["value"],
                "was_home": row["was_home"],
            }

    # Attach the collected values; ids without an entry map to NaN.
    for column in history_columns:
        prev_stats_df[column] = prev_stats_df["id"].map(
            {player_id: values[column] for player_id, values in history_by_player.items()}
        )

    # Everything except the key columns becomes a lagged "prev_*" feature.
    prev_stats_df = prev_stats_df.rename(
        columns={col: f"prev_{col}" for col in prev_stats_df.columns if col not in key_columns}
    )

    prev_stats_df = prev_stats_df.dropna(subset=[f'prev_{col}' for col in lagged_features] + ['points'])

    prev_stats_df = prev_stats_df.astype(
        {col: 'float64' for col in prev_stats_df.columns if col not in key_columns}
    )

    fs = project.get_feature_store()

    player_fg = fs.get_feature_group(
        name="player_features",
        version=1,
    )

    player_fg.insert(prev_stats_df)

    print("Inserted data for gameweek", current_gameweek["id"])
    print(prev_stats_df)




Uploading Dataframe: 100.00% |██████████| Rows 709/709 | Elapsed Time: 00:01 | Remaining Time: 00:00


Launching job: player_features_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1159321/jobs/named/player_features_1_offline_fg_materialization/executions
Inserted data for gameweek 20


Unnamed: 0,id,prev_minutes,prev_goals_scored,prev_assists,prev_clean_sheets,prev_goals_conceded,prev_own_goals,prev_penalties_saved,prev_penalties_missed,prev_yellow_cards,...,prev_expected_goal_involvements,prev_expected_goals_conceded,points,prev_in_dreamteam,gameweek,prev_total_points,prev_selected,prev_transfers_balance,prev_value,prev_was_home
0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00,0.00,0,0.0,20,0.0,1149.0,-4.0,54.0,0.0
1,2,90.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.89,0.29,12,1.0,20,42.0,759564.0,-32056.0,68.0,0.0
2,3,90.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.31,0.29,2,0.0,20,83.0,3148709.0,128138.0,63.0,0.0
3,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00,0.00,0,0.0,20,74.0,1144511.0,24677.0,79.0,0.0
4,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00,0.00,0,0.0,20,0.0,3109.0,-72.0,40.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
704,705,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00,0.00,0,0.0,20,0.0,211.0,152.0,40.0,1.0
705,706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00,0.00,0,0.0,20,0.0,0.0,0.0,40.0,0.0
706,707,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00,0.00,0,0.0,20,0.0,0.0,0.0,45.0,0.0
707,708,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00,0.00,0,0.0,20,0.0,0.0,0.0,40.0,1.0
