In [36]:
import pandas as pd
import hopsworks
import os
import requests

In [37]:
# Fantasy Premier League API endpoints used by this notebook.
# `bootstrap_url` returns the season-wide payload (read below for its
# "elements" and "events" entries); `player_stats_gw_url` is a template whose
# `{gw}` placeholder is filled with a gameweek id before the request is made.
bootstrap_url = (
    "https://fantasy.premierleague.com/api"
    "/bootstrap-static/"
)
player_stats_gw_url = (
    "https://fantasy.premierleague.com/api"
    "/event/{gw}/live/"
)

In [38]:
# Local development: load the Hopsworks API key from a file next to the
# notebook and expose it through the environment for `hopsworks.login()`.
# When the file cannot be opened, the key file is treated as absent and
# nothing is set here.
try:
    with open('./hopsworks/hopsworks-api-key.txt', 'r') as file:
        os.environ["HOPSWORKS_API_KEY"] = file.read().rstrip()
except OSError:
    # Only file-access failures mean "no local key file". A bare `except:`
    # would also hide KeyboardInterrupt and genuine programming errors.
    print("In production mode")

project = hopsworks.login()
print(project.description)

2025-01-08 16:42:44,494 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-01-08 16:42:44,498 INFO: Initializing external client
2025-01-08 16:42:44,499 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-01-08 16:42:45,987 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1159321
Default project


In [39]:
# Download the season-wide bootstrap payload. A timeout keeps the notebook from
# hanging on a stalled connection, and `raise_for_status()` surfaces HTTP errors
# here instead of as a confusing JSON/KeyError further down.
bootstrap_response = requests.get(bootstrap_url, timeout=30)
bootstrap_response.raise_for_status()
general_info = bootstrap_response.json()

# One row per entry of the payload's "elements" list.
general_stats = pd.DataFrame(general_info["elements"])
general_stats

Unnamed: 0,can_transact,can_select,chance_of_playing_next_round,chance_of_playing_this_round,code,cost_change_event,cost_change_event_fall,cost_change_start,cost_change_start_fall,dreamteam_count,...,now_cost_rank,now_cost_rank_type,form_rank,form_rank_type,points_per_game_rank,points_per_game_rank_type,selected_rank,selected_rank_type,starts_per_90,clean_sheets_per_90
0,True,False,0.0,0.0,438098,0,0,-1,1,0,...,131,78,695,310,708,317,650,283,0.00,0.00
1,True,True,100.0,100.0,205651,0,0,-1,1,2,...,29,14,12,2,191,25,38,10,0.90,0.30
2,True,True,100.0,100.0,226597,0,0,4,-4,2,...,51,3,43,12,27,2,9,2,1.05,0.35
3,True,True,100.0,75.0,219847,-1,1,-2,2,1,...,13,5,103,18,38,11,42,12,1.03,0.36
4,True,False,0.0,0.0,463748,0,0,0,0,0,...,640,59,452,55,536,61,570,68,0.00,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
713,True,True,75.0,75.0,509291,0,0,0,0,0,...,223,141,189,87,314,142,595,249,1.20,0.18
714,True,True,,,463212,0,0,0,0,0,...,116,70,325,168,473,227,571,231,0.38,0.00
715,True,True,,,517179,0,0,-1,1,0,...,713,240,418,149,396,130,228,87,0.00,0.00
716,True,True,,,596054,0,0,0,0,0,...,466,294,623,262,676,296,626,267,0.00,0.00


In [40]:
# Feature store handle of the logged-in project; used below to look up the
# "player_features" feature group.
fs = project.get_feature_store()

In [41]:
# Pin the feature group version explicitly so this handle always refers to the
# same version that the final insert cell requests (version 1), instead of
# relying on the client's implicit default.
player_fg = fs.get_feature_group(
    name="player_features",
    version=1,
)




In [42]:
# Highest gameweek number currently stored in the feature group. The scalar is
# extracted from the column first: calling int() on a one-element Series is
# deprecated in pandas, and the conversion is done once rather than once per
# event inside the generator below.
latest_stored_gameweek = int(
    player_fg.select(["gameweek"]).read()["gameweek"].max()
)

# Matching event record from the bootstrap payload (a dict with at least the
# "id" and "finished" keys read in this notebook).
prev_gameweek = next(
    (event for event in general_info['events'] if event['id'] == latest_stored_gameweek),
    None,
)
if prev_gameweek is None:
    # Fail with a clear message instead of "'NoneType' object is not subscriptable".
    raise ValueError(
        f"Gameweek {latest_stored_gameweek} from the feature group was not found "
        "in the bootstrap 'events' list"
    )

# Whether that gameweek is flagged as finished; gates every update cell below.
prev_finished = prev_gameweek['finished']
prev_finished

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.89s) 



True

In [43]:
# Names of the previous-gameweek features. The final cell prefixes each with
# "prev_" and drops any player row that is missing one of them. The last six
# entries are the ones that cell fills from each player's history; the rest
# are expected to come from the live gameweek stats.
lagged_features = [
    'minutes',
    'goals_scored',
    'assists',
    'clean_sheets',
    'goals_conceded',
    'own_goals',
    'penalties_saved',
    'penalties_missed',
    'yellow_cards',
    'red_cards',
    'saves',
    'bonus',
    'bps',
    'influence',
    'creativity',
    'threat',
    'ict_index',
    'starts',
    'expected_goals',
    'expected_assists',
    'expected_goal_involvements',
    'expected_goals_conceded',
    'in_dreamteam',
    'total_points',
    'selected',
    'transfers_balance',
    'value',
    'was_home',
    'opponent_team',
]

In [44]:
# Per-player endpoint template; `{element_id}` is replaced with a player id
# before each request. The response's "history" list is what the final cell reads.
player_details_url = (
    "https://fantasy.premierleague.com/api"
    "/element-summary/{element_id}"
)

In [45]:
if prev_finished:
    # Fetch the live stats of the finished gameweek. The URL and the decoded
    # payload get separate names, and the request fails fast on timeouts or
    # HTTP errors instead of passing an error body to the parsing below.
    prev_stats_url = player_stats_gw_url.format(gw=prev_gameweek["id"])
    prev_stats_response = requests.get(prev_stats_url, timeout=30)
    prev_stats_response.raise_for_status()
    prev_stats = prev_stats_response.json()

    live_elements = pd.DataFrame(prev_stats["elements"])

    # Expand the per-player "stats" dicts into columns in one constructor call
    # (row-wise `.apply(pd.Series)` builds one Series per player and is slow).
    stats_columns = pd.DataFrame(
        live_elements["stats"].tolist(), index=live_elements.index
    )

    # The gameweek score is stored as "points"; the final cell later adds a
    # separate "total_points" column computed from each player's history.
    prev_stats_df = pd.concat(
        [live_elements.drop(columns=["explain", "modified", "stats"]), stats_columns],
        axis=1,
    ).rename(columns={"total_points": "points"})

In [35]:
if prev_finished:
    existing_data = player_fg.read()

    # Final points of the finished gameweek, keyed by player id. Built as a
    # separate lookup so `prev_stats_df` is never re-indexed in place: a failure
    # halfway through this cell can no longer leave it without its "id" column.
    points_by_id = prev_stats_df.set_index("id")["points"]

    # Rows of the stored data that belong to the finished gameweek.
    gameweek_rows = existing_data["gameweek"] == prev_gameweek["id"]

    # Overwrite "points" for those rows only. Players without an entry in the
    # live stats get NaN, which matches the previous index-aligned assignment.
    existing_data.loc[gameweek_rows, "points"] = (
        existing_data.loc[gameweek_rows, "id"].map(points_by_id)
    )

    # Write just the updated gameweek back to the feature group.
    player_fg.insert(existing_data[gameweek_rows])
    print(f"Updated player points for gameweek {prev_gameweek['id']}")

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (2.07s) 


Uploading Dataframe: 100.00% |██████████| Rows 705/705 | Elapsed Time: 00:01 | Remaining Time: 00:00


Launching job: player_features_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1159321/jobs/named/player_features_1_offline_fg_materialization/executions
Updated player points for gameweek 19


In [47]:
if prev_finished:
    # Build the feature rows for the upcoming gameweek from the finished one
    # and insert them into the feature group. The result is bound to
    # `next_gw_features`; `prev_stats_df` is left untouched so the cell can be
    # re-run safely.

    # Columns taken from each player's history rather than the live stats.
    history_columns = ["total_points", "selected", "transfers_balance",
                       "value", "was_home", "opponent_team"]
    # Columns that keep their name (everything else gets a "prev_" prefix).
    unprefixed_columns = ["id", "gameweek", "points"]

    history_records = []
    # A single session reuses the connection across the per-player requests.
    with requests.Session() as session:
        # Iterate over the ids that are actually present: player ids are not
        # guaranteed to be the contiguous range 1..N.
        for player_id in prev_stats_df["id"].unique():
            response = session.get(
                player_details_url.format(element_id=int(player_id)), timeout=30
            )
            response.raise_for_status()
            history_df = pd.DataFrame(response.json()["history"])
            if history_df.empty:
                # No matches recorded for this player; the row is dropped by
                # the dropna() below because its history columns stay missing.
                continue

            round_rows = history_df[history_df["round"] == prev_gameweek["id"]]
            if round_rows.empty:
                # Player had no fixture in the finished gameweek.
                continue

            # As before, the first fixture of the round is used when a player
            # has more than one.
            row = round_rows.iloc[0]
            history_records.append({
                "id": player_id,
                # Sum of the per-fixture points over the whole history.
                "total_points": history_df["total_points"].sum(),
                "selected": row["selected"],
                "transfers_balance": row["transfers_balance"],
                "value": row["value"],
                "was_home": row["was_home"],
                "opponent_team": row["opponent_team"],
            })

    # One DataFrame built from all records replaces ~6 `.loc` writes per player.
    history_features = pd.DataFrame(
        history_records, columns=["id"] + history_columns
    ).astype({"id": prev_stats_df["id"].dtype})

    next_gw_features = (
        prev_stats_df
        # Guards against duplicate columns if these were already added earlier.
        .drop(columns=history_columns, errors="ignore")
        .merge(history_features, on="id", how="left")
    )
    next_gw_features = next_gw_features.rename(
        columns={col: f"prev_{col}" for col in next_gw_features.columns
                 if col not in unprefixed_columns}
    )
    # Keep only players for which every lagged feature and the target exist.
    next_gw_features = next_gw_features.dropna(
        subset=[f'prev_{col}' for col in lagged_features] + ['points']
    )
    next_gw_features = next_gw_features.astype(
        {col: 'float64' for col in next_gw_features.columns
         if col not in unprefixed_columns}
    )
    next_gw_features = (
        next_gw_features
        # These rows describe the gameweek that follows the finished one.
        .assign(gameweek=prev_gameweek["id"] + 1)
        # Attach each player's element_type from the bootstrap data.
        .merge(general_stats[["id", "element_type"]], on="id", how="left")
    )

    fs = project.get_feature_store()

    player_fg = fs.get_feature_group(
        name="player_features",
        version=1,
    )

    player_fg.insert(next_gw_features)

    print("Inserted data for gameweek", prev_gameweek["id"] + 1)




FeatureStoreException: Features are not compatible with Feature Group schema: 
 - gameweek (type: 'bigint') is missing from input dataframe.
Note that feature (or column) names are case insensitive and spaces are automatically replaced with underscores.