# <span style="font-weight:bold; font-size: 3rem; color:#1EB182;"> **FPL player score** </span><span style="font-weight:bold; font-size: 3rem; color:#333;">- Part 04: Batch Inference</span>

## 🗒️ This notebook is divided into the following sections:

1. Download model and batch inference data
2. Make predictions, combine predictions with static data
3. Store predictions in a monitoring feature group

## <span style='color:#ff5f27'> 📝 Imports </span>

In [156]:
import datetime
import pandas as pd
from xgboost import XGBRegressor
import hopsworks
import json
from functions import util
import os
from importlib import reload
import requests

In [157]:
# Re-import the helper module so edits made to it are picked up without a kernel restart.
reload(util)

# Sanity check: resolve a fixed date to its gameweek number and show the result.
sample_gameweek = util.get_gameweek_from_date("2025-1-4")
print(sample_gameweek)

20


In [158]:
# Reference timestamps for this run. The zero offset leaves "now" unchanged.
day_offset = datetime.timedelta(0)
one_day = datetime.timedelta(days=1)
today = datetime.datetime.now() - day_offset
tomorrow = today + one_day

# Map today's date (ISO "YYYY-MM-DD") to its gameweek number, and derive the one before it.
gameweek = util.get_gameweek_from_date(today.date().isoformat())
last_gameweek = gameweek - 1

print(gameweek, last_gameweek)

20 19


## <span style="color:#ff5f27;"> 📡 Connect to Hopsworks Feature Store </span>

In [159]:
# Local development: copy the API key from a file into the HOPSWORKS_API_KEY
# environment variable before logging in. When the file cannot be opened, the
# run is treated as "production" and login proceeds without setting the variable here.
try:
    with open('./hopsworks/hopsworks-api-key.txt', 'r') as file:
        os.environ["HOPSWORKS_API_KEY"] = file.read().rstrip()
except OSError:
    # Only a missing or unreadable key file means "production". A bare `except:`
    # would also hide KeyboardInterrupt and genuine programming errors.
    print("In production mode")

# Log in and grab a handle to the project's feature store for the cells below.
project = hopsworks.login()
fs = project.get_feature_store()

2025-01-06 19:25:09,585 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-01-06 19:25:09,590 INFO: Initializing external client
2025-01-06 19:25:09,591 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-01-06 19:25:11,000 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1159321


## <span style="color:#ff5f27;">🪝 Download the model from Model Registry</span>

In [160]:
# Look up the trained player-score model in the project's model registry.
mr = project.get_model_registry()
retrieved_model = mr.get_model(name="player_score_xgboost_model")

# Fetch the model artifacts; the returned value is the local directory they were saved to.
saved_model_dir = retrieved_model.download()


Downloading model artifact (1 dirs, 4 files)... DONE

In [161]:
# Rebuild the XGBoost regressor from the JSON artifact inside the downloaded model directory.
model_json_path = f"{saved_model_dir}/model.json"
retrieved_xgboost_model = XGBRegressor()
retrieved_xgboost_model.load_model(model_json_path)

# Leave the estimator as the cell's last expression so Jupyter renders it.
retrieved_xgboost_model

In [162]:
# Fetch the FPL "bootstrap-static" payload; its 'events' list holds one entry per gameweek.
bootstrap_url = "https://fantasy.premierleague.com/api/bootstrap-static/"
# A timeout keeps an unattended run from hanging forever, and raise_for_status()
# stops an HTTP error response from being parsed as if it were the expected payload.
bootstrap_response = requests.get(bootstrap_url, timeout=30)
bootstrap_response.raise_for_status()
general_info = bootstrap_response.json()

# The state file holds the id of the gameweek that inference was last run for
# (it is rewritten at the end of this notebook).
with open('./state/prev_gameweek.txt', 'r') as file:
    prev_gameweek_id = int(file.readline().strip())

# Resolve the stored id to its full event record.
prev_gameweek = next(
    (gw for gw in general_info['events'] if gw['id'] == prev_gameweek_id), None
)
if prev_gameweek is None:
    # Fail with a clear message instead of "'NoneType' object is not subscriptable".
    raise ValueError(
        f"Gameweek {prev_gameweek_id} from ./state/prev_gameweek.txt was not found in the FPL events"
    )
prev_finished = prev_gameweek['finished']

# The event flagged 'is_current' is the gameweek in progress.
current_gameweek = next(
    (gw for gw in general_info['events'] if gw['is_current']), None
)
if current_gameweek is None:
    # Later cells index current_gameweek["id"]; stop here with an explicit error.
    raise ValueError("The FPL API does not flag any gameweek as current")
print(current_gameweek)

{'id': 20, 'name': 'Gameweek 20', 'deadline_time': '2025-01-04T11:00:00Z', 'release_time': None, 'average_entry_score': 52, 'finished': False, 'data_checked': False, 'highest_scoring_entry': 11079452, 'deadline_time_epoch': 1735988400, 'deadline_time_game_offset': 0, 'highest_score': 130, 'is_previous': False, 'is_current': True, 'is_next': False, 'cup_leagues_created': True, 'h2h_ko_matches_created': True, 'can_enter': False, 'can_manage': False, 'released': True, 'ranked_count': 11058876, 'overrides': {'rules': {}, 'scoring': {}, 'element_types': [], 'pick_multiplier': None}, 'chip_plays': [{'chip_name': 'bboost', 'num_played': 73435}, {'chip_name': 'freehit', 'num_played': 67748}, {'chip_name': 'wildcard', 'num_played': 412768}, {'chip_name': '3xc', 'num_played': 141464}], 'most_selected': 328, 'most_transferred_in': 447, 'top_element': 99, 'top_element_info': {'id': 99, 'points': 19}, 'transfers_made': 13978697, 'most_captained': 328, 'most_vice_captained': 182}


## <span style="color:#ff5f27;">✨ Get Player Score Features From Previous Gameweek </span>



In [163]:
print(prev_gameweek["id"], current_gameweek["id"])
# Run inference only when the recorded gameweek differs from the current one and
# the recorded gameweek is flagged as finished.
if prev_gameweek["id"] != current_gameweek["id"] and prev_finished:
    players_fg = fs.get_feature_group(
        name='player_features',
        version=1,
    )
    # NOTE(review): the filter uses the date-derived `gameweek`, not
    # current_gameweek["id"] — confirm the two always agree.
    players = players_fg.filter(players_fg.gameweek == gameweek).read()
    # Jupyter only auto-renders a cell's last top-level expression; inside an `if`
    # block a bare `players.head()` is silently discarded, so display it explicitly.
    display(players.head())

20 20


### <span style="color:#ff5f27;">🤖 Making the predictions</span>

In [None]:
# Feature columns passed to the model, in the order they are selected from `players`.
# NOTE(review): presumably this must match the column order used at training time — confirm.
inputs = ['prev_minutes', 'prev_goals_scored', 'prev_assists',
         'prev_clean_sheets', 'prev_goals_conceded', 'prev_own_goals',
         'prev_penalties_saved', 'prev_penalties_missed', 'prev_yellow_cards',
         'prev_red_cards', 'prev_saves', 'prev_bonus',
         'prev_bps', 'prev_influence', 'prev_creativity',
         'prev_threat', 'prev_ict_index', 'prev_starts',
         'prev_expected_goals', 'prev_expected_assists', 'prev_expected_goal_involvements',
         'prev_expected_goals_conceded', 'prev_in_dreamteam', 'prev_total_points',
         'prev_selected', 'prev_transfers_balance', 'prev_value', 'prev_was_home']

if prev_gameweek["id"] != current_gameweek["id"] and prev_finished:
    # Predict for every player in one batch and store the result as float64.
    players['predicted_score'] = retrieved_xgboost_model.predict(players[inputs]).astype("float64")
    # A bare expression inside an `if` block is not rendered by Jupyter, so
    # display the preview explicitly.
    display(players.head())

Unnamed: 0,id,points,gameweek,prev_minutes,prev_goals_scored,prev_assists,prev_clean_sheets,prev_goals_conceded,prev_own_goals,prev_penalties_saved,...,prev_expected_assists,prev_expected_goal_involvements,prev_expected_goals_conceded,prev_in_dreamteam,prev_total_points,prev_selected,prev_transfers_balance,prev_value,prev_was_home,predicted_score
0,253,0,20,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,597.0,-6.0,45.0,1.0,0.0619
1,254,0,20,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,6.0,2861.0,-64.0,47.0,1.0,0.203691
2,489,2,20,90.0,0.0,0.0,0.0,2.0,0.0,0.0,...,0.01,0.01,0.72,0.0,20.0,21363.0,2804.0,46.0,1.0,2.052892
3,252,9,20,67.0,1.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.36,1.52,0.0,87.0,621798.0,6797.0,55.0,1.0,3.858825
4,255,4,20,90.0,0.0,1.0,0.0,2.0,0.0,0.0,...,0.22,0.22,1.98,0.0,67.0,2496023.0,86649.0,49.0,1.0,3.773018


In [166]:
# Print the column/dtype/non-null summary of the prediction frame, but only on
# runs where inference was actually performed.
inference_due = prev_gameweek["id"] != current_gameweek["id"] and prev_finished
if inference_due:
    players.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 709 entries, 0 to 708
Data columns (total 32 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   id                               709 non-null    int64  
 1   points                           709 non-null    int64  
 2   gameweek                         709 non-null    int64  
 3   prev_minutes                     709 non-null    float64
 4   prev_goals_scored                709 non-null    float64
 5   prev_assists                     709 non-null    float64
 6   prev_clean_sheets                709 non-null    float64
 7   prev_goals_conceded              709 non-null    float64
 8   prev_own_goals                   709 non-null    float64
 9   prev_penalties_saved             709 non-null    float64
 10  prev_penalties_missed            709 non-null    float64
 11  prev_yellow_cards                709 non-null    float64
 12  prev_red_cards        

### <span style="color:#ff5f27;">🤖 Saving the predictions (for monitoring) to a Feature Group</span>

### Create Score Graph
Draw a graph of the predicted scores for all players and save it as a PNG in the GitHub repository.

In [167]:
inference_due = prev_gameweek["id"] != current_gameweek["id"] and prev_finished
if inference_due:
    # The helper receives a title, the prediction frame and an output path;
    # presumably it writes the PNG to that path — confirm in functions/util.py.
    file_path = "./player_score_model/images/predicted_scores.png"
    plt = util.plot_player_score_forecast(
        "FPL Player scores",
        players,
        file_path,
    )
    plt.show()

In [168]:
inference_due = prev_gameweek["id"] != current_gameweek["id"] and prev_finished
if inference_due:
    # Settings for the monitoring feature group; one row is keyed by (player id, gameweek).
    monitor_fg_spec = {
        "name": 'fpl_predictions',
        "description": 'FPL player score prediction monitoring',
        "version": 1,
        "primary_key": ['id', 'gameweek'],
    }
    # Fetch the feature group, or create it if it does not exist yet.
    monitor_fg = fs.get_or_create_feature_group(**monitor_fg_spec)

In [169]:
if prev_gameweek["id"] != current_gameweek["id"] and prev_finished:
    # Only keep the relevant columns. Take an explicit copy: the next cell writes
    # new columns into this frame, and writing into a slice of `players` risks
    # pandas' SettingWithCopyWarning.
    output_data = players[['id', 'points', 'gameweek', 'predicted_score']].copy()

    # A bare expression inside an `if` block is not rendered by Jupyter, so
    # display the dtypes explicitly.
    display(output_data.dtypes)

In [170]:
if prev_gameweek["id"] != current_gameweek["id"] and prev_finished:
    # Add static data: enrich every prediction row with the player details returned
    # by util.get_player_info (presumably name, team and position — confirm in
    # functions/util.py).
    # Iterate over the frame's real index labels rather than range(len(...)):
    # label-based .loc/.at lookups with 0..n-1 only work on a default RangeIndex.
    for row_label in list(output_data.index):
        player_id = output_data.loc[row_label, 'id']

        player_data = util.get_player_info(player_id, general_info)
        for key, value in player_data.items():
            output_data.at[row_label, key] = value

In [172]:
# Write the enriched predictions to the monitoring feature group, but only on
# runs where inference was actually performed.
inference_due = prev_gameweek["id"] != current_gameweek["id"] and prev_finished
if inference_due:
    monitor_fg.insert(output_data)

In [173]:
inference_due = prev_gameweek["id"] != current_gameweek["id"] and prev_finished
if inference_due:
    # Human-readable description for each column of the monitoring feature group.
    # Dicts preserve insertion order, so the updates run in the order listed here.
    feature_descriptions = {
        "id": "Player ID",
        "points": "Total points of the player in the gameweek (label)",
        "gameweek": "Gameweek",
        "predicted_score": "Predicted score of the player in the gameweek",
        "first_name": "Player first name",
        "second_name": "Player surname",
        "team": "Player team",
        "position": "Player position",
    }
    for feature_name, description in feature_descriptions.items():
        monitor_fg.update_feature_description(feature_name, description)

    # Record the gameweek just processed so inference is not run again for it.
    processed_gameweek_id = str(current_gameweek["id"])
    with open('./state/prev_gameweek.txt', 'w') as file:
        file.write(processed_gameweek_id)


---