# <span style="font-weight:bold; font-size: 3rem; color:#1EB182;"> **FPL Player Scores** </span><span style="font-weight:bold; font-size: 3rem; color:#333;">- Part 04: Batch Inference</span>

## 🗒️ This notebook is divided into the following sections:

1. Download model and batch inference data
2. Make predictions, generate PNG for forecast
3. Store predictions in a monitoring feature group and generate PNG for hindcast

## <span style='color:#ff5f27'> 📝 Imports </span>

In [29]:
import datetime
import pandas as pd
from xgboost import XGBRegressor
import hopsworks
import json
from functions import util
import os
from importlib import reload

In [None]:
# Re-import the local `functions.util` helper module so that edits to it take
# effect without restarting the kernel.
reload(util)

# Smoke check: resolve the gameweek for a fixed, known date.
sample_date = "2025-1-4"
print(util.get_gameweek_from_date(sample_date))

In [None]:
# Reference timestamp for this batch run. The zero-length offset is a no-op;
# presumably it is a hook for shifting the run date when backfilling — TODO confirm.
today = datetime.datetime.now() - datetime.timedelta(0)
tomorrow = today + datetime.timedelta(days=1)

# Resolve the gameweek for today's date; the gameweek before it supplies the
# rows that are fed to the model further down.
gameweek = util.get_gameweek_from_date(str(today.date()))
last_gameweek = gameweek - 1

# A cell only renders its final expression, so show both values together
# instead of relying on two separate bare expressions.
gameweek, last_gameweek

## <span style="color:#ff5f27;"> 📡 Connect to Hopsworks Feature Store </span>

In [None]:
# Local development: load the API key from a file and expose it through the
# HOPSWORKS_API_KEY environment variable before logging in.
try:
    with open('./hopsworks/hopsworks-api-key.txt', 'r') as file:
        os.environ["HOPSWORKS_API_KEY"] = file.read().rstrip()
except OSError:
    # The key file is missing or unreadable. Only file-system errors are
    # treated as "no local key"; anything else (e.g. KeyboardInterrupt) is
    # allowed to propagate instead of being silently swallowed.
    print("In production mode")

# Log in to the Hopsworks project and get a handle to its feature store.
project = hopsworks.login()
fs = project.get_feature_store()

## <span style="color:#ff5f27;"> ⚙️ Feature View Retrieval</span>


In [None]:
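# Not used: the features are read directly from the feature group further down, so no feature view is retrieved.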
# feature_view = fs.get_feature_view(
#     name='player_score_fv',
#     version=1,
# )

## <span style="color:#ff5f27;">🪝 Download the model from Model Registry</span>

In [None]:
# Open the project's model registry and look up version 1 of the player-score model.
mr = project.get_model_registry()
retrieved_model = mr.get_model(name="player_score_xgboost_model", version=1)

# Fetch the model's artifacts locally; the returned location is used by the loading step.
saved_model_dir = retrieved_model.download()

In [None]:
# Start from an empty regressor, then restore the saved state from the
# model.json file inside the downloaded model directory.
retrieved_xgboost_model = XGBRegressor()
model_json_path = saved_model_dir + "/model.json"
retrieved_xgboost_model.load_model(model_json_path)

# Show the restored regressor as the cell output.
retrieved_xgboost_model

## <span style="color:#ff5f27;">✨ Get Player Score Features From Previous Gameweek </span>



In [None]:
# Handle to version 1 of the player feature group.
players_fg = fs.get_feature_group(name='player_features', version=1)

# Rows of the previous gameweek are the model inputs; rows of the current
# gameweek are the ones that receive a predicted score.
is_last_gameweek = players_fg.gameweek == last_gameweek
is_this_gameweek = players_fg.gameweek == gameweek
input_data = players_fg.filter(is_last_gameweek).read()
output_data = players_fg.filter(is_this_gameweek).read()

### <span style="color:#ff5f27;">🤖 Making the predictions</span>

In [None]:
# Feature columns passed to the model. They are handed over as a plain array,
# so the order here presumably has to match the order used at training time.
inputs = ['prev_minutes', 'prev_goals_scored', 'prev_assists',
         'prev_clean_sheets', 'prev_goals_conceded', 'prev_own_goals',
         'prev_penalties_saved', 'prev_penalties_missed', 'prev_yellow_cards',
         'prev_red_cards', 'prev_saves', 'prev_bonus',
         'prev_bps', 'prev_influence', 'prev_creativity',
         'prev_threat', 'prev_ict_index', 'prev_starts',
         'prev_expected_goals', 'prev_expected_assists', 'prev_expected_goal_involvements',
         'prev_expected_goals_conceded', 'prev_in_dreamteam']

# Score every input row with a single vectorized predict() call instead of one
# call per row. Casting to float64 keeps mixed bool/int/float columns from
# collapsing into an object array.
if input_data.empty:
    # Nothing to score (e.g. no rows for the previous gameweek): every
    # prediction below ends up as NaN rather than predict() being called on
    # an empty matrix.
    predicted_scores = pd.Series(dtype="float64")
else:
    feature_matrix = input_data[inputs].to_numpy(dtype="float64", na_value=float("nan"))
    predicted_scores = pd.Series(
        retrieved_xgboost_model.predict(feature_matrix),
        index=input_data["id"].to_numpy(),
        dtype="float64",
    )

if not predicted_scores.index.is_unique:
    raise ValueError(
        "input_data contains more than one row per id; cannot align predictions with output_data"
    )

# Align predictions on `id` rather than on row position: the two frames come
# from independent reads, so row i of input_data is not guaranteed to be the
# same player as row i of output_data, and the frames may differ in length.
# Ids without an input row get NaN.
output_data["predicted_score"] = output_data["id"].map(predicted_scores)

output_data

In [None]:
# Print the column names, dtypes and non-null counts of the prediction frame
# as a quick check before it is plotted and stored.
output_data.info()

### <span style="color:#ff5f27;">🤖 Saving the predictions (for monitoring) to a Feature Group</span>

### Create Forecast Graph
Draw a graph of the predictions with dates as a PNG and save it to the GitHub repo.
Show it on GitHub Pages.

In [None]:
# Destination of the forecast chart, relative to the working directory.
file_path = "./player_score_model/images/predicted_scores.png"
# NOTE(review): `plt` is whatever the util helper returns, not an imported
# matplotlib.pyplot alias. Presumably the helper also writes the PNG to
# file_path — confirm in functions.util.
plt = util.plot_player_score_forecast("FPL Player scores", output_data, file_path, hindcast=False)
plt.show()

In [40]:
# Get or create feature group
monitor_fg = fs.get_or_create_feature_group(
    name='fpl_predictions',
    description='FPL player score prediction monitoring',
    version=1,
    primary_key=['id', 'gameweek']
)

In [None]:
# Write this run's predictions to the monitoring feature group.
# NOTE(review): "wait_for_job" presumably makes the call block until the
# ingestion job has finished — confirm against the Hopsworks documentation.
monitor_fg.insert(output_data, write_options={"wait_for_job": True})

In [42]:
# We will create a hindcast chart for  only the forecasts made 1 day beforehand
# monitoring_df = monitor_fg.filter(monitor_fg.days_before_forecast_day == 1).read()
# monitoring_df

In [43]:
# air_quality_fg = fs.get_feature_group(
#     name='air_quality',
#     version=1,
# )
# air_quality_df = air_quality_fg.read()
# air_quality_df

In [44]:
# from functions.util import backfill_predictions_for_monitoring

# outcome_df = air_quality_df[['date', 'pm25']]
# preds_df =  monitoring_df[['date', 'predicted_pm25']]

# hindcast_df = pd.merge(preds_df, outcome_df, on="date")
# hindcast_df = hindcast_df.sort_values(by=['date'], ascending=False)

# # If there are no outcomes for predictions yet, generate some predictions/outcomes from existing data
# if len(hindcast_df) == 0:
#     hindcast_df = backfill_predictions_for_monitoring(weather_fg, air_quality_df, monitor_fg, retrieved_xgboost_model, today)
# hindcast_df

### Plot the Hindcast comparing predicted with observed values (1-day prior forecast)

__This graph will be empty to begin with - this is normal.__

After a few days of predictions and observations, you will get data points in this graph.

In [45]:
# file_path = "../../docs/assets/images/pm25_hindcast.png"
# plt = util.plot_air_quality_forecast(city, street, hindcast_df, file_path, hindcast=True)
# plt.show()

---