In [None]:
"""

    Helper functions for the pretrained model to be used within our API.

    Author: Explore Data Science Academy.

    Note:  
    ---------------------------------------------------------------------
    Please follow the instructions provided within the README.md file
    located within this directory for guidance on how to use this script
    correctly.

    Importantly, you will need to modify this file by adding
    your own data preprocessing steps within the `_preprocess_data()`
    function.
    ----------------------------------------------------------------------

    Description: This file contains several functions used to abstract aspects
    of model interaction within the API. This includes loading a model from
    file, data preprocessing, and model prediction.  

"""

# Helper Dependencies
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
import pickle
import json

def _preprocess_data(data):
    """Private helper function to preprocess data for model prediction.
    
    df_train = pd.read_csv('df_train.csv') # load the train data
    df_test = pd.read_csv('df_test.csv')  # load the test data

    NB: If you have utilised feature engineering/selection in order to create
    your final model you will need to define the code here.


    Parameters
    ----------
    data : str
        The data payload received within POST requests sent to our API.

    Returns
    -------
    Pandas DataFrame : <class 'pandas.core.frame.DataFrame'>
        The preprocessed data, ready to be used our model for prediction.
    """
    # Convert the json string to a python dictionary object
    feature_vector_dict = json.loads(data)
    # Load the dictionary as a Pandas DataFrame.
    feature_vector_df = pd.DataFrame.from_dict([feature_vector_dict])

    # ---------------------------------------------------------------
    # NOTE: You will need to swap the lines below for your own data
    # preprocessing methods.
    #
    # The code below is for demonstration purposes only. You will not
    # receive marks for submitting this code in an unchanged state.
    # ---------------------------------------------------------------

    # ----------- Replace this code with your own preprocessing steps --------
        #imputing missing values
    df_test['Valencia_pressure'] = df_test['Valencia_pressure'].fillna(df_test['Valencia_pressure'].mode()[0])
    # impute the mode
    df_train['Valencia_pressure'] = df_train['Valencia_pressure'].fillna(df_train['Valencia_pressure'].mode()[0])

    # extracting the number from the string 
    df_test['Valencia_wind_deg'] = df_test['Valencia_wind_deg'].str.extract('(\d+)').astype('int64')

# extracting the number from the string 
    df_train['Valencia_wind_deg'] = df_train['Valencia_wind_deg'].str.extract('(\d+)').astype('int64')

# change the train data type to integer
    df_test['Valencia_wind_deg'] = pd.to_numeric(df_test['Valencia_wind_deg'])
 

    # change the test data type to integer
    df_train['Valencia_wind_deg'] = pd.to_numeric(df_train['Valencia_wind_deg'])
 

    # extracting the number from the string 
    df_train['Seville_pressure'] = df_train['Seville_pressure'].str.extract('(\d+)').astype('int64')

# extracting the number from the string 
    df_test['Seville_pressure'] = df_test['Seville_pressure'].str.extract('(\d+)').astype('int64')

# change the data type to integer
    df_test['Seville_pressure'] = pd.to_numeric(df_test['Seville_pressure'])
 
    # change the data type to integer
    df_train['Seville_pressure'] = pd.to_numeric(df_train['Seville_pressure'])
 
    df_train['Year']  = df_train['time'].astype('datetime64').dt.year
    df_train['Month_of_year']  = df_train['time'].astype('datetime64').dt.month
    df_train['Week_of_year'] = df_train['time'].astype('datetime64').dt.weekofyear
    df_train['Day_of_year']  = df_train['time'].astype('datetime64').dt.dayofyear
    df_train['Day_of_month']  = df_train['time'].astype('datetime64').dt.day
    df_train['Day_of_week'] = df_train['time'].astype('datetime64').dt.dayofweek
    df_train['Hour_of_week'] = ((df_train['time'].astype('datetime64').dt.dayofweek) * 24 + 24) - (24 - df_train['time'].astype('datetime64').dt.hour)
    df_train['Hour_of_day']  = df_train['time'].astype('datetime64').dt.hour

    df_train = df_train.drop(columns=['Week_of_year','Day_of_year','Hour_of_week', 'Unnamed: 0','time'])
    df_test = df_test.drop(columns=['Week_of_year','Day_of_year','Hour_of_week', 'Unnamed: 0','time'])
    
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(df_train)
    X_scaled = pd.DataFrame(X_scaled,columns=X.columns)
    
    predict_vector = feature_vector_df[['Madrid_wind_speed', 'Valencia_wind_deg', 'Bilbao_rain_1h',
       'Valencia_wind_speed', 'Seville_humidity', 'Madrid_humidity',
       'Bilbao_clouds_all', 'Bilbao_wind_speed', 'Seville_clouds_all',
       'Bilbao_wind_deg', 'Barcelona_wind_speed', 'Barcelona_wind_deg',
       'Madrid_clouds_all', 'Seville_wind_speed', 'Barcelona_rain_1h',
       'Seville_pressure', 'Seville_rain_1h', 'Bilbao_snow_3h',
       'Barcelona_pressure', 'Seville_rain_3h', 'Madrid_rain_1h',
       'Barcelona_rain_3h', 'Valencia_snow_3h', 'Madrid_weather_id',
       'Barcelona_weather_id', 'Bilbao_pressure', 'Seville_weather_id',
       'Valencia_pressure', 'Seville_temp_max', 'Bilbao_weather_id', 
        'Valencia_humidity', 'Year', 'Month_of_year', 'Day_of_month', 'Day_of_week', 'Hour_of_day']]
    # ------------------------------------------------------------------------

    return predict_vector

def load_model(path_to_model:str):
    """Adapter function to load our pretrained model into memory.

    Parameters
    ----------
    path_to_model : str
        The relative path to the model weights/schema to load.
        Note that unless another file format is used, this needs to be a
        .pkl file.

    Returns
    -------
    <class: sklearn.estimator>
        The pretrained model loaded into memory.

    Raises
    ------
    OSError
        If the file at `path_to_model` cannot be opened.

    """
    # SECURITY: unpickling can execute arbitrary code, so only load model
    # files that come from a trusted source.
    # The context manager closes the file handle even if unpickling fails.
    with open(path_to_model, 'rb') as model_file:
        return pickle.load(model_file)


""" You may use this section (above the make_prediction function) of the python script to implement 
    any auxiliary functions required to process your model's artifacts.
"""

def make_prediction(data, model):
    """Run the model on a single API request payload.

    Parameters
    ----------
    data : str
        The data payload received within POST requests sent to our API.
    model : <class: sklearn.estimator>
        An sklearn model object; only its `predict` method is used.

    Returns
    -------
    The first element of the model's prediction output, converted to
    built-in Python types with `.tolist()`.

    """
    # Turn the raw payload into the feature frame the model expects.
    features = _preprocess_data(data)
    # Only the first prediction is of interest.
    first_prediction = model.predict(features)[0]
    # Convert from numpy to plain Python types for output standardisation.
    return first_prediction.tolist()


In [1]:
# Helper Dependencies
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
import pickle
import json

def _engineer_features(raw_df, valencia_pressure_fill):
    """Apply the cleaning and calendar-feature steps to a copy of `raw_df`.

    Parameters
    ----------
    raw_df : Pandas DataFrame
        Frame holding at least `time`, `Valencia_pressure`,
        `Valencia_wind_deg` and `Seville_pressure`.
    valencia_pressure_fill : float
        Value used to impute missing `Valencia_pressure` entries.

    Returns
    -------
    Pandas DataFrame
        A new frame with cleaned columns and the added calendar features.
    """
    engineered = raw_df.copy()

    # Impute missing pressure readings with the supplied value.
    engineered['Valencia_pressure'] = pd.to_numeric(
        engineered['Valencia_pressure']).fillna(valencia_pressure_fill)

    # Extract the number embedded in the string-coded columns. `astype(str)`
    # lets values that are already numeric pass through the same code path.
    for column in ('Valencia_wind_deg', 'Seville_pressure'):
        digits = engineered[column].astype(str).str.extract(r'(\d+)', expand=False)
        engineered[column] = pd.to_numeric(digits).astype('int64')

    # Calendar features; only those used in the final selection are built.
    timestamps = pd.to_datetime(engineered['time'])
    engineered['Year'] = timestamps.dt.year
    engineered['Month_of_year'] = timestamps.dt.month
    engineered['Day_of_month'] = timestamps.dt.day
    engineered['Day_of_week'] = timestamps.dt.dayofweek
    engineered['Hour_of_day'] = timestamps.dt.hour

    return engineered


def _preprocess_data(data):
    """Private helper function to preprocess data for model prediction.

    The request payload is cleaned, enriched with calendar features and then
    standardised with a scaler fitted on the training data, so the returned
    row is on the same scale as the training features.

    NB: If you have utilised feature engineering/selection in order to create
    your final model you will need to define the code here.

    Parameters
    ----------
    data : str
        The data payload received within POST requests sent to our API.

    Returns
    -------
    Pandas DataFrame : <class 'pandas.core.frame.DataFrame'>
        The preprocessed data, ready to be used by our model for prediction.

    Raises
    ------
    FileNotFoundError
        If `df_train.csv` is not present in the working directory.
    KeyError
        If the payload or the training file lacks a required column.
    ValueError
        If `Valencia_wind_deg` or `Seville_pressure` contains no digits.
    """
    # Convert the json string to a python dictionary object
    feature_vector_dict = json.loads(data)
    # Load the dictionary as a single-row Pandas DataFrame.
    feature_vector_df = pd.DataFrame([feature_vector_dict])

    # The training data supplies the imputation value and scaling statistics.
    # NOTE(review): reading and refitting on every request is slow; persisting
    # the scaler fitted at training time would be preferable - confirm which
    # artefacts are available alongside the model.
    df_train = pd.read_csv('df_train.csv')  # load the train data
    train_pressure_mode = df_train['Valencia_pressure'].mode()[0]

    model_columns = ['Madrid_wind_speed', 'Valencia_wind_deg', 'Bilbao_rain_1h',
       'Valencia_wind_speed', 'Seville_humidity', 'Madrid_humidity',
       'Bilbao_clouds_all', 'Bilbao_wind_speed', 'Seville_clouds_all',
       'Bilbao_wind_deg', 'Barcelona_wind_speed', 'Barcelona_wind_deg',
       'Madrid_clouds_all', 'Seville_wind_speed', 'Barcelona_rain_1h',
       'Seville_pressure', 'Seville_rain_1h', 'Bilbao_snow_3h',
       'Barcelona_pressure', 'Seville_rain_3h', 'Madrid_rain_1h',
       'Barcelona_rain_3h', 'Valencia_snow_3h', 'Madrid_weather_id',
       'Barcelona_weather_id', 'Bilbao_pressure', 'Seville_weather_id',
       'Valencia_pressure', 'Seville_temp_max', 'Madrid_pressure',
       'Valencia_temp_max', 'Valencia_temp', 'Bilbao_weather_id',
       'Seville_temp', 'Valencia_humidity', 'Valencia_temp_min',
       'Barcelona_temp_max', 'Madrid_temp_max', 'Barcelona_temp',
       'Bilbao_temp_min', 'Bilbao_temp', 'Barcelona_temp_min',
       'Bilbao_temp_max', 'Seville_temp_min', 'Madrid_temp', 'Madrid_temp_min',
       'Year', 'Month_of_year', 'Day_of_month', 'Day_of_week', 'Hour_of_day']

    train_features = _engineer_features(df_train, train_pressure_mode)[model_columns]
    request_features = _engineer_features(feature_vector_df, train_pressure_mode)[model_columns]

    # Fit on the training features only, then transform the request row. The
    # previous version returned the scaled training rows and ignored the
    # request altogether.
    scaler = StandardScaler()
    scaler.fit(train_features)
    X_scaled = scaler.transform(request_features)

    predict_vector = pd.DataFrame(X_scaled, columns=model_columns,
                                  index=request_features.index)

    return predict_vector

In [3]:
# NOTE(review): `X_scaled` is not defined at notebook scope (any variable of
# that name inside `_preprocess_data` is local to the function), so this cell
# raises the NameError shown below. Inspect the value returned by
# `_preprocess_data(...)` instead, or remove this cell.
X_scaled.shape

NameError: name 'X_scaled' is not defined