# Inference Model

In [6]:
# Imports
import os
import sys
import subprocess
import pandas as pd
import glob
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import time
from datetime import datetime

# Model Imports
import category_encoders as ce
import xgboost as xgb
import sklearn as sk

In [11]:
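# Input information: a datetime (including the hour of the day), PU/DO location IDs and a trip distance.
# The encoding functions below derive the model features from it (day, month, hour, day of week, weekend flag).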

# Fixed timestamp for the sample request (15 Aug 2024, 10:30).
dt_test = datetime(year=2024, month=8, day=15, hour=10, minute=30)

# Sample raw request; these four keys are the ones encode_info() reads.
Information = dict(
    time_info=dt_test,
    PULocation=100,
    DOLocation=100,
    trip_distance=1000.0,
)

In [None]:
# Base Preprocess
def encode_info(data):
    """Expand a raw request dict into calendar fields plus pass-through values.

    Args:
        data: Mapping with the keys ``time_info``, ``PULocation``,
            ``DOLocation`` and ``trip_distance``. ``time_info`` must expose
            ``day``, ``month``, ``hour`` and ``weekday()`` (assumes a
            ``datetime``-like object).

    Returns:
        dict: ``day``, ``month``, ``hour``, ``day_of_week``, ``weekend``
        (1 when ``day_of_week`` is 5 or 6, else 0), ``PULocationID``,
        ``DOLocationID`` and ``trip_distance``.

    Side effects:
        Prints the encoded dict to stdout for debugging.
    """
    stamp = data["time_info"]
    pu_id = data["PULocation"]
    do_id = data["DOLocation"]
    distance = data["trip_distance"]

    # Calendar components of the timestamp.
    day_num, month_num, hour_num = stamp.day, stamp.month, stamp.hour

    # For datetime objects weekday() is 0 (Monday) .. 6 (Sunday).
    weekday_idx = stamp.weekday()

    # Weekday indices 5 and 6 are flagged as weekend.
    weekend_flag = 1 if weekday_idx in (5, 6) else 0

    encoded = dict(
        day=day_num,
        month=month_num,
        hour=hour_num,
        day_of_week=weekday_idx,
        weekend=weekend_flag,
        PULocationID=pu_id,
        DOLocationID=do_id,
        trip_distance=distance,
    )

    # Debug trace of the encoded record.
    print("Encoded data:")
    print(encoded)

    return encoded
    
    
# Circular Encoding
def encode_circular(data):
    """Replace day-of-week and hour with sine/cosine pairs.

    Day of week is mapped onto a circle with period 7 and hour onto a circle
    with period 24; the remaining fields are copied through, with ``day``
    renamed to ``day_of_month``.

    NOTE(review): the periods (7 and 24) must match whatever encoding was
    used when the model was trained -- confirm against the training code.

    Args:
        data: Mapping with the keys ``day_of_week``, ``hour``, ``day``,
            ``month``, ``weekend``, ``trip_distance``, ``PULocationID`` and
            ``DOLocationID`` (the shape produced by ``encode_info``).

    Returns:
        dict: ``dow_sin``, ``dow_cos``, ``hour_sin``, ``hour_cos``,
        ``day_of_month``, ``month``, ``weekend``, ``trip_distance``,
        ``PULocationID`` and ``DOLocationID``.
    """
    # Angles in radians: fraction of the cycle times a full turn.
    dow_angle = (data["day_of_week"] / 7) * 2 * np.pi
    hour_angle = (data["hour"] / 24) * 2 * np.pi

    cyclic_part = {
        "dow_sin": np.sin(dow_angle),
        "dow_cos": np.cos(dow_angle),
        "hour_sin": np.sin(hour_angle),
        "hour_cos": np.cos(hour_angle),
    }

    # Fields forwarded unchanged; only "day" gets a new key name.
    passthrough_part = {"day_of_month": data["day"]}
    for key in ("month", "weekend", "trip_distance", "PULocationID", "DOLocationID"):
        passthrough_part[key] = data[key]

    return {**cyclic_part, **passthrough_part}

def run_model_base_xgb(data, path):
    """Load a saved XGBoost regressor and predict for a single raw request.

    The request is encoded with ``encode_info`` followed by
    ``encode_circular``, arranged into the column order stored in the model,
    and passed to ``predict``.

    Args:
        data: Raw request dict as accepted by ``encode_info`` (keys
            ``time_info``, ``PULocation``, ``DOLocation``, ``trip_distance``).
        path: Path of the saved model file handed to ``load_model``.

    Returns:
        float: The model's prediction for the single input row.

    Side effects:
        Prints the encoded dict (via ``encode_info``) and the feature frame
        to stdout for debugging.
    """
    model = xgb.XGBRegressor()
    model.load_model(path)

    # Encode the input data
    data_encoded = encode_circular(encode_info(data))

    # Feature names stored in the booster; None if the model was saved
    # without them.
    feature_names = model.get_booster().feature_names

    # Build the frame from the record first and only then align it to the
    # model's columns. Passing ``columns=feature_names`` to the constructor
    # would create any missing feature as NaN, so the ``fill_value=0`` of the
    # reindex would never be applied.
    data_df = pd.DataFrame([data_encoded])
    if feature_names is not None:
        data_df = data_df.reindex(columns=feature_names, fill_value=0)

    # Print the DataFrame for debugging
    print("DataFrame for prediction:")
    print(data_df)

    # Run the model; a single row yields a length-1 array.
    prediction = model.predict(data_df)
    return float(prediction[0])
    

In [15]:
run_model_base_xgb(Information, "models/total_amount_base_model.json")

Encoded data:
{'day': 15, 'month': 8, 'hour': 10, 'day_of_week': 3, 'weekend': 0, 'PULocationID': 100, 'DOLocationID': 100, 'trip_distance': 1000.0}
DataFrame for prediction:
   PULocationID  DOLocationID  trip_distance  day_of_month  month  weekend  \
0           100           100         1000.0            15      8        0   

        dow_sin  dow_cos  hour_sin  hour_cos  
0  1.224647e-16     -1.0  0.398401 -0.917211  
