### Using the production environment

In [1]:
# List the pip executables found on PATH to check that the production
# virtualenv's pip is picked up (`where` is a Windows command).
!where pip

D:\Personal\self study\uber fare prediction\production\virtualenv_prod_uber_fare\Scripts\pip.exe
C:\Users\Lenovo\AppData\Local\Programs\Python\Python312\Scripts\pip.exe


In [3]:
import pandas as pd
import pickle
import datetime
import numpy as np
import copy
from math import radians, sin, cos, sqrt, atan2

### Reading one data point (just for testing the pipeline)

In [38]:
# Only a single row is needed to smoke-test the pipeline, so read just that
# row instead of loading the whole CSV and slicing it afterwards.
# The dropped columns are not used by the feature pipeline below.
data = pd.read_csv('../uber.csv', nrows=1).drop(
    columns=['Unnamed: 0', 'key', 'fare_amount']
)

In [39]:
# Preview the single test row that will be fed through the pipeline.
data

Unnamed: 0,pickup_datetime,pickup_longitude,pickup_latitude,dropoff_longitude,dropoff_latitude,passenger_count
0,2015-05-07 19:52:06 UTC,-73.999817,40.738354,-73.999512,40.723217,1


### Data Processing

In [40]:
def haversine_distance(long1,lat1, long2, lat2):
    """Return the great-circle (haversine) distance between two points in km.

    Parameters
    ----------
    long1, lat1 : float
        Longitude and latitude of the first point, in decimal degrees.
    long2, lat2 : float
        Longitude and latitude of the second point, in decimal degrees.

    Returns
    -------
    float
        Distance in kilometres on a sphere of radius 6371 km. NaN inputs
        propagate to a NaN result.
    """
    # Convert latitude and longitude to radians
    lat1_rad = radians(lat1)
    long1_rad = radians(long1)
    lat2_rad = radians(lat2)
    long2_rad = radians(long2)

    # Haversine formula
    dlong = long2_rad - long1_rad
    dlat = lat2_rad - lat1_rad
    a = sin(dlat/2)**2 + cos(lat1_rad) * cos(lat2_rad) * sin(dlong/2)**2

    # Floating-point rounding can push `a` a hair above 1 for antipodal
    # points, which would make sqrt(1 - a) raise "math domain error".
    # A plain comparison (rather than min/max) keeps NaN untouched so that
    # missing coordinates still yield NaN instead of a bogus distance.
    if a > 1.0:
        a = 1.0

    c = 2 * atan2(sqrt(a), sqrt(1-a))
    distance = 6371 * c  # Earth radius in kilometers
    return distance

To produce the same feature names (and column order) that were used in training, inspect the features the trained model expects:

In [46]:
# NOTE: `model` is loaded in the "Importing the production model" cell further
# down; that cell must have been run before this one or `model` is undefined.
model.get_booster().feature_names

['passenger_count',
 'distance_km',
 'dayofyear_sin',
 'dayofyear_cos',
 'dayofweek_sin',
 'dayofweek_cos',
 'time_sin',
 'time_cos']

In [41]:
def data_processing(data):
    """Turn raw ride rows into the feature frame the production model expects.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw rides containing the columns ``pickup_datetime``,
        ``pickup_longitude``, ``pickup_latitude``, ``dropoff_longitude``,
        ``dropoff_latitude`` and ``passenger_count``. Any other columns are
        ignored. The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        One row per input row, index reset to 0..n-1, with exactly these
        columns in this order: ``passenger_count``, ``distance_km``,
        ``dayofyear_sin``, ``dayofyear_cos``, ``dayofweek_sin``,
        ``dayofweek_cos``, ``time_sin``, ``time_cos``.

    Raises
    ------
    KeyError
        If one of the required input columns is missing.
    """
    # Column set and order reported by model.get_booster().feature_names;
    # selecting explicitly keeps stray input columns out of the model input.
    feature_order = [
        'passenger_count',
        'distance_km',
        'dayofyear_sin',
        'dayofyear_cos',
        'dayofweek_sin',
        'dayofweek_cos',
        'time_sin',
        'time_cos',
    ]

    df = data.copy()
    pickup = pd.to_datetime(df['pickup_datetime'])

    # Trip length; iterating the four coordinate columns in lockstep avoids
    # the per-row Series construction of DataFrame.apply(axis=1) and also
    # works for an empty frame.
    df['distance_km'] = pd.Series(
        [
            haversine_distance(lon1, lat1, lon2, lat2)
            for lon1, lat1, lon2, lat2 in zip(
                df['pickup_longitude'],
                df['pickup_latitude'],
                df['dropoff_longitude'],
                df['dropoff_latitude'],
            )
        ],
        index=df.index,
        dtype='float64',
    )

    # Cyclical encodings of the calendar position and time of day.
    day_of_year = pickup.dt.dayofyear
    day_of_week = pickup.dt.weekday
    seconds_since_midnight = (
        pickup.dt.hour * 3600 + pickup.dt.minute * 60 + pickup.dt.second
    )

    df['dayofyear_sin'] = np.sin(2*np.pi*day_of_year/366)
    df['dayofyear_cos'] = np.cos(2*np.pi*day_of_year/366)
    # NOTE(review): weekday runs 0..6, so a period of 7 would be expected;
    # with 6, Monday (0) and Sunday (6) get the same encoding. Left unchanged
    # because this transform must match the one used when the model was
    # trained -- confirm against the training code before changing it.
    df['dayofweek_sin'] = np.sin(2*np.pi*day_of_week/6)
    df['dayofweek_cos'] = np.cos(2*np.pi*day_of_week/6)
    df['time_sin'] = np.sin(2*np.pi*seconds_since_midnight/86400)
    df['time_cos'] = np.cos(2*np.pi*seconds_since_midnight/86400)

    final_df = df[feature_order].reset_index(drop = True)
    return final_df


### Importing the production model

In [4]:
# Load the pickled production model from the notebook's working directory.
# NOTE: unpickling can execute arbitrary code -- only load model files that
# come from a trusted source.
with open('./production_model.bin' , 'rb') as f_in:
    model = pickle.load(f_in)

In [31]:
def predict(raw_data):
    """Return the production model's predictions for raw ride data.

    The raw frame is first converted by ``data_processing``; the resulting
    feature frame is then passed to the ``predict`` method of the
    notebook-level ``model``.
    """
    return model.predict(data_processing(raw_data))

In [42]:
# Run the full pipeline (feature engineering + model inference) on the one-row test frame.
predict(data)

array([7.4559135], dtype=float32)