In [151]:
import pickle
from sklearn.preprocessing import LabelEncoder, StandardScaler
import pandas as pd

In [152]:
transformer = pickle.load(open("artifacts/transformed.pkl", "rb"))
transformer

In [153]:
model = pickle.load(open("artifacts/model.pkl", "rb"))
model

In [174]:
og_df = pd.read_csv("../data/uber.csv", parse_dates=['pickup_datetime'])
og_df.drop(['Unnamed: 0','key'], axis=1, inplace=True)
og_df.head(1)

Unnamed: 0,fare_amount,pickup_datetime,pickup_longitude,pickup_latitude,dropoff_longitude,dropoff_latitude,passenger_count
0,7.5,2015-05-07 19:52:06+00:00,-73.999817,40.738354,-73.999512,40.723217,1


In [155]:
# collected data
pickup_datetime = "2015-05-07 19:52:06+00:00"
df = pd.DataFrame({"pickup_datetime": [pd.to_datetime(pickup_datetime)],'pickup_longitude':-73.999817,"pickup_latitude":40.738354,"dropoff_longitude":-73.999512,"dropoff_latitude":40.723217,'passenger_count':1})

In [156]:
df

Unnamed: 0,pickup_datetime,pickup_longitude,pickup_latitude,dropoff_longitude,dropoff_latitude,passenger_count
0,2015-05-07 19:52:06+00:00,-73.999817,40.738354,-73.999512,40.723217,1


In [157]:
df.dtypes

pickup_datetime      datetime64[ns, UTC]
pickup_longitude                 float64
pickup_latitude                  float64
dropoff_longitude                float64
dropoff_latitude                 float64
passenger_count                    int64
dtype: object

In [158]:
df['year'] = df.pickup_datetime.dt.year
df['month'] = df.pickup_datetime.dt.month
df['weekday'] = df.pickup_datetime.dt.weekday
df['hour'] = df.pickup_datetime.dt.hour

In [159]:
df['Monthly_Quarter'] = df.month.map({1:'Q1',2:'Q1',3:'Q1',4:'Q2',5:'Q2',6:'Q2',7:'Q3',
                                      8:'Q3',9:'Q3',10:'Q4',11:'Q4',12:'Q4'})

df['Hourly_Segments'] = df.hour.map({0:'H1',1:'H1',2:'H1',3:'H1',4:'H2',5:'H2',6:'H2',7:'H2',8:'H3',
                                     9:'H3',10:'H3',11:'H3',12:'H4',13:'H4',14:'H4',15:'H4',16:'H5',
                                     17:'H5',18:'H5',19:'H5',20:'H6',21:'H6',22:'H6',23:'H6'})

In [160]:
df.drop(['pickup_datetime','month', 'hour',], axis=1, inplace=True)

#### Calculate Distance

In [161]:
from math import radians, sin, cos, sqrt, asin

def distance_transform(longitude1, latitude1, longitude2, latitude2):
    travel_dist = []
    
    for pos in range(len(longitude1)):
        long1,lati1,long2,lati2 = map(radians,[longitude1[pos],latitude1[pos],longitude2[pos],latitude2[pos]])
        dist_long = long2 - long1
        dist_lati = lati2 - lati1
        a = sin(dist_lati/2)**2 + cos(lati1) * cos(lati2) * sin(dist_long/2)**2
        c = 2 * asin(sqrt(a))*6371
        travel_dist.append(c)
       
    return travel_dist

In [162]:
df['distance_traveled']=distance_transform(df['pickup_longitude'].to_numpy(),
                                  df['pickup_latitude'].to_numpy(),
                                  df['dropoff_longitude'].to_numpy(),
                                  df['dropoff_latitude'].to_numpy())
## This Distance is in kilometers
df['distance_traveled'] = df['distance_traveled'].round(3)

In [163]:
df

Unnamed: 0,pickup_longitude,pickup_latitude,dropoff_longitude,dropoff_latitude,passenger_count,year,weekday,Monthly_Quarter,Hourly_Segments,distance_traveled
0,-73.999817,40.738354,-73.999512,40.723217,1,2015,3,Q2,H5,1.683


In [164]:
Hour_encoder = LabelEncoder()
Month_encoder = LabelEncoder()

df.Monthly_Quarter = Month_encoder.fit_transform(df.Monthly_Quarter)
df.Hourly_Segments = Hour_encoder.fit_transform(df.Hourly_Segments)

Hour_Segments_decoded = Hour_encoder.inverse_transform(df.Hourly_Segments)
print("Hour Segments Decoded Categories:", Hour_Segments_decoded)

Month_Segments_decoded = Month_encoder.inverse_transform(df.Monthly_Quarter)
print("Decoded Categories:", Month_Segments_decoded)

Hour Segments Decoded Categories: ['H5']
Decoded Categories: ['Q2']


In [165]:
# transformed
df

Unnamed: 0,pickup_longitude,pickup_latitude,dropoff_longitude,dropoff_latitude,passenger_count,year,weekday,Monthly_Quarter,Hourly_Segments,distance_traveled
0,-73.999817,40.738354,-73.999512,40.723217,1,2015,3,0,0,1.683


### Feature List
- passenger_count
- year
- weekday
- Monthly_Quarter
- Hourly_Segments
- distance_traveled

In [166]:
df.drop(['pickup_latitude','pickup_longitude','dropoff_latitude','dropoff_longitude'],axis=1,inplace=True)
df

Unnamed: 0,passenger_count,year,weekday,Monthly_Quarter,Hourly_Segments,distance_traveled
0,1,2015,3,0,0,1.683


In [167]:
df.values

array([[1.000e+00, 2.015e+03, 3.000e+00, 0.000e+00, 0.000e+00, 1.683e+00]])

In [168]:
df

Unnamed: 0,passenger_count,year,weekday,Monthly_Quarter,Hourly_Segments,distance_traveled
0,1,2015,3,0,0,1.683


In [169]:
data_to_predict = transformer.transform(df)

In [170]:
predicted_result = model.predict(data_to_predict)

In [173]:
# fare price predicted
round(predicted_result[0],2)

6.97