# This Notebook contains the streamlit forecasting application code

We will load the models and all the files needed to make the forecast. Then we will combine the forecast results into one dataframe, plot the results on a folium map, and do some analysis.

__Please note: I will also upload the full app folder, which contains the Dockerfile, app.yaml, app.py, and all the necessary files.__

In [2]:
from keras.models import load_model
import pickle
import tensorflow as tf
import holidays
import pandas as pd
import numpy as np
from pickle import load
import streamlit as st
import folium
from streamlit_folium import st_folium
import altair as alt
import warnings
warnings.filterwarnings('ignore')

1- This function adds a LocationID column to the weather data for each of the locations used in the machine learning model.

In [3]:
def locationsID_Generate_ml_model(dataframe,locationID):
    """Return a copy of ``dataframe`` tagged with a constant ``LocationID``.

    The input frame is left untouched; the returned copy carries a fresh
    0..n-1 index so that per-location copies can be stacked row-wise.
    """
    tagged = dataframe.copy(deep=True)
    tagged['LocationID'] = locationID
    return tagged.reset_index(drop=True)

2- This function transforms the deep learning model prediction into the same format as the machine learning model prediction, so that the two can be concatenated by rows.

In [4]:
def locationsID_Generate_deep_model(dataframe,location):
    """Reshape one location column of the deep-learning forecast to long format.

    ``dataframe`` holds one column per location plus ``datetime``.  The
    column named ``location`` is copied into ``Trips_Forcast`` and returned
    together with ``LocationID`` and ``datetime`` under a fresh 0..n-1 index.
    The input frame is not modified.
    """
    working = dataframe.copy(deep=True)
    working['LocationID'] = location
    working['Trips_Forcast'] = working[location]
    long_format = working[['LocationID','datetime','Trips_Forcast']]
    return long_format.reset_index(drop=True)

# 1- Define function to prepare the data for ML model forecast


In [5]:
def prepare_weather_data(dataframe, start_date='2022-05-01'):
    """Build the per-location feature table expected by the ML model.

    The ML model expects the following columns:
    (LocationID, Year, Month, DayOfMonth, Hour, dayofweek, temp, humidity,
    precip, snow, windspeed, holiday, IsWeekend).  ``datetime`` is kept as
    well so the caller can filter and label the forecast.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Weather observations with the columns ``datetime``, ``temp``,
        ``humidity``, ``precip``, ``snow`` and ``windspeed``.  The input
        frame is not modified.
    start_date : str or datetime-like, default '2022-05-01'
        Rows with an earlier ``datetime`` are dropped.

    Returns
    -------
    pandas.DataFrame
        One copy of the filtered weather rows per location ID, stacked
        row-wise, restricted to the feature columns listed above plus
        ``datetime``.
    """
    feature_columns = ['LocationID', 'datetime', 'Year', 'Month', 'DayOfMonth',
                       'Hour', 'dayofweek', 'temp', 'humidity', 'precip',
                       'snow', 'windspeed', 'holiday', 'IsWeekend']

    # Locations forecast by the ML model
    location_list = [263, 262, 261, 249, 246, 244, 243, 239, 238, 234, 233, 232, 231, 230, 229, 224, 211, 209, 202, 194, 170, 166, 164, 163, 158,
    153, 152, 151, 148, 144, 143, 142, 141, 140, 137, 128, 127, 125, 120, 116, 114, 113, 107, 105, 100, 90, 88, 87, 79, 75, 74,
    68, 50, 48, 45, 43, 42, 41, 24, 13, 12, 4]

    # Work on a copy so the caller's frame is never mutated, then keep only
    # the rows from ``start_date`` onward.
    weather = dataframe.copy()
    weather['datetime'] = pd.to_datetime(weather['datetime'])
    weather = weather[weather['datetime'] >= start_date].reset_index(drop=True)

    # Create features from datetime column
    stamps = weather['datetime']
    weather['Year'] = stamps.dt.year
    weather['Month'] = stamps.dt.month
    weather['DayOfMonth'] = stamps.dt.day
    weather['Hour'] = stamps.dt.hour
    weather['dayofweek'] = stamps.dt.dayofweek

    # IsWeekend: 1 for Saturday/Sunday (dayofweek 5 or 6), otherwise 0.
    # A direct cast avoids the chained in-place replace, which does not
    # update the frame under pandas Copy-on-Write.
    weather['IsWeekend'] = (weather['dayofweek'] >= 5).astype(int)

    # holiday: 1.0 when the calendar day is a United States holiday, else 0.0.
    # The holiday calendar is built for every year present in the data
    # instead of a hard-coded 2022.
    years = [int(year) for year in weather['Year'].unique()]
    us_holidays = holidays.UnitedStates(years=years)
    holiday_dates = pd.to_datetime(list(us_holidays.keys()))
    weather['holiday'] = stamps.dt.normalize().isin(holiday_dates).astype(float)

    # Replicate the weather rows once per location and stack them with a
    # single concat (repeated concat inside a loop is quadratic).
    per_location = [locationsID_Generate_ml_model(weather, location)
                    for location in location_list]
    df = pd.concat(per_location, axis=0)

    # now choosing the same features the ML model was trained on
    df_forcast = df[feature_columns]
    return df_forcast

In [6]:
# Load only the weather columns that the forecast features are built from.
df_temp=pd.read_csv('new york_weather.csv',usecols=['datetime','temp','humidity','precip','snow','windspeed'])

In [9]:
# Build the per-location feature table consumed by the ML model.
df_forcast = prepare_weather_data(df_temp)

In [10]:
# Display the prepared feature table.
df_forcast

Unnamed: 0,LocationID,datetime,Year,Month,DayOfMonth,Hour,dayofweek,temp,humidity,precip,snow,windspeed,holiday,IsWeekend
0,263,2022-05-01 00:00:00,2022,5,1,0,6,11.9,37.69,0.0,0.0,3.8,0.0,1
1,263,2022-05-01 01:00:00,2022,5,1,1,6,11.4,48.40,0.0,0.0,2.9,0.0,1
2,263,2022-05-01 02:00:00,2022,5,1,2,6,10.4,56.34,0.0,0.0,3.7,0.0,1
3,263,2022-05-01 03:00:00,2022,5,1,3,6,9.8,61.46,0.0,0.0,0.9,0.0,1
4,263,2022-05-01 04:00:00,2022,5,1,4,6,9.0,67.81,0.0,0.0,2.6,0.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2203,4,2022-07-31 19:00:00,2022,7,31,19,6,26.2,61.96,0.0,0.0,6.5,0.0,1
2204,4,2022-07-31 20:00:00,2022,7,31,20,6,25.8,68.64,0.0,0.0,8.9,0.0,1
2205,4,2022-07-31 21:00:00,2022,7,31,21,6,25.8,70.72,0.0,0.0,5.0,0.0,1
2206,4,2022-07-31 22:00:00,2022,7,31,22,6,25.9,69.15,0.0,0.0,7.3,0.0,1


# 2- Define function to make the prediction

This function will load the models and make the predictions, then combine the predictions into one dataframe.

In [8]:
def data_forcast(dataframe,days_to_forcast):
    """Forecast hourly taxi trips per location, starting on 1-May-2022.

    The stacked ML model (``ML_model.pkl``) predicts every location present
    in ``dataframe``.  The CNN-LSTM encoder-decoder model
    (``CNN_Encoder_Decoder_final_model.h5``) predicts locations 161, 162,
    186, 236 and 237 recursively, seeded with the last 24 normalised
    training hours stored in ``last_24.npy``.  Both results are stacked
    into a single frame.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Feature table as produced by ``prepare_weather_data``; it must hold
        a datetime-typed ``datetime`` column, and every other column is
        passed to the ML model as a feature.
    days_to_forcast : int
        Number of days to forecast, counted from 2022-05-01 (at least 1).

    Returns
    -------
    pandas.DataFrame
        Columns ``LocationID`` (as strings), ``datetime``,
        ``Trips_Forcast``, ``DayOfMonth`` and ``Hour``.  Location 105 is
        excluded because it is missing from the GeoJSON used for the map.

    Raises
    ------
    ValueError
        If ``days_to_forcast`` is smaller than 1.
    """
    if days_to_forcast < 1:
        raise ValueError("days_to_forcast must be at least 1")

    forecast_start = pd.Timestamp('2022-05-01')
    forecast_end = forecast_start + pd.Timedelta(days=days_to_forcast)
    dl_locations = [161, 162, 186, 236, 237]

    # Loading the models (Stacking Model).
    # NOTE: unpickling executes arbitrary code; only load trusted model files.
    with open('ML_model.pkl' , 'rb') as f:
        ml_model = pickle.load(f)

    # Loading the models (CNN-LSTM-Encoder Decoder Model)
    dl_model = load_model("CNN_Encoder_Decoder_final_model.h5")

                    #================= Forcast using the Machine Learning Model===============================

    # The ML model was trained on data up to April-2022, so keep only the
    # rows from 1-May-2022 up to the end of the requested range.  Filtering
    # on the timestamp keeps this window identical to the one used by the
    # deep learning model below, even across a month boundary.
    in_window = (dataframe['datetime'] >= forecast_start) & (dataframe['datetime'] < forecast_end)
    # Explicit copy: the slice is extended with a new column below.
    filtered_data = dataframe.loc[in_window].copy()
    preparded_data = filtered_data.drop(['datetime'], axis=1)

    # make prediction using ML model; predictions are floats, keep integers
    ML_forcast = ml_model.predict(preparded_data)
    filtered_data['Trips_Forcast'] = ML_forcast.astype(int)

    # choose the wanted columns 'LocationID', 'datetime', 'Trips_Forcast'
    filtered_data = filtered_data[['LocationID', 'datetime', 'Trips_Forcast']].copy()

    # negative predictions are meaningless for trip counts: floor them at zero
    # (assignment instead of a chained in-place mask, which is not applied
    # under pandas Copy-on-Write)
    filtered_data['Trips_Forcast'] = filtered_data['Trips_Forcast'].clip(lower=0)

                    #================= Forcast using the Deep Learning Model===============================

    # the deep learning model forecasts hourly data: 24 steps per day
    FORCAST = 24*days_to_forcast

    # seed the recursion with the last 24 hours of the normalised training data
    X_test_mod = np.load('last_24.npy').reshape((1, 24, 5))
    step_predictions = []
    for _ in range(FORCAST):
        y_pred = dl_model.predict(X_test_mod, verbose=0)
        # slide the 24-hour window: drop the oldest hour, append the new one
        X_test_mod = np.concatenate([X_test_mod[:, 1:, :], y_pred], axis=1)
        step_predictions.append(y_pred)

    # one row per forecast hour, one column per location
    y_preds_reshaped = np.asarray(step_predictions, dtype=np.float64).reshape(-1, 5)

    # invert the normalisation with the saved scaler
    with open('scaler1.pkl', 'rb') as f:
        scaler = pickle.load(f)
    y_preds_inverse = scaler.inverse_transform(y_preds_reshaped)

    # convert the prediction to integer since we have float data type
    y_preds_inverse = y_preds_inverse.astype(int)

    # create dataframe from the prediction
    forcast_df = pd.DataFrame(data=y_preds_inverse, columns=dl_locations)

    # hourly timestamps from 1-May-2022 till the end of the forecast range
    forcast_df['datetime'] = pd.date_range(forecast_start, periods=FORCAST, freq=pd.Timedelta(hours=1))

    # transform to the same long format as the ML prediction
    forcast_df_transfrom = pd.concat(
        [locationsID_Generate_deep_model(forcast_df, location) for location in dl_locations],
        axis=0,
    )

    # floor negative predictions at zero
    forcast_df_transfrom['Trips_Forcast'] = forcast_df_transfrom['Trips_Forcast'].clip(lower=0)

    # now concatenating the ML prediction with the DL prediction
    combined_forcast = pd.concat([filtered_data, forcast_df_transfrom], axis=0)

    # create day-of-month and hour features
    combined_forcast['DayOfMonth'] = combined_forcast['datetime'].dt.day
    combined_forcast['Hour'] = combined_forcast['datetime'].dt.hour

    # locationID 105 is not available in the GeoJSON file, which would cause
    # errors on the folium map, so its prediction is removed
    combined_forcast = combined_forcast[~combined_forcast['LocationID'].isin([105])].copy()

    # now converting the LocationID to string
    combined_forcast['LocationID'] = combined_forcast['LocationID'].apply(str)

    return combined_forcast

### Making forecast

In [11]:
# Forecast the first 3 days (starting 1-May-2022) for every location.
data_forcast(df_forcast,3)



Unnamed: 0,LocationID,datetime,Trips_Forcast,DayOfMonth,Hour
0,263,2022-05-01 00:00:00,67,1,0
1,263,2022-05-01 01:00:00,39,1,1
2,263,2022-05-01 02:00:00,27,1,2
3,263,2022-05-01 03:00:00,12,1,3
4,263,2022-05-01 04:00:00,22,1,4
...,...,...,...,...,...
67,237,2022-05-03 19:00:00,397,3,19
68,237,2022-05-03 20:00:00,371,3,20
69,237,2022-05-03 21:00:00,308,3,21
70,237,2022-05-03 22:00:00,267,3,22


# 3- Define function to display the prediction on map using folium

In [23]:
def display_map(dataframe,day,hour):
    """Render the forecast for one day and hour as a choropleth in Streamlit.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Combined forecast with the columns ``LocationID``, ``datetime``,
        ``Trips_Forcast``, ``DayOfMonth`` and ``Hour``.
    day : int
        Day of month to display.
    hour : int
        Hour of day to display.

    Zones found in ``manhattan_zones.geojson`` that have no forecast row for
    the selected day and hour show ``N/A`` in the tooltip.
    """
    selected = dataframe.loc[(dataframe['DayOfMonth']==day) & (dataframe['Hour']== hour)]
    selected = selected.drop(['datetime'], axis=1)

    # named ``zone_map`` so the builtin ``map`` is not shadowed
    zone_map = folium.Map(location=[40.7831, -73.9712], zoom_start=11.4,scrollWheelZoom=False,tiles='CartoDB positron')
    choropleth = folium.Choropleth(
        geo_data='manhattan_zones.geojson',
        data=selected,
        columns=('LocationID','Trips_Forcast'),
        key_on='feature.properties.location_id',
        fill_color="YlOrRd",
        line_opacity=1,
        highlight=True
        )
    choropleth.geojson.add_to(zone_map)

    # Forecast per zone, keyed by the string form of the location ID.
    trips_by_zone = {
        str(location): trips
        for location, trips in zip(selected['LocationID'], selected['Trips_Forcast'])
    }

    # Copy the forecast into each GeoJSON feature so the tooltip can show it.
    # A dictionary lookup with a default replaces ``.loc``, which raised
    # KeyError for any zone without a forecast row.
    for feature in choropleth.geojson.data['features']:
        zone_id = feature['properties']['location_id']
        feature['properties']['Trips_Forcast'] = str(trips_by_zone.get(str(zone_id), 'N/A'))

    choropleth.geojson.add_child(
        folium.features.GeoJsonTooltip(
            fields=['location_id','zone','Trips_Forcast'],
            aliases=['Location ID : ', 'Area Name : ', 'Forcasted Taxi trips : '])
    )
    st_folium(zone_map,width = 700, height = 450)


# 4- Define function to make line chart and bar plot

In [24]:
def line_chart(df):
    """Build a two-panel Altair line chart of ``Trips_Forcast`` over ``datetime``.

    The lower, shorter panel carries an interval selection on the x axis;
    the upper panel uses that selection as the domain of its x scale.
    Returns the two panels concatenated vertically.
    """
    brush = alt.selection(type='interval', encodings=['x'])

    trend = (
        alt.Chart(df)
        .mark_line()
        .encode(x='datetime:T', y='Trips_Forcast:Q')
        .properties(width=600, height=200)
    )

    # detail panel: x axis restricted to the brushed interval
    brushed_axis = alt.X('datetime:T', scale=alt.Scale(domain=brush))
    detail_panel = trend.encode(brushed_axis)

    # overview panel: full range, hosts the brush
    overview_panel = trend.properties(height=60).add_selection(brush)

    return alt.vconcat(detail_panel, overview_panel)

In [25]:
def bar_plot(dataframe):
    """Return a horizontal Altair bar chart of ``Trips_Forcast`` per ``Zone``.

    Bars are sorted by descending x value and show ``Trips_Forcast`` in the
    tooltip.
    """
    bars = alt.Chart(dataframe).mark_bar()
    zone_axis = alt.Y('Zone:N', sort='-x')
    return bars.encode(
        x='Trips_Forcast:Q',
        y=zone_axis,
        tooltip=['Trips_Forcast'],
    )

# 5- The full streamlit app code combined "app.py"

In [None]:


# Title passed to st.set_page_config and st.title in main().
# (The original literal opened with ' and closed with ", a SyntaxError.)
APP_TITLE = 'Manhattan Taxi Demand Prediction'
# Description rendered with st.caption under the title.
APP_SUBTITLE = "I developed a taxi demand forecasting system for Manhattan that uses combination of machine learning models and deep neural network model. It uses the CNN-LSTM Encoder Decoder model to forecast Manhattan's top 5 locations, while it uses the Stacked Machine Learning model to forecast the remaining locations"


def locationsID_Generate_ml_model(dataframe,locationID):
    """Return a re-indexed deep copy of ``dataframe`` with a constant ``LocationID`` column.

    The caller's frame is not modified.
    """
    per_location = dataframe.copy(deep=True)
    per_location['LocationID'] = locationID
    per_location = per_location.reset_index(drop=True)
    return per_location

def locationsID_Generate_deep_model(dataframe,location):
    """Turn one location column of the wide DL forecast into long format.

    Returns a new frame with the columns ``LocationID``, ``datetime`` and
    ``Trips_Forcast`` (the values of column ``location``) under a fresh
    0..n-1 index.  The caller's frame is not modified.
    """
    wide = dataframe.copy(deep=True)
    wide['LocationID'] = location
    wide['Trips_Forcast'] = wide[location]
    wanted = ['LocationID','datetime','Trips_Forcast']
    return wide[wanted].reset_index(drop=True)


#################################################################### #################################################################
def preparded_data_forcast(dataframe,days_to_forcast):
    """Forecast hourly taxi trips per location, starting on 1-May-2022.

    The stacked ML model (``ML_model.pkl``) predicts every location present
    in ``dataframe``.  The CNN-LSTM encoder-decoder model
    (``CNN_Encoder_Decoder_final_model.h5``) predicts locations 161, 162,
    186, 236 and 237 recursively, seeded with the last 24 normalised
    training hours stored in ``last_24.npy``.  Both results are stacked
    into a single frame.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Feature table with a datetime-typed ``datetime`` column; every
        other column is passed to the ML model as a feature.
    days_to_forcast : int
        Number of days to forecast, counted from 2022-05-01 (at least 1).

    Returns
    -------
    pandas.DataFrame
        Columns ``LocationID`` (as strings), ``datetime``,
        ``Trips_Forcast``, ``DayOfMonth`` and ``Hour``.  Location 105 is
        excluded because it is missing from the GeoJSON used for the map.

    Raises
    ------
    ValueError
        If ``days_to_forcast`` is smaller than 1.
    """
    if days_to_forcast < 1:
        raise ValueError("days_to_forcast must be at least 1")

    forecast_start = pd.Timestamp('2022-05-01')
    forecast_end = forecast_start + pd.Timedelta(days=days_to_forcast)
    dl_locations = [161, 162, 186, 236, 237]

    # NOTE: unpickling executes arbitrary code; only load trusted model files.
    with open('ML_model.pkl' , 'rb') as f:
        ml_model = pickle.load(f)

    dl_model = load_model("CNN_Encoder_Decoder_final_model.h5")

                    #================= Forcast using the Machine Learning Model===============================

    # The ML model was trained on data up to April-2022, so keep only the
    # rows from 1-May-2022 up to the end of the requested range.  Filtering
    # on the timestamp keeps this window identical to the one used by the
    # deep learning model below, even across a month boundary.
    in_window = (dataframe['datetime'] >= forecast_start) & (dataframe['datetime'] < forecast_end)
    # Explicit copy: the slice is extended with a new column below.
    filtered_data = dataframe.loc[in_window].copy()
    preparded_data = filtered_data.drop(['datetime'], axis=1)

    # make prediction using ML model; predictions are floats, keep integers
    ML_forcast = ml_model.predict(preparded_data)
    filtered_data['Trips_Forcast'] = ML_forcast.astype(int)

    # choose the wanted columns 'LocationID', 'datetime', 'Trips_Forcast'
    filtered_data = filtered_data[['LocationID', 'datetime', 'Trips_Forcast']].copy()

    # negative predictions are meaningless for trip counts: floor them at zero
    # (assignment instead of a chained in-place mask, which is not applied
    # under pandas Copy-on-Write)
    filtered_data['Trips_Forcast'] = filtered_data['Trips_Forcast'].clip(lower=0)

                    #================= Forcast using the Deep Learning Model===============================

    # the deep learning model forecasts hourly data: 24 steps per day
    FORCAST = 24*days_to_forcast

    # seed the recursion with the last 24 hours of the normalised training data
    X_test_mod = np.load('last_24.npy').reshape((1, 24, 5))
    step_predictions = []
    for _ in range(FORCAST):
        y_pred = dl_model.predict(X_test_mod, verbose=0)
        # slide the 24-hour window: drop the oldest hour, append the new one
        X_test_mod = np.concatenate([X_test_mod[:, 1:, :], y_pred], axis=1)
        step_predictions.append(y_pred)

    # one row per forecast hour, one column per location
    y_preds_reshaped = np.asarray(step_predictions, dtype=np.float64).reshape(-1, 5)

    # invert the normalisation with the saved scaler
    with open('scaler1.pkl', 'rb') as f:
        scaler = pickle.load(f)
    y_preds_inverse = scaler.inverse_transform(y_preds_reshaped)

    # convert the prediction to integer since we have float data type
    y_preds_inverse = y_preds_inverse.astype(int)

    # create dataframe from the prediction
    forcast_df = pd.DataFrame(data=y_preds_inverse, columns=dl_locations)

    # hourly timestamps from 1-May-2022 till the end of the forecast range
    forcast_df['datetime'] = pd.date_range(forecast_start, periods=FORCAST, freq=pd.Timedelta(hours=1))

    # transform to the same long format as the ML prediction; pd.concat
    # replaces DataFrame.append, which was removed in pandas 2.0
    forcast_df_transfrom = pd.concat(
        [locationsID_Generate_deep_model(forcast_df, location) for location in dl_locations],
        axis=0,
    )

    # floor negative predictions at zero
    forcast_df_transfrom['Trips_Forcast'] = forcast_df_transfrom['Trips_Forcast'].clip(lower=0)

    # now concatenating the ML prediction with the DL prediction
    combined_forcast = pd.concat([filtered_data, forcast_df_transfrom], axis=0)

    # create day-of-month and hour features
    combined_forcast['DayOfMonth'] = combined_forcast['datetime'].dt.day
    combined_forcast['Hour'] = combined_forcast['datetime'].dt.hour

    # locationID 105 is not available in the GeoJSON file, which would cause
    # errors on the folium map, so its prediction is removed
    combined_forcast = combined_forcast[~combined_forcast['LocationID'].isin([105])].copy()

    # now converting the LocationID to string
    combined_forcast['LocationID'] = combined_forcast['LocationID'].apply(str)

    return combined_forcast

            #=================== Creating the folium map to display the hourly prediction =========================
                     
def display_map(dataframe,day,hour):
    """Render the forecast for one day and hour as a choropleth in Streamlit.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Combined forecast with the columns ``LocationID``, ``datetime``,
        ``Trips_Forcast``, ``DayOfMonth`` and ``Hour``.
    day : int
        Day of month to display.
    hour : int
        Hour of day to display.

    Zones found in ``manhattan_zones.geojson`` that have no forecast row for
    the selected day and hour show ``N/A`` in the tooltip.
    """
    selected = dataframe.loc[(dataframe['DayOfMonth']==day) & (dataframe['Hour']== hour)]
    selected = selected.drop(['datetime'], axis=1)

    # named ``zone_map`` so the builtin ``map`` is not shadowed
    zone_map = folium.Map(location=[40.7831, -73.9712], zoom_start=11.4,scrollWheelZoom=False,tiles='CartoDB positron')
    choropleth = folium.Choropleth(
        geo_data='manhattan_zones.geojson',
        data=selected,
        columns=('LocationID','Trips_Forcast'),
        key_on='feature.properties.location_id',
        fill_color="YlOrRd",
        line_opacity=1,
        highlight=True
        )
    choropleth.geojson.add_to(zone_map)

    # Forecast per zone, keyed by the string form of the location ID.
    trips_by_zone = {
        str(location): trips
        for location, trips in zip(selected['LocationID'], selected['Trips_Forcast'])
    }

    # Copy the forecast into each GeoJSON feature so the tooltip can show it.
    # A dictionary lookup with a default replaces ``.loc``, which raised
    # KeyError for any zone without a forecast row.
    for feature in choropleth.geojson.data['features']:
        zone_id = feature['properties']['location_id']
        feature['properties']['Trips_Forcast'] = str(trips_by_zone.get(str(zone_id), 'N/A'))

    choropleth.geojson.add_child(
        folium.features.GeoJsonTooltip(
            fields=['location_id','zone','Trips_Forcast'],
            aliases=['Location ID : ', 'Area Name : ', 'Forcasted Taxi trips : '])
    )
    st_folium(zone_map,width = 700, height = 450)

                    #======================== Create time series trend line chart using altair ===========================
    
def line_chart(df):
    """Build a two-panel Altair line chart of ``Trips_Forcast`` over ``datetime``.

    The lower, shorter panel carries an interval selection on the x axis;
    the upper panel uses that selection as the domain of its x scale.
    Returns the two panels concatenated vertically.
    """
    brush = alt.selection(type='interval', encodings=['x'])

    trend = (
        alt.Chart(df)
        .mark_line()
        .encode(x='datetime:T', y='Trips_Forcast:Q')
        .properties(width=600, height=200)
    )

    # detail panel: x axis restricted to the brushed interval
    brushed_axis = alt.X('datetime:T', scale=alt.Scale(domain=brush))
    detail_panel = trend.encode(brushed_axis)

    # overview panel: full range, hosts the brush
    overview_panel = trend.properties(height=60).add_selection(brush)

    return alt.vconcat(detail_panel, overview_panel)
                     

                     # ====================== Create bar chart to display the top location ===========================

def bar_plot(dataframe):
    """Return a horizontal Altair bar chart of ``Trips_Forcast`` per ``Zone``.

    Bars are sorted by descending x value and show ``Trips_Forcast`` in the
    tooltip.
    """
    bars = alt.Chart(dataframe).mark_bar()
    zone_axis = alt.Y('Zone:N', sort='-x')
    return bars.encode(
        x='Trips_Forcast:Q',
        y=zone_axis,
        tooltip=['Trips_Forcast'],
    )


                      #=================== The main streamlit app  ================================

def _load_forecast_features():
    """Read the weather CSV and build the per-location ML feature table.

    Returns a frame with the columns LocationID, datetime, Year, Month,
    DayOfMonth, Hour, dayofweek, temp, humidity, precip, snow, windspeed,
    holiday and IsWeekend, holding one copy of the weather rows from
    2022-05-01 onward for every location in ``location_list``.
    """
    location_list = [263, 262, 261, 249, 246, 244, 243, 239, 238, 234, 233, 232, 231, 230, 229, 224, 211, 209, 202, 194, 170, 166, 164, 163, 158,
    153, 152, 151, 148, 144, 143, 142, 141, 140, 137, 128, 127, 125, 120, 116, 114, 113, 107, 105, 100, 90, 88, 87, 79, 75, 74,
    68, 50, 48, 45, 43, 42, 41, 24, 13, 12, 4]

    df_temp = pd.read_csv('new york_weather.csv',usecols=['datetime','temp','humidity','precip','snow','windspeed'])
    df_temp['datetime'] = pd.to_datetime(df_temp['datetime'])
    # reset_index returns a fresh frame, so the column assignments below do
    # not write into a slice of the original
    df_temp = df_temp[df_temp['datetime'] >= '2022-05-01'].reset_index(drop=True)

    # Create features from datetime column
    stamps = df_temp['datetime']
    df_temp['Year'] = stamps.dt.year
    df_temp['Month'] = stamps.dt.month
    df_temp['DayOfMonth'] = stamps.dt.day
    df_temp['Hour'] = stamps.dt.hour
    df_temp['dayofweek'] = stamps.dt.dayofweek

    # 1 for Saturday/Sunday, otherwise 0 (direct cast instead of a chained
    # in-place replace, which is not applied under pandas Copy-on-Write)
    df_temp['IsWeekend'] = (df_temp['dayofweek'] >= 5).astype(int)

    # holiday: 1.0 on United States holidays, else 0.0; the calendar covers
    # every year present in the data
    years = [int(year) for year in df_temp['Year'].unique()]
    us_holidays = holidays.UnitedStates(years=years)
    holiday_dates = pd.to_datetime(list(us_holidays.keys()))
    df_temp['holiday'] = stamps.dt.normalize().isin(holiday_dates).astype(float)

    # one copy of the weather rows per location; pd.concat replaces
    # DataFrame.append, which was removed in pandas 2.0
    df = pd.concat(
        [locationsID_Generate_ml_model(df_temp, location) for location in location_list],
        axis=0,
    )

    return df[['LocationID','datetime','Year', 'Month', 'DayOfMonth', 'Hour', 'dayofweek',
       'temp', 'humidity', 'precip', 'snow', 'windspeed', 'holiday',
       'IsWeekend']]


def main():
    """Streamlit entry point: forecast, then render metrics, map and charts.

    Sidebar widgets select the forecast length (3, 7 or 14 days) and the
    day and hour to inspect.  The page shows daily/hourly trip totals, the
    choropleth map, the top-10 locations for the selected hour and day, and
    the trend line over the whole forecast period.
    """
    st.set_page_config(APP_TITLE)
    st.title(APP_TITLE)
    st.caption(APP_SUBTITLE)

    ## prepare the data for forcasting
    df_forcast = _load_forecast_features()

    # create sidebar select for how many days to forecast in the future
    Forecast_list = [3,7,14]
    Forecast_days = st.sidebar.selectbox('Days to forcast',Forecast_list)

    combined_forcast = preparded_data_forcast(df_forcast,Forecast_days)

    day_list = list(combined_forcast['DayOfMonth'].unique())
    Filter_days = st.sidebar.selectbox('Select the day to view on map',day_list)

    hour_list = list(combined_forcast['Hour'].unique())
    Filter_hour = st.sidebar.selectbox('Select the forecasted hour to view on map',hour_list)

    st.header(f'{Filter_days}-May-2022: {Filter_hour}:00' )

    # row masks shared by the metrics and the top-10 tables below
    on_day = combined_forcast['DayOfMonth']==Filter_days
    at_hour = on_day & (combined_forcast['Hour']==Filter_hour)

    col1,col2,col3 = st.columns(3)
    with col1:
        Total_daily_trips = combined_forcast[on_day]['Trips_Forcast'].sum()
        st.metric("No. trips count at this day",'{:,}'.format(Total_daily_trips))
    with col2:
        Total_hourly_trips = combined_forcast[at_hour]['Trips_Forcast'].sum()
        st.metric("No. of trips count in this this hour",'{:,}'.format(Total_hourly_trips))
    with col3:
        # guard against a day whose forecast sums to zero
        if Total_daily_trips:
            percentage = round(((Total_hourly_trips/Total_daily_trips)*100),2)
        else:
            percentage = 0.0
        st.metric(" Percentage of trips at this hour",percentage)

    # displaying the map
    display_map(combined_forcast,Filter_days,Filter_hour)

    # displaying top 10 hourly location forcasted count
    st.subheader(f'Top 10 locations according to the number of forcasted trips on the selected hour {Filter_hour}:00' )

    Hourly_top_counts = combined_forcast[at_hour]
    Hourly_top_counts=Hourly_top_counts.sort_values("Trips_Forcast", axis = 0, ascending = False, na_position ='first')
    # reading location lookup file
    location_lookup=pd.read_csv('taxi_Zone_lookup.csv',usecols=['LocationID','Zone'])
    location_lookup['LocationID'] = location_lookup['LocationID'].astype(str)
    # merging the data to attach the zone names
    Hourly_top_counts=Hourly_top_counts.merge(location_lookup, on='LocationID', how='left')
    # choosing the 10 largest values
    Hourly_top_counts = Hourly_top_counts.nlargest(10, "Trips_Forcast")
    Hourly_top_bar_chart=bar_plot(Hourly_top_counts)
    st.altair_chart(Hourly_top_bar_chart, use_container_width=True)

    st.subheader('Line chart trend for the forecasted period' )

    # displaying the line chart: total trips over all locations per hour
    group=combined_forcast.groupby('datetime')[['Trips_Forcast']].sum()
    group.reset_index(inplace=True)
    chart_data = line_chart(group)
    st.altair_chart(chart_data, use_container_width=True)

    # displaying top 10 daily location forcasted count
    st.subheader(f'Top 10 locations according to the number of forcasted trips on {Filter_days}-May-2022' )

    Daily_top_counts = combined_forcast[on_day]
    Daily_top_counts=Daily_top_counts.groupby('LocationID')[['Trips_Forcast']].sum()
    Daily_top_counts.reset_index(inplace=True)
    Daily_top_counts = Daily_top_counts.sort_values("Trips_Forcast", axis = 0, ascending = False, na_position ='first')
    Daily_top_counts['LocationID'] = Daily_top_counts['LocationID'].astype(str)
    Daily_top_counts=Daily_top_counts.merge(location_lookup, on='LocationID', how='left')
    Daily_top_counts = Daily_top_counts.nlargest(10, "Trips_Forcast")
    Daily_top_bar_chart=bar_plot(Daily_top_counts)
    st.altair_chart(Daily_top_bar_chart, use_container_width=True)









#############################################################################################




# Run the app only when this file is executed as a script, not when imported.
if __name__ =="__main__":
    main()