In [None]:
#Import dependencies
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import scipy.stats as stats
from scipy.stats import linregress
import gmaps
import json
import urllib

#Import API keys
from config import (api_key, gkey)


# Hand the `gkey` value imported from config to gmaps
gmaps.configure(api_key=gkey)

# Load the Manhattan zones table and preview its first rows
manhattan_zones_csv = "../../../Desktop/NYC Taxi & FHV Data/manhattan_zones.csv"
manhattan_zones = pd.read_csv(manhattan_zones_csv)
manhattan_zones.head()

In [None]:
# Every monthly extract sits in the same clean_csvs folder, so build each path from one prefix
clean_csv_dir = "../../../Desktop/NYC Taxi & FHV Data/clean_csvs/"

#Taxi extracts (June/Dec 2015 and June/Dec 2019)
june_2015_taxi = pd.read_csv(clean_csv_dir + "taxi_june_2015.csv")
dec_2015_taxi = pd.read_csv(clean_csv_dir + "taxi_dec_2015.csv")
june_2019_taxi = pd.read_csv(clean_csv_dir + "taxi_june_2019.csv")
dec_2019_taxi = pd.read_csv(clean_csv_dir + "taxi_dec_2019.csv")

#FHV extracts (June/Dec 2015)
june_2015_fhv = pd.read_csv(clean_csv_dir + "FHV_june_2015.csv")
dec_2015_fhv = pd.read_csv(clean_csv_dir + "FHV_dec_2015.csv")

#HighVol extracts (June/Dec 2019)
june_2019_highvol = pd.read_csv(clean_csv_dir + "HighVol_june_2019.csv")
dec_2019_highvol = pd.read_csv(clean_csv_dir + "HighVol_dec_2019.csv")

# Percent of rides

In [None]:
#Taxi data June 2015
#Count number of rides for each distinct pickup value
ride_numbers_june_2015 = june_2015_taxi["tpep_pickup_datetime"].value_counts()

#Format data frame
# Name both columns explicitly: what value_counts() calls its index and its values
# differs between pandas 1.x and 2.x, so renaming an "index" column is not reliable.
ride_numbers_june_2015_df = (
    ride_numbers_june_2015
    .rename_axis("Pickup Date")
    .reset_index(name="Number of Rides June 2015")
)
ride_numbers_june_2015_df["Pickup Date"] = pd.to_datetime(ride_numbers_june_2015_df["Pickup Date"]).dt.strftime('%m-%d')

# Several raw values can format to the same 'MM-DD' label (e.g. if the column carries a
# time of day). Sum them so each day is exactly one row, which the later merge on
# "Pickup Date" relies on.
ride_numbers_june_2015_df = ride_numbers_june_2015_df.groupby("Pickup Date", as_index=False)["Number of Rides June 2015"].sum()

#Sort by day
sorted_ride_numbers_june_2015 = ride_numbers_june_2015_df.sort_values(by = "Pickup Date")

#Display data frame
sorted_ride_numbers_june_2015.head()

In [None]:
#Taxi data June 2019
#Count number of rides for each distinct pickup_date value in the June 2019 taxi data
ride_numbers_june_2019 = june_2019_taxi["pickup_date"].value_counts()

#Format data frame
# Name both columns explicitly: what value_counts() calls its index and its values
# differs between pandas 1.x and 2.x, so renaming an "index" column is not reliable.
ride_numbers_june_2019_df = (
    ride_numbers_june_2019
    .rename_axis("Pickup Date")
    .reset_index(name="Number of Rides June 2019")
)
ride_numbers_june_2019_df["Pickup Date"] = pd.to_datetime(ride_numbers_june_2019_df["Pickup Date"]).dt.strftime('%m-%d')

# Sum any rows that ended up with the same 'MM-DD' label so each day is exactly one row,
# which the later merge on "Pickup Date" relies on.
ride_numbers_june_2019_df = ride_numbers_june_2019_df.groupby("Pickup Date", as_index=False)["Number of Rides June 2019"].sum()

#Sort by day
sorted_ride_numbers_june_2019 = ride_numbers_june_2019_df.sort_values(by = "Pickup Date")

#Display data frame
sorted_ride_numbers_june_2019.head()

In [None]:
#FHV data June 2015
#Count number of rides for each distinct pickup_date value in the June 2015 FHV data
fhv_june_2015_counts = june_2015_fhv["pickup_date"].value_counts()

#Format data frame
# Name both columns explicitly: what value_counts() calls its index and its values
# differs between pandas 1.x and 2.x, so renaming an "index" column is not reliable.
ride_numbers_fhv_june_2015 = (
    fhv_june_2015_counts
    .rename_axis("Pickup Date")
    .reset_index(name="Number of FHV Rides 2015")
)
ride_numbers_fhv_june_2015["Pickup Date"] = pd.to_datetime(ride_numbers_fhv_june_2015["Pickup Date"]).dt.strftime('%m-%d')

# Sum any rows that ended up with the same 'MM-DD' label so each day is exactly one row,
# which the later merge on "Pickup Date" relies on.
ride_numbers_fhv_june_2015 = ride_numbers_fhv_june_2015.groupby("Pickup Date", as_index=False)["Number of FHV Rides 2015"].sum()

#Sort by day
sorted_ride_numbers_fhv_2015 = ride_numbers_fhv_june_2015.sort_values(["Pickup Date"], ascending = True)

#Display data frame
sorted_ride_numbers_fhv_2015.head()

In [None]:
#High-volume FHV data June 2019
#Count number of rides for each distinct pickup_date value
hvfhv_june_2019_counts = june_2019_highvol["pickup_date"].value_counts()

#Format data frame
# Name both columns explicitly: what value_counts() calls its index and its values
# differs between pandas 1.x and 2.x, so renaming an "index" column is not reliable.
ride_numbers_hvfhv_june_2019 = (
    hvfhv_june_2019_counts
    .rename_axis("Pickup Date")
    .reset_index(name="Number of FHV Rides 2019")
)
ride_numbers_hvfhv_june_2019["Pickup Date"] = pd.to_datetime(ride_numbers_hvfhv_june_2019["Pickup Date"]).dt.strftime('%m-%d')

# Sum any rows that ended up with the same 'MM-DD' label so each day is exactly one row,
# which the later merge on "Pickup Date" relies on.
ride_numbers_hvfhv_june_2019 = ride_numbers_hvfhv_june_2019.groupby("Pickup Date", as_index=False)["Number of FHV Rides 2019"].sum()

#Sort by day
sorted_ride_numbers_hvfhv_june = ride_numbers_hvfhv_june_2019.sort_values(["Pickup Date"], ascending = True)

#Display data frame
sorted_ride_numbers_hvfhv_june.head()

In [None]:
#Compare taxi and FHV data in June 2015
# Outer-join the daily taxi and FHV counts on their shared "Pickup Date" label.
combined_june_2015 = pd.merge(
    sorted_ride_numbers_june_2015,
    sorted_ride_numbers_fhv_2015,
    on="Pickup Date",
    how="outer",
)

# Daily total = taxi count + FHV count. A day present in only one table has NaN in the
# other count, so its total is NaN as well.
combined_june_2015["Total Rides"] = combined_june_2015["Number of Rides June 2015"].add(
    combined_june_2015["Number of FHV Rides 2015"]
)

#Display data frame
combined_june_2015.head()

In [None]:
#Share of each day's total rides taken by taxis and by FHVs, June 2015
june_2015_total_rides = combined_june_2015["Total Rides"]
percent_rides_taxi = combined_june_2015["Number of Rides June 2015"] / june_2015_total_rides * 100
percent_rides_fhv = combined_june_2015["Number of FHV Rides 2015"] / june_2015_total_rides * 100

#Collect the day label and both percentages in one frame
percent_2015 = pd.DataFrame(
    {
        "Date": combined_june_2015["Pickup Date"],
        "Percent Rides Taxi": percent_rides_taxi,
        "Percent Rides FHV": percent_rides_fhv,
    }
)

percent_2015.head()

In [None]:
#Line graph comparing percent of total rides for taxi vs FHV in June 2015
# Plot against the "Date" column: without x=..., pandas uses the row index for the
# x-axis, which does not match the "Date in June" axis label.
line = percent_2015.plot(x = "Date", kind = "line", legend = True)

plt.title("Percent Taxi vs FHV Rides June 2015")
plt.xlabel("Date in June")
plt.ylabel("Percent of Total Rides")
plt.grid()
plt.show()

In [None]:
#Compare taxi and FHV data in June 2019
# Outer-join the daily taxi and high-volume FHV counts on their shared "Pickup Date" label.
combined_june_2019 = pd.merge(
    sorted_ride_numbers_june_2019,
    sorted_ride_numbers_hvfhv_june,
    on="Pickup Date",
    how="outer",
)

# Daily total = taxi count + FHV count. A day present in only one table has NaN in the
# other count, so its total is NaN as well.
combined_june_2019["Total Rides"] = combined_june_2019["Number of Rides June 2019"].add(
    combined_june_2019["Number of FHV Rides 2019"]
)

#Display data frame
combined_june_2019.head()

In [None]:
#Share of each day's total rides taken by taxis and by FHVs, June 2019
# NOTE: this rebinds percent_rides_taxi / percent_rides_fhv, which the June 2015 cell also assigns.
june_2019_total_rides = combined_june_2019["Total Rides"]
percent_rides_taxi = combined_june_2019["Number of Rides June 2019"] / june_2019_total_rides * 100
percent_rides_fhv = combined_june_2019["Number of FHV Rides 2019"] / june_2019_total_rides * 100

#Collect the day label and both percentages in one frame
percent_2019 = pd.DataFrame(
    {
        "Date": combined_june_2019["Pickup Date"],
        "Percent Rides Taxi": percent_rides_taxi,
        "Percent Rides FHV": percent_rides_fhv,
    }
)

#Display data frame
percent_2019.head()

In [None]:
#Line graph comparing percent of total rides for taxi vs FHV in June 2019
# Plot against the "Date" column: without x=..., pandas uses the row index for the
# x-axis, which does not match the "Date in June" axis label.
line = percent_2019.plot(x = "Date", kind = "line", legend = True)

plt.title("Percent Taxi vs FHV Rides June 2019")
plt.xlabel("Date in June")
plt.ylabel("Percent of Total Rides")
plt.grid()
plt.show()

In [None]:
#Compile data for boxplot: daily taxi and FHV percentages for June 2015 and June 2019
plot_data = [percent_2015["Percent Rides Taxi"], percent_2015["Percent Rides FHV"], percent_2019["Percent Rides Taxi"], percent_2019["Percent Rides FHV"]]

#Create boxplot comparing percentage of rides from June 2015 to June 2019
plt.boxplot(plot_data, labels = ["Taxi Rides 2015", "FHV Rides 2015", "Taxi Rides 2019", "FHV Rides 2019"])

# plt.title has to be called with a string; the bare attribute reference did nothing
plt.title("Percent Taxi vs FHV Rides June 2015 vs June 2019")
plt.ylabel("Percent of Total Rides")
plt.grid()
plt.show()

In [None]:
#T-test comparing change in percent taxi rides
# Independent two-sample t-test on the daily taxi percentages; equal_var=False means
# the two samples are not assumed to share a variance (Welch's t-test).
taxi_share_june_2015 = percent_2015["Percent Rides Taxi"]
taxi_share_june_2019 = percent_2019["Percent Rides Taxi"]
stats.ttest_ind(taxi_share_june_2015, taxi_share_june_2019, equal_var=False)

In [None]:
#T-test comparing change in percent FHV rides
# Independent two-sample t-test on the daily FHV percentages; equal_var=False means
# the two samples are not assumed to share a variance (Welch's t-test).
fhv_share_june_2015 = percent_2015["Percent Rides FHV"]
fhv_share_june_2019 = percent_2019["Percent Rides FHV"]
stats.ttest_ind(fhv_share_june_2015, fhv_share_june_2019, equal_var=False)

# Taxi fare pricing

In [None]:
#June 2015
# Summary statistics of the fare_amount column for June 2015.
# NOTE(review): yt_june_2015_df is not assigned in any cell visible here - confirm where it is created.
june_15_fare_amount = yt_june_2015_df['fare_amount']
june_15_sum, june_15_mean = june_15_fare_amount.sum(), june_15_fare_amount.mean()
# count() tallies the non-null fare values
june_15_median, june_15_rows = june_15_fare_amount.median(), june_15_fare_amount.count()

print(f'The total fare collected in June 2015 is ${june_15_sum}')
print(f'The mean fare collected in June 2015 is ${june_15_mean}')
print(f'The median fare collected in June 2015 is ${june_15_median}')
print(f'The amount of rides in June 2015 is {june_15_rows}')

# One-row frame holding the four statistics
june_2015_summary = {
    'Mean': june_15_mean,
    'Sum': june_15_sum,
    'Median': june_15_median,
    'Total Rides': june_15_rows,
}
june_2015_df = pd.DataFrame(june_2015_summary, index=[0])
june_2015_df

In [None]:
#Dec 2015
# Summary statistics of the fare_amount column for December 2015.
# NOTE(review): yt_dec_2015_df is not assigned in any cell visible here - confirm where it is created.
dec_15_fare_amount = yt_dec_2015_df['fare_amount']
dec_15_sum, dec_15_mean = dec_15_fare_amount.sum(), dec_15_fare_amount.mean()
# count() tallies the non-null fare values
dec_15_median, dec_15_rows = dec_15_fare_amount.median(), dec_15_fare_amount.count()

print(f'The total fare collected in Dec 2015 is ${dec_15_sum}')
print(f'The mean fare collected in Dec 2015 is ${dec_15_mean}')
print(f'The median fare collected in Dec 2015 is ${dec_15_median}')
print(f'The amount of rides in Dec 2015 is {dec_15_rows}')

# One-row frame holding the four statistics
dec_2015_summary = {
    'Mean': dec_15_mean,
    'Sum': dec_15_sum,
    'Median': dec_15_median,
    'Total Rides': dec_15_rows,
}
dec_2015_df = pd.DataFrame(dec_2015_summary, index=[0])
dec_2015_df

In [None]:
#June 2019
# Summary statistics of the fare_amount column for June 2019.
# NOTE(review): yt_june_2019_df is not assigned in any cell visible here - confirm where it is created.
fare_amount = yt_june_2019_df['fare_amount']
june_sum, june_mean = fare_amount.sum(), fare_amount.mean()
# count() tallies the non-null fare values
june_median, june_rows = fare_amount.median(), fare_amount.count()

print(f'The total fare collected in June 2019 is ${june_sum}')
print(f'The mean fare collected in June 2019 is ${june_mean}')
print(f'The median fare collected in June 2019 is ${june_median}')
print(f'The amount of rides in June 2019 is {june_rows}')

# One-row frame holding the four statistics
june_2019_summary = {
    'Mean': june_mean,
    'Sum': june_sum,
    'Median': june_median,
    'Total Rides': june_rows,
}
june_2019_df = pd.DataFrame(june_2019_summary, index=[0])
june_2019_df

In [None]:
#Dec 2019
# Summary statistics of the fare_amount column for December 2019.
# NOTE(review): yt_dec_2019_df is not assigned in any cell visible here - confirm where it is created.
# NOTE: rebinds fare_amount, which the June 2019 cell also assigns.
fare_amount = yt_dec_2019_df['fare_amount']
dec_sum, dec_mean = fare_amount.sum(), fare_amount.mean()
# count() tallies the non-null fare values
dec_median, dec_rows = fare_amount.median(), fare_amount.count()

print(f'The total fare collected in Dec 2019 is ${dec_sum}')
print(f'The mean fare collected in Dec 2019 is ${dec_mean}')
print(f'The median fare collected in Dec 2019 is ${dec_median}')
print(f'The amount of rides in Dec 2019 is {dec_rows}')

# One-row frame holding the four statistics
dec_2019_summary = {
    'Mean': dec_mean,
    'Sum': dec_sum,
    'Median': dec_median,
    'Total Rides': dec_rows,
}
dec_2019_df = pd.DataFrame(dec_2019_summary, index=[0])
dec_2019_df

In [None]:
#inflation line
# Scale each 2015 mean fare up by a flat 9.4% to get the fare expected in 2019.
# NOTE(review): the source of the 9.4% figure is not shown in this notebook - confirm.
inflation_rate = .094
june_inflation = june_15_mean + (june_15_mean * inflation_rate)
dec_inflation = dec_15_mean + (dec_15_mean * inflation_rate)

print(f'Expected price for June 2019 is ${june_inflation}')
print(f'Expected price for Dec 2019 is ${dec_inflation}')

In [None]:
#mean line plot
# Both lines share the same four period labels on the x-axis
period_labels = ("June 2015", "Dec 2015", "June 2019", "Dec 2019")

# Solid line: the observed mean fares
x_values = period_labels
y_values = (june_15_mean, dec_15_mean, june_mean, dec_mean)
plt.plot(x_values, y_values, marker ='o', label="Actual Change")

#inflation line
# Dashed line: the 2015 means followed by the inflation-adjusted expectations for 2019
x_values2 = period_labels
y_values2 = (june_15_mean, dec_15_mean, june_inflation, dec_inflation)
plt.plot(x_values2, y_values2, marker ='o', linestyle='--', label='Inflation Line')

plt.title('Average Fare Price per Month and Year')
plt.xlabel('Month and Year')
plt.ylabel('Average Fare Price ($)')
plt.ylim([8, 12])
plt.grid()
plt.legend()

# Ride duration

# Ride times

In [None]:
# Taxi Pick-ups for June 2015
# Split the rides into four 6-hour windows of the day and count the rides in each.
# Every window excludes its lower bound and includes its upper bound; pickup_time is
# compared against 'HH:MM:SS' string literals.
# NOTE(review): yt_june_2015_df is not assigned in any cell visible here - confirm where it is created.
yt_june_15_pickup = yt_june_2015_df['pickup_time']

# Morning: after 05:00:00, up to and including 11:00:00
yt_morning_june_15_df = yt_june_2015_df[(yt_june_15_pickup > '05:00:00') & (yt_june_15_pickup <= '11:00:00')]
yt_june_morn_count = len(yt_morning_june_15_df)

# Day: after 11:00:00, up to and including 17:00:00
yt_day_june_15_df = yt_june_2015_df[(yt_june_15_pickup > '11:00:00') & (yt_june_15_pickup <= '17:00:00')]
yt_june_day_count = len(yt_day_june_15_df)

# Evening: after 17:00:00, up to and including 23:00:00
yt_evening_june_15_df = yt_june_2015_df[(yt_june_15_pickup > '17:00:00') & (yt_june_15_pickup <= '23:00:00')]
yt_june_eve_count = len(yt_evening_june_15_df)

# After-hours is assembled from two slices: after 23:00:00 ...
yt_late_night_june_15_df = yt_june_2015_df[yt_june_15_pickup > '23:00:00']
yt_june_ln_count = len(yt_late_night_june_15_df)

# ... and up to and including 05:00:00
yt_later_night_june_15_df = yt_june_2015_df[yt_june_15_pickup <= '05:00:00']
yt_june_lrn_count = len(yt_later_night_june_15_df)

yt_june_afthrs_count = yt_june_ln_count + yt_june_lrn_count

In [None]:
# Taxi Pick-ups for December 2015
# Split the rides into four 6-hour windows of the day and count the rides in each.
# Every window excludes its lower bound and includes its upper bound; pickup_time is
# compared against 'HH:MM:SS' string literals.
# NOTE(review): yt_dec_2015_df is not assigned in any cell visible here - confirm where it is created.
yt_dec_15_pickup = yt_dec_2015_df['pickup_time']

# Morning: after 05:00:00, up to and including 11:00:00
yt_morning_dec_15_df = yt_dec_2015_df[(yt_dec_15_pickup > '05:00:00') & (yt_dec_15_pickup <= '11:00:00')]
yt_dec_morn_count = len(yt_morning_dec_15_df)

# Day: after 11:00:00, up to and including 17:00:00
yt_day_dec_15_df = yt_dec_2015_df[(yt_dec_15_pickup > '11:00:00') & (yt_dec_15_pickup <= '17:00:00')]
yt_dec_day_count = len(yt_day_dec_15_df)

# Evening: after 17:00:00, up to and including 23:00:00
yt_evening_dec_15_df = yt_dec_2015_df[(yt_dec_15_pickup > '17:00:00') & (yt_dec_15_pickup <= '23:00:00')]
yt_dec_eve_count = len(yt_evening_dec_15_df)

# After-hours is assembled from two slices: after 23:00:00 ...
yt_late_night_dec_15_df = yt_dec_2015_df[yt_dec_15_pickup > '23:00:00']
yt_dec_ln_count = len(yt_late_night_dec_15_df)

# ... and up to and including 05:00:00
yt_later_night_dec_15_df = yt_dec_2015_df[yt_dec_15_pickup <= '05:00:00']
yt_dec_lrn_count = len(yt_later_night_dec_15_df)

yt_dec_afthrs_count = yt_dec_ln_count + yt_dec_lrn_count

In [None]:
# For Hire Vehicles Pick-ups for June 2015
# Split the rides into four 6-hour windows of the day and count the rides in each.
# Every window excludes its lower bound and includes its upper bound; pickup_time is
# compared against 'HH:MM:SS' string literals.
# NOTE(review): fhv_june_2015_df is not assigned in any cell visible here - confirm where it is created.
fhv_june_15_pickup = fhv_june_2015_df['pickup_time']

# Morning: after 05:00:00, up to and including 11:00:00
fhv_morning_june_15_df = fhv_june_2015_df[(fhv_june_15_pickup > '05:00:00') & (fhv_june_15_pickup <= '11:00:00')]
fhv_june_morn_count = len(fhv_morning_june_15_df)

# Day: after 11:00:00, up to and including 17:00:00
fhv_day_june_15_df = fhv_june_2015_df[(fhv_june_15_pickup > '11:00:00') & (fhv_june_15_pickup <= '17:00:00')]
fhv_june_day_count = len(fhv_day_june_15_df)

# Evening: after 17:00:00, up to and including 23:00:00
fhv_evening_june_15_df = fhv_june_2015_df[(fhv_june_15_pickup > '17:00:00') & (fhv_june_15_pickup <= '23:00:00')]
fhv_june_eve_count = len(fhv_evening_june_15_df)

# After-hours is assembled from two slices: after 23:00:00 ...
fhv_late_night_june_15_df = fhv_june_2015_df[fhv_june_15_pickup > '23:00:00']
fhv_june_ln_count = len(fhv_late_night_june_15_df)

# ... and up to and including 05:00:00
fhv_later_night_june_15_df = fhv_june_2015_df[fhv_june_15_pickup <= '05:00:00']
fhv_june_lrn_count = len(fhv_later_night_june_15_df)

fhv_june_afthrs_count = fhv_june_ln_count + fhv_june_lrn_count

In [None]:
# For Hire Vehicles Pick-ups for December 2015
# Split the rides into four 6-hour windows of the day and count the rides in each.
# Every window excludes its lower bound and includes its upper bound; pickup_time is
# compared against 'HH:MM:SS' string literals.
# NOTE(review): fhv_dec_2015_df is not assigned in any cell visible here - confirm where it is created.
fhv_dec_15_pickup = fhv_dec_2015_df['pickup_time']

# Morning: after 05:00:00, up to and including 11:00:00
fhv_morning_dec_15_df = fhv_dec_2015_df[(fhv_dec_15_pickup > '05:00:00') & (fhv_dec_15_pickup <= '11:00:00')]
fhv_dec_morn_count = len(fhv_morning_dec_15_df)

# Day: after 11:00:00, up to and including 17:00:00
fhv_day_dec_15_df = fhv_dec_2015_df[(fhv_dec_15_pickup > '11:00:00') & (fhv_dec_15_pickup <= '17:00:00')]
fhv_dec_day_count = len(fhv_day_dec_15_df)

# Evening: after 17:00:00, up to and including 23:00:00
fhv_evening_dec_15_df = fhv_dec_2015_df[(fhv_dec_15_pickup > '17:00:00') & (fhv_dec_15_pickup <= '23:00:00')]
fhv_dec_eve_count = len(fhv_evening_dec_15_df)

# After-hours is assembled from two slices: after 23:00:00 ...
fhv_late_night_dec_15_df = fhv_dec_2015_df[fhv_dec_15_pickup > '23:00:00']
fhv_dec_ln_count = len(fhv_late_night_dec_15_df)

# ... and up to and including 05:00:00
fhv_later_night_dec_15_df = fhv_dec_2015_df[fhv_dec_15_pickup <= '05:00:00']
fhv_dec_lrn_count = len(fhv_later_night_dec_15_df)

fhv_dec_afthrs_count = fhv_dec_ln_count + fhv_dec_lrn_count

In [None]:
# High Volume for Hire Vehicles Pick-ups for June 2019
# Split the rides into four 6-hour windows of the day and count the rides in each.
# Every window excludes its lower bound and includes its upper bound; pickup_time is
# compared against 'HH:MM:SS' string literals.
# NOTE(review): hv_june_2019_df is not assigned in any cell visible here - confirm where it is created.
hv_june_19_pickup = hv_june_2019_df['pickup_time']

# Morning: after 05:00:00, up to and including 11:00:00
hv_morning_june_19_df = hv_june_2019_df[(hv_june_19_pickup > '05:00:00') & (hv_june_19_pickup <= '11:00:00')]
hv_june_morn_count = len(hv_morning_june_19_df)

# Day: after 11:00:00, up to and including 17:00:00
hv_day_june_19_df = hv_june_2019_df[(hv_june_19_pickup > '11:00:00') & (hv_june_19_pickup <= '17:00:00')]
hv_june_day_count = len(hv_day_june_19_df)

# Evening: after 17:00:00, up to and including 23:00:00
hv_evening_june_19_df = hv_june_2019_df[(hv_june_19_pickup > '17:00:00') & (hv_june_19_pickup <= '23:00:00')]
hv_june_eve_count = len(hv_evening_june_19_df)

# After-hours is assembled from two slices: after 23:00:00 ...
hv_late_night_june_19_df = hv_june_2019_df[hv_june_19_pickup > '23:00:00']
hv_june_ln_count = len(hv_late_night_june_19_df)

# ... and up to and including 05:00:00
hv_later_night_june_19_df = hv_june_2019_df[hv_june_19_pickup <= '05:00:00']
hv_june_lrn_count = len(hv_later_night_june_19_df)

hv_june_afthrs_count = hv_june_ln_count + hv_june_lrn_count

In [None]:
# High Volume for Hire Vehicles Pick-ups for December 2019
# Split the rides into four 6-hour windows of the day and count the rides in each.
# Every window excludes its lower bound and includes its upper bound; pickup_time is
# compared against 'HH:MM:SS' string literals.
# NOTE(review): hv_dec_2019_df is not assigned in any cell visible here - confirm where it is created.
hv_dec_19_pickup = hv_dec_2019_df['pickup_time']

# Morning: after 05:00:00, up to and including 11:00:00
hv_morning_dec_19_df = hv_dec_2019_df[(hv_dec_19_pickup > '05:00:00') & (hv_dec_19_pickup <= '11:00:00')]
hv_dec_morn_count = len(hv_morning_dec_19_df)

# Day: after 11:00:00, up to and including 17:00:00
hv_day_dec_19_df = hv_dec_2019_df[(hv_dec_19_pickup > '11:00:00') & (hv_dec_19_pickup <= '17:00:00')]
hv_dec_day_count = len(hv_day_dec_19_df)

# Evening: after 17:00:00, up to and including 23:00:00
hv_evening_dec_19_df = hv_dec_2019_df[(hv_dec_19_pickup > '17:00:00') & (hv_dec_19_pickup <= '23:00:00')]
hv_dec_eve_count = len(hv_evening_dec_19_df)

# After-hours is assembled from two slices: after 23:00:00 ...
hv_late_night_dec_19_df = hv_dec_2019_df[hv_dec_19_pickup > '23:00:00']
hv_dec_ln_count = len(hv_late_night_dec_19_df)

# ... and up to and including 05:00:00
hv_later_night_dec_19_df = hv_dec_2019_df[hv_dec_19_pickup <= '05:00:00']
hv_dec_lrn_count = len(hv_later_night_dec_19_df)

hv_dec_afthrs_count = hv_dec_ln_count + hv_dec_lrn_count

In [None]:
# Yellow Taxi Pick-ups for June 2019
# Split the rides into four 6-hour windows of the day and count the rides in each.
# Every window excludes its lower bound and includes its upper bound; pickup_time is
# compared against 'HH:MM:SS' string literals.
# NOTE: the yt_june_*_count names assigned here are the same names the June 2015 taxi
# cell assigns, so after this cell runs they hold June 2019 values.
# NOTE(review): yt_june_2019_df is not assigned in any cell visible here - confirm where it is created.
yt_june_19_pickup = yt_june_2019_df['pickup_time']

# Morning: after 05:00:00, up to and including 11:00:00
yt_morning_june_19_df = yt_june_2019_df[(yt_june_19_pickup > '05:00:00') & (yt_june_19_pickup <= '11:00:00')]
yt_june_morn_count = len(yt_morning_june_19_df)

# Day: after 11:00:00, up to and including 17:00:00
yt_day_june_19_df = yt_june_2019_df[(yt_june_19_pickup > '11:00:00') & (yt_june_19_pickup <= '17:00:00')]
yt_june_day_count = len(yt_day_june_19_df)

# Evening: after 17:00:00, up to and including 23:00:00
yt_evening_june_19_df = yt_june_2019_df[(yt_june_19_pickup > '17:00:00') & (yt_june_19_pickup <= '23:00:00')]
yt_june_eve_count = len(yt_evening_june_19_df)

# After-hours is assembled from two slices: after 23:00:00 ...
yt_late_night_june_19_df = yt_june_2019_df[yt_june_19_pickup > '23:00:00']
yt_june_ln_count = len(yt_late_night_june_19_df)

# ... and up to and including 05:00:00
yt_later_night_june_19_df = yt_june_2019_df[yt_june_19_pickup <= '05:00:00']
yt_june_lrn_count = len(yt_later_night_june_19_df)

yt_june_afthrs_count = yt_june_ln_count + yt_june_lrn_count

In [None]:
# Plot bar charts to compare yellow cabs & for hire vehicles by time of day in 2015

# Time-of-day labels shared by both charts (after-hours is the last bar)
index = ["5am-11am", "11am-5pm", "5pm-11pm", "11pm-5am"]

# --- June 2015 ---
fhv_June_2015 = [fhv_june_morn_count, fhv_june_day_count, fhv_june_eve_count, fhv_june_afthrs_count]
# Read the taxi counts from the June 2015 slices themselves: the yt_june_*_count
# variables are re-assigned by the June 2019 taxi cell, which runs before this one,
# so by now they hold June 2019 values.
yt_June_2015 = [
    yt_morning_june_15_df.shape[0],
    yt_day_june_15_df.shape[0],
    yt_evening_june_15_df.shape[0],
    yt_late_night_june_15_df.shape[0] + yt_later_night_june_15_df.shape[0],
]
df = pd.DataFrame({'Taxis': yt_June_2015,
                   'For Hire': fhv_June_2015}, index=index)

df.plot.bar(rot=0)
plt.title("For Hire vs Taxis in Manhattan - June 2015")
plt.ylabel("Number of Pickups")
plt.xlabel("Time of Day")
# Dashed reference line at 50,000 pickups
plt.axhline(y = 50000, color = '0', linestyle = 'dashed')
plt.show()

# --- December 2015 ---
# The yt_dec_*_count variables are only assigned by the December 2015 taxi cell.
fhv_Dec_2015 = [fhv_dec_morn_count, fhv_dec_day_count, fhv_dec_eve_count, fhv_dec_afthrs_count]
yt_Dec_2015 = [yt_dec_morn_count, yt_dec_day_count, yt_dec_eve_count, yt_dec_afthrs_count]
df = pd.DataFrame({'Taxis': yt_Dec_2015,
                   'For Hire': fhv_Dec_2015}, index=index)

df.plot.bar(rot=0)
plt.title("For Hire vs Taxis in Manhattan - December 2015")
plt.ylabel("Number of Pickups")
plt.xlabel("Time of Day")
# Dashed reference line at 50,000 pickups
plt.axhline(y = 50000, color = '0', linestyle = 'dashed')
plt.show()

In [None]:
# Plot bar charts to compare yellow cabs & high volume for hire vehicles by time of day in 2019

# Time-of-day labels shared by both charts (after-hours is the last bar)
index = ["5am-11am", "11am-5pm", "5pm-11pm", "11pm-5am"]

# --- June 2019 ---
hv_June_2019 = [hv_june_morn_count, hv_june_day_count, hv_june_eve_count, hv_june_afthrs_count]
# Read the taxi counts from the June 2019 slices directly, because the yt_june_*_count
# names are shared with the June 2015 taxi cell.
yt_June_2019 = [
    yt_morning_june_19_df.shape[0],
    yt_day_june_19_df.shape[0],
    yt_evening_june_19_df.shape[0],
    yt_late_night_june_19_df.shape[0] + yt_later_night_june_19_df.shape[0],
]
df = pd.DataFrame({'Taxis': yt_June_2019,
                   'For Hire': hv_June_2019}, index=index)

df.plot.bar(rot=0)
plt.title("For Hire vs Taxis in Manhattan - June 2019")
plt.ylabel("Number of Pickups")
plt.xlabel("Time of Day")
# Dashed reference line at 50,000 pickups
plt.axhline(y = 50000, color = '0', linestyle = 'dashed')
plt.show()

# --- December 2019 ---
# No earlier cell bins the December 2019 taxi rides, and yt_dec_*_count hold the
# December 2015 values, so the December 2019 taxi counts are computed here using the
# same windows as the other cells (lower bound excluded, upper bound included).
# NOTE(review): assumes yt_dec_2019_df has a 'pickup_time' column like the other yt_* frames - confirm.
yt_dec_19_pickup = yt_dec_2019_df['pickup_time']
yt_dec_19_morn_count = int(((yt_dec_19_pickup > '05:00:00') & (yt_dec_19_pickup <= '11:00:00')).sum())
yt_dec_19_day_count = int(((yt_dec_19_pickup > '11:00:00') & (yt_dec_19_pickup <= '17:00:00')).sum())
yt_dec_19_eve_count = int(((yt_dec_19_pickup > '17:00:00') & (yt_dec_19_pickup <= '23:00:00')).sum())
yt_dec_19_afthrs_count = int(((yt_dec_19_pickup > '23:00:00') | (yt_dec_19_pickup <= '05:00:00')).sum())

hv_Dec_2019 = [hv_dec_morn_count, hv_dec_day_count, hv_dec_eve_count, hv_dec_afthrs_count]
yt_Dec_2019 = [yt_dec_19_morn_count, yt_dec_19_day_count, yt_dec_19_eve_count, yt_dec_19_afthrs_count]
df = pd.DataFrame({'Taxis': yt_Dec_2019,
                   'For Hire': hv_Dec_2019}, index=index)

df.plot.bar(rot=0)
plt.title("For Hire vs Taxis in Manhattan - December 2019")
plt.ylabel("Number of Pickups")
plt.xlabel("Time of Day")
# Dashed reference line at 50,000 pickups
plt.axhline(y = 50000, color = '0', linestyle = 'dashed')
plt.show()

# Pickup locations heatmap

In [None]:
# Reuse the June 2015 taxi frame loaded in the data-import cell instead of reading
# clean_csvs/taxi_june_2015.csv a second time through a different relative path
# ("../../" here vs "../../../Desktop/" everywhere else in the notebook).
taxi_june_2015 = june_2015_taxi

In [None]:
# Latitude/longitude pair of every June 2015 taxi pickup, used as the heatmap points
locations = taxi_june_2015.loc[:, ["pickup_latitude", "pickup_longitude"]]

In [None]:
# Build a heatmap layer from the pickup coordinates in `locations` and show it on a new map figure
heat_layer = gmaps.heatmap_layer(locations)
fig = gmaps.figure()
fig.add_layer(heat_layer)
fig

In [None]:
# Preview the first five rows of the June 2019 taxi data
june_2019_taxi.head(5)

In [None]:
# Rename the PULocationID column to LocationID, the key the later merge with the
# zone-location table joins on
june_2019_taxi_df = june_2019_taxi.rename({'PULocationID': 'LocationID'}, axis="columns")
june_2019_taxi_df

In [None]:
# Load the Manhattan pickup-location table
manhattan_pickup_loc_csv = "../../../Desktop/NYC Taxi & FHV Data/Manhattan_pickup_loc.csv"
manhattan_zone_locations = pd.read_csv(manhattan_pickup_loc_csv)

In [None]:
# Left-join the zone-location table onto every June 2019 taxi ride by LocationID;
# rides with no matching LocationID keep NaN in the joined columns
merged_june_2019_taxi = pd.merge(
    june_2019_taxi_df,
    manhattan_zone_locations,
    on="LocationID",
    how="left",
)


In [None]:
# Drop every row that has a missing value in any column, then display the result
merged_june_2019_taxi = merged_june_2019_taxi.dropna(axis=0, how="any")
merged_june_2019_taxi

In [None]:
# Map points (Lat/Long of each ride) and the per-ride weight (passenger_count as float)
locations = merged_june_2019_taxi.loc[:, ["Lat", "Long"]]
pickup_rate = merged_june_2019_taxi.loc[:, "passenger_count"].astype(float)

In [None]:
# June 2019 taxi pickups: one info-box marker per zone plus a passenger-weighted heatmap
taxi_2019_fig = gmaps.figure()

# HTML shown when a marker is clicked; the placeholders are filled from the
# Zone / Lat / Long columns of the merged frame
info_box_template = """
<dl>
<dt>Name</dt><dd>{Zone}</dd>
<dt>Lat</dt><dd>{Lat}</dd>
<dt>Long</dt><dd>{Long}</dd>
</dl>
"""

# Every ride in a zone carries the same Zone/Lat/Long, so build one marker per distinct
# combination instead of one per ride row.
zone_points = merged_june_2019_taxi[["Zone", "Lat", "Long"]].drop_duplicates()
pickup_info = [info_box_template.format(**row) for row in zone_points.to_dict("records")]

markers = gmaps.marker_layer(zone_points[["Lat", "Long"]], info_box_content = pickup_info)

# heatmap_layer takes no info_box_content argument (info boxes belong to marker layers),
# so only the weighting and rendering options are passed here.
heat_layer = gmaps.heatmap_layer(locations, weights=pickup_rate,
                                 dissipating=True, max_intensity=100,
                                 point_radius = 1)

taxi_2019_fig.add_layer(markers)
taxi_2019_fig.add_layer(heat_layer)

taxi_2019_fig