In [1]:
import pandas as pd
import altair as alt



# COVID-19 Confirmed Cases

In [2]:
# Load the COVID-19 series and keep only the three columns the chart needs,
# relabelled with the names that appear in the chart legend and tooltip.
covid = pd.read_csv('outputs/ny_covid19_positive.csv')[['date', 'positive', 'positiveIncrease']]
covid.columns = ['date', 'confirmed cases number', 'Increased cases number']

# Add a zero-case row for March 1st.
# The row is appended after the last existing row rather than written to a
# hard-coded index label: a fixed label (previously 271) silently overwrites
# real data as soon as the CSV holds more rows than expected. The guard keeps
# the date from being duplicated if the file already contains it.
march_first = '2020-03-01'
if not (covid['date'] == march_first).any():
    covid = pd.concat(
        [covid, pd.DataFrame([[march_first, 0, 0]], columns=covid.columns)],
        ignore_index=True,
    )

# Convert wide-form data to long form so both metrics can be drawn as
# separate lines in a single chart.
covid_long_form = covid.melt('date', var_name='metric', value_name='number')
covid_long_form

Unnamed: 0,date,metric,number
0,2020-11-27,confirmed cases number,628375
1,2020-11-26,confirmed cases number,620199
2,2020-11-25,confirmed cases number,613266
3,2020-11-24,confirmed cases number,607001
4,2020-11-23,confirmed cases number,602120
...,...,...,...
539,2020-03-05,Increased cases number,2
540,2020-03-04,Increased cases number,0
541,2020-03-03,Increased cases number,1
542,2020-03-02,Increased cases number,0


In [3]:
# Interactive line chart of the long-form COVID-19 frame: time on the x axis,
# the value on the y axis, and one coloured line per metric.
covid_chart = (
    alt.Chart(covid_long_form)
    .mark_line()
    .encode(
        x=alt.X("date:T", title="Month"),
        y=alt.Y("number:Q", title="Confirmed / Increased Cases Number"),
        color=alt.Color("metric:N"),
        # show every column of the plotted frame in the hover tooltip
        tooltip=covid_long_form.columns.tolist(),
    )
    .properties(
        title="Confirmed / Increased Cases in NYC",
        width=500,
        height=500,
    )
    .interactive()
)

covid_chart

# FHV

In [4]:
# Load the for-hire-vehicle (FHV) tables for 2019 and for 2020.
fhv_2019, fhv_2020_covid = (
    pd.read_csv(csv_path)
    for csv_path in ('outputs/fhv_2019.csv', 'outputs/fhv_2020_covid.csv')
)

# Place the 2019 and 2020 columns side by side. pd.concat aligns on the row
# index here, so this assumes both files list their rows in the same order
# -- TODO confirm.
fhv_agg = pd.concat([fhv_2019, fhv_2020_covid], axis="columns")

## Relationship between confirmed cases and the number of for-hire vehicle (FHV) orders

In [5]:
# Interactive line chart of the 2020 FHV order count (`count2020`) plotted
# against the confirmed-case count (`positive`).
fhv_chart = (
    alt.Chart(fhv_2020_covid)
    .mark_line()
    .encode(
        x=alt.X("positive:Q", title="Confirmed Cases Number"),
        y=alt.Y("count2020:Q", title="Order Number"),
        # show every column of the plotted frame in the hover tooltip
        tooltip=fhv_2020_covid.columns.tolist(),
    )
    .properties(
        title="FHV Order Number's Change With Confirmed Cases",
        width=500,
        height=250,
    )
    .interactive()
)

fhv_chart

## Changes in the number of FHV orders over time, in comparison with 2019

In [6]:
# Only the 2020 date column is kept as the shared time axis; the selected
# columns are relabelled with the names shown in the chart legend.
fhv_count = fhv_agg[['date2020', 'count2020', 'count2019', 'positive']].rename(
    columns={
        'date2020': 'date',
        'count2020': '2020 order number',
        'count2019': '2019 order number',
        'positive': 'confirmed cases number',
    }
)

# Reshape from wide to long form so each series becomes its own line in one chart.
fhv_count_long_form = pd.melt(
    fhv_count,
    id_vars='date',
    var_name='type',
    value_name='number',
)
fhv_count_long_form

Unnamed: 0,date,type,number
0,2020-03-01,2020 order number,44800
1,2020-03-02,2020 order number,65168
2,2020-03-03,2020 order number,68632
3,2020-03-04,2020 order number,69476
4,2020-03-05,2020 order number,69880
...,...,...,...
361,2020-06-26,confirmed cases number,391220
362,2020-06-27,confirmed cases number,391923
363,2020-06-28,confirmed cases number,392539
364,2020-06-29,confirmed cases number,392930


In [7]:
# Interactive line chart of the long-form FHV frame: one coloured line per
# value of `type`, plotted over time.
fhv_count_chart = (
    alt.Chart(fhv_count_long_form)
    .mark_line()
    .encode(
        x=alt.X("date:T", title="Month"),
        y=alt.Y("number:Q", title="Order Number / Confirmed Cases"),
        color=alt.Color("type:N"),
        # show every column of the plotted frame in the hover tooltip
        tooltip=fhv_count_long_form.columns.tolist(),
    )
    .properties(
        title="FHV Order Number Comparison Between 2019 & 2020",
        width=500,
        height=500,
    )
    .interactive()
)

fhv_count_chart

# Yellow Taxi

In [8]:
# Load the yellow-taxi tables for 2019 and for 2020.
yellow_2019, yellow_2020_covid = (
    pd.read_csv(csv_path)
    for csv_path in ('outputs/yellow_2019.csv', 'outputs/yellow_2020_covid.csv')
)

# Place the 2019 and 2020 columns side by side. pd.concat aligns on the row
# index here, so this assumes both files list their rows in the same order
# -- TODO confirm.
yellow_agg = pd.concat([yellow_2019, yellow_2020_covid], axis="columns")

## Relationship between confirmed cases and taxi order number

In [9]:
# Interactive line chart of the 2020 yellow-taxi order count (`count2020`)
# plotted against the confirmed-case count (`positive`).
yellow_chart = (
    alt.Chart(yellow_2020_covid)
    .mark_line()
    .encode(
        x=alt.X("positive:Q", title="Confirmed Cases Number"),
        y=alt.Y("count2020:Q", title="Order Number"),
        # show every column of the plotted frame in the hover tooltip
        tooltip=yellow_2020_covid.columns.tolist(),
    )
    .properties(
        title="Yellow Taxi Order Number's Change With Confirmed Cases",
        width=500,
        height=250,
    )
    .interactive()
)

yellow_chart

## Relationship between confirmed cases and the average number of taxi passengers

In [10]:
# Interactive line chart of the 2020 average-passenger column (`avg2020`)
# plotted against the confirmed-case count (`positive`).
yellow_avg_chart = (
    alt.Chart(yellow_2020_covid)
    .mark_line()
    .encode(
        x=alt.X("positive:Q", title="Confirmed Cases Number"),
        y=alt.Y("avg2020:Q", title="Average passengers"),
        # show every column of the plotted frame in the hover tooltip
        tooltip=yellow_2020_covid.columns.tolist(),
    )
    .properties(
        title="Yellow Taxi Average passengers' Change With Confirmed Cases",
        width=500,
        height=250,
    )
    .interactive()
)

yellow_avg_chart

## Changes in the number of taxi orders over time, in comparison with 2019

In [11]:
# Only the 2020 date column is kept as the shared time axis; the selected
# columns are relabelled with the names shown in the chart legend.
yellow_count = yellow_agg[['date2020', 'count2020', 'count2019', 'positive']].rename(
    columns={
        'date2020': 'date',
        'count2020': '2020 order number',
        'count2019': '2019 order number',
        'positive': 'confirmed cases number',
    }
)

# Reshape from wide to long form so each series becomes its own line in one chart.
yellow_count_long_form = pd.melt(
    yellow_count,
    id_vars='date',
    var_name='type',
    value_name='number',
)
yellow_count_long_form

Unnamed: 0,date,type,number
0,2020-03-01,2020 order number,179723
1,2020-03-02,2020 order number,193508
2,2020-03-03,2020 order number,222917
3,2020-03-04,2020 order number,229734
4,2020-03-05,2020 order number,244448
...,...,...,...
361,2020-06-26,confirmed cases number,391220
362,2020-06-27,confirmed cases number,391923
363,2020-06-28,confirmed cases number,392539
364,2020-06-29,confirmed cases number,392930


In [12]:
# Interactive line chart of the long-form yellow-taxi frame: one coloured line
# per value of `type`, plotted over time.
yellow_count_chart = (
    alt.Chart(yellow_count_long_form)
    .mark_line()
    .encode(
        x=alt.X("date:T", title="Month"),
        y=alt.Y("number:Q", title="Order Number / Confirmed Cases"),
        color=alt.Color("type:N"),
        # show every column of the plotted frame in the hover tooltip
        tooltip=yellow_count_long_form.columns.tolist(),
    )
    .properties(
        title="Yellow Taxi Order Number Comparison Between 2019 & 2020",
        width=500,
        height=500,
    )
    .interactive()
)

yellow_count_chart

## Changes in the average number of passengers over time, in comparison with 2019

In [13]:
# Keep just the average-passenger columns (not the order counts), with the
# 2020 date column as the shared time axis, relabelled for the chart legend.
yellow_avg = yellow_agg[['date2020', 'avg2020', 'avg2019']].rename(
    columns={
        'date2020': 'date',
        'avg2020': '2020 average passengers number',
        'avg2019': '2019 average passengers number',
    }
)

# Reshape from wide to long form so each year becomes its own line in one chart.
yellow_avg_long_form = pd.melt(
    yellow_avg,
    id_vars='date',
    var_name='type',
    value_name='number',
)
yellow_avg_long_form

Unnamed: 0,date,type,number
0,2020-03-01,2020 average passengers number,1.556861
1,2020-03-02,2020 average passengers number,1.462445
2,2020-03-03,2020 average passengers number,1.457159
3,2020-03-04,2020 average passengers number,1.448739
4,2020-03-05,2020 average passengers number,1.457315
...,...,...,...
239,2020-06-26,2019 average passengers number,1.525198
240,2020-06-27,2019 average passengers number,1.530668
241,2020-06-28,2019 average passengers number,1.555708
242,2020-06-29,2019 average passengers number,1.634292


In [14]:
# Interactive line chart comparing the 2019 and 2020 average-passenger series
# over time: one coloured line per value of `type` in the long-form frame.
# NOTE(review): this rebinds `yellow_avg_chart`, which an earlier cell already
# uses for the "average passengers vs confirmed cases" chart; consider a
# distinct name if both charts need to stay accessible.
yellow_avg_chart = alt.Chart(yellow_avg_long_form, title="Yellow Taxi Average Passengers Comparison Between 2019 & 2020").mark_line().encode(
    x=alt.X("date:T"),
    y="number:Q",
    color='type:N',
    # show every column of the plotted frame in the hover tooltip
    tooltip=list(yellow_avg_long_form.columns)
).properties(
    width=500,
    height=250
).interactive()

yellow_avg_chart.encoding.x.title = "Month"
# The y axis shows average passengers per trip; the previous label
# ("Order Number / Confirmed Cases") was copied from the order-count chart
# and did not describe the data plotted here.
yellow_avg_chart.encoding.y.title = "Average Passengers"

yellow_avg_chart