In [1]:
import pandas as pd
import altair as alt



# COVID-19 Positive

In [2]:
# Load the positive-case CSV, keeping only the three columns used below.
covid = pd.read_csv('outputs/ny_covid19_positive.csv')[['date', 'positive', 'positiveIncrease']]
# Adding data of March 1st (zero positives, zero increase).
# The row is appended with concat rather than written to a hard-coded label:
# a fixed label such as 271 would silently overwrite an existing row whenever
# the CSV holds more rows than it did when the label was chosen.
march_first = pd.DataFrame([['2020-03-01', 0, 0]], columns=covid.columns)
covid = pd.concat([covid, march_first], ignore_index=True)

In [3]:
# Line chart of the `positive` column over time; the x-axis domain is pinned
# to 2020-03-01 .. 2020-10-31 and every column is exposed in the tooltip.
(
    alt.Chart(covid, title="Relationship Between Confirmed Cases and Time in NYC")
    .mark_line()
    .encode(
        x=alt.X("date:T", scale=alt.Scale(domain=['2020-03-01', '2020-10-31'])),
        y=alt.Y("positive"),
        tooltip=covid.columns.tolist(),
    )
    .properties(width=500, height=500)
    .interactive()
)

# Flight

In [4]:
# Read the two flight exports (2019 and 2020) in a single pass.
flight_2019, flight_2020_covid = map(
    pd.read_csv,
    ('outputs/flight_2019.csv', 'outputs/flight_2020_covid.csv'),
)

In [5]:
# Reshape to long form: one row per (date, type) pair with its value in `number`.
flight_long_form = pd.melt(
    flight_2020_covid,
    id_vars='date',
    var_name='type',
    value_name='number',
)

In [6]:
# One line per `type`, plotting `number` against date.
(
    alt.Chart(flight_long_form, title="Relationship Between Number of Flights and Time in KJFK")
    .mark_line()
    .encode(
        x=alt.X("date:T"),
        y=alt.Y("number"),
        color=alt.Color('type'),
        tooltip=flight_long_form.columns.tolist(),
    )
    .properties(width=500, height=500)
    .interactive()
)

# FHV

In [7]:
# Read the 2019 and 2020 FHV exports.
fhv_2019, fhv_2020_covid = [
    pd.read_csv(csv_path)
    for csv_path in ('outputs/fhv_2019.csv', 'outputs/fhv_2020_covid.csv')
]

In [8]:
# 2020 FHV count plotted against the `positive` column (both quantitative).
(
    alt.Chart(fhv_2020_covid, title="FHV Order Number's Change With Confirmed Cases")
    .mark_line()
    .encode(
        x=alt.X("positive:Q"),
        y=alt.Y("count2020:Q"),
        tooltip=fhv_2020_covid.columns.tolist(),
    )
    .properties(width=500, height=250)
    .interactive()
)

In [9]:
# Place the 2019 and 2020 frames side by side; rows are aligned on the index,
# not on the date columns.
fhv_agg = pd.concat((fhv_2019, fhv_2020_covid), axis="columns")
fhv_agg

Unnamed: 0,date,count2019,date2020,count2020,positive
0,2019-03-01,45851,2020-03-01,44800,0
1,2019-03-02,35839,2020-03-02,65168,0
2,2019-03-03,34178,2020-03-03,68632,1
3,2019-03-04,25005,2020-03-04,69476,1
4,2019-03-05,40853,2020-03-05,69880,3
...,...,...,...,...,...
117,2019-06-26,71188,2020-06-26,37655,391220
118,2019-06-27,69925,2020-06-27,29040,391923
119,2019-06-28,67683,2020-06-28,23562,392539
120,2019-06-29,56556,2020-06-29,37264,392930


In [10]:
# Keep the 2020 date, both yearly counts, and the positive column.
# NOTE(review): `positive` is selected here, so a later melt on `date2020`
# turns it into a third series next to count2020/count2019 - confirm intended.
fhv_count_columns = ['date2020', 'count2020', 'count2019', 'positive']
fhv_count = fhv_agg.loc[:, fhv_count_columns]
fhv_count

Unnamed: 0,date2020,count2020,count2019,positive
0,2020-03-01,44800,45851,0
1,2020-03-02,65168,35839,0
2,2020-03-03,68632,34178,1
3,2020-03-04,69476,25005,1
4,2020-03-05,69880,40853,3
...,...,...,...,...
117,2020-06-26,37655,71188,391220
118,2020-06-27,29040,69925,391923
119,2020-06-28,23562,67683,392539
120,2020-06-29,37264,56556,392930


In [11]:
# Long form keyed by `date2020`: the former column name goes to `year`,
# its value to `count`.
fhv_count_long_form = pd.melt(
    fhv_count,
    id_vars='date2020',
    var_name='year',
    value_name='count',
)
fhv_count_long_form

Unnamed: 0,date2020,year,count
0,2020-03-01,count2020,44800
1,2020-03-02,count2020,65168
2,2020-03-03,count2020,68632
3,2020-03-04,count2020,69476
4,2020-03-05,count2020,69880
...,...,...,...
361,2020-06-26,positive,391220
362,2020-06-27,positive,391923
363,2020-06-28,positive,392539
364,2020-06-29,positive,392930


In [12]:
# One line per value of `year`, plotting `count` against the 2020 date.
(
    alt.Chart(fhv_count_long_form, title="FHV Order Number Comparison Between 2019 & 2020")
    .mark_line()
    .encode(
        x=alt.X("date2020:T"),
        y=alt.Y("count:Q"),
        color=alt.Color('year:N'),
        tooltip=fhv_count_long_form.columns.tolist(),
    )
    .properties(width=500, height=500)
    .interactive()
)

# Yellow Taxi

In [13]:
# Read the 2019 and 2020 yellow-taxi exports.
yellow_2019, yellow_2020_covid = map(
    pd.read_csv,
    ('outputs/yellow_2019.csv', 'outputs/yellow_2020_covid.csv'),
)

In [14]:
# 2020 yellow-taxi count plotted against the `positive` column.
(
    alt.Chart(yellow_2020_covid, title="Yellow Taxi Order Number's Change With Confirmed Cases")
    .mark_line()
    .encode(
        x=alt.X("positive:Q"),
        y=alt.Y("count2020:Q"),
        tooltip=yellow_2020_covid.columns.tolist(),
    )
    .properties(width=500, height=250)
    .interactive()
)

In [15]:
# 2020 yellow-taxi average passengers (`avg2020`) plotted against `positive`.
# The title names the plotted measure; it previously repeated the
# "Order Number" title of the count2020 chart although the y channel is avg2020.
alt.Chart(yellow_2020_covid, title="Yellow Taxi Average Passengers' Change With Confirmed Cases").mark_line().encode(
    x=alt.X("positive:Q"),
    y="avg2020:Q",
    tooltip=list(yellow_2020_covid.columns)
).properties(
    width=500,
    height=250
).interactive()

In [16]:
# Place the 2019 and 2020 frames side by side; rows are aligned on the index,
# not on the date columns.
yellow_agg = pd.concat((yellow_2019, yellow_2020_covid), axis="columns")
yellow_agg

Unnamed: 0,date,count2019,avg2019,date2020,count2020,avg2020,positive
0,2019-03-01,281130,1.568730,2020-03-01,179723,1.556861,0
1,2019-03-02,251510,1.641788,2020-03-02,193508,1.462445,0
2,2019-03-03,221835,1.621832,2020-03-03,222917,1.457159,1
3,2019-03-04,208014,1.552492,2020-03-04,229734,1.448739,1
4,2019-03-05,267813,1.543043,2020-03-05,244448,1.457315,3
...,...,...,...,...,...,...,...
117,2019-06-26,249641,1.525198,2020-06-26,26228,1.394282,391220
118,2019-06-27,253765,1.530668,2020-06-27,17215,1.393957,391923
119,2019-06-28,242109,1.555708,2020-06-28,15201,1.386412,392539
120,2019-06-29,216025,1.634292,2020-06-29,24842,1.388419,392930


In [17]:
# Keep the counts only (the avg columns are dropped), plus date and positive.
# NOTE(review): `date` appears to be the 2019 calendar date in the displayed
# frame, and `positive` will become a third series once melted - confirm both.
yellow_count_columns = ['date', 'count2020', 'count2019', 'positive']
yellow_count = yellow_agg.loc[:, yellow_count_columns]
yellow_count

Unnamed: 0,date,count2020,count2019,positive
0,2019-03-01,179723,281130,0
1,2019-03-02,193508,251510,0
2,2019-03-03,222917,221835,1
3,2019-03-04,229734,208014,1
4,2019-03-05,244448,267813,3
...,...,...,...,...
117,2019-06-26,26228,249641,391220
118,2019-06-27,17215,253765,391923
119,2019-06-28,15201,242109,392539
120,2019-06-29,24842,216025,392930


In [18]:
# Long form keyed by `date`: the former column name goes to `year`,
# its value to `count`.
yellow_count_long_form = pd.melt(
    yellow_count,
    id_vars='date',
    var_name='year',
    value_name='count',
)
yellow_count_long_form

Unnamed: 0,date,year,count
0,2019-03-01,count2020,179723
1,2019-03-02,count2020,193508
2,2019-03-03,count2020,222917
3,2019-03-04,count2020,229734
4,2019-03-05,count2020,244448
...,...,...,...
361,2019-06-26,positive,391220
362,2019-06-27,positive,391923
363,2019-06-28,positive,392539
364,2019-06-29,positive,392930


In [19]:
# One line per value of `year`, plotting `count` against `date`.
(
    alt.Chart(yellow_count_long_form, title="Yellow Taxi Order Number Comparison Between 2019 & 2020")
    .mark_line()
    .encode(
        x=alt.X("date:T"),
        y=alt.Y("count:Q"),
        color=alt.Color('year:N'),
        tooltip=yellow_count_long_form.columns.tolist(),
    )
    .properties(width=500, height=500)
    .interactive()
)

In [20]:
# Keep the averages only (the count and positive columns are dropped), plus date.
yellow_avg_columns = ['date', 'avg2020', 'avg2019']
yellow_avg = yellow_agg.loc[:, yellow_avg_columns]
yellow_avg

Unnamed: 0,date,avg2020,avg2019
0,2019-03-01,1.556861,1.568730
1,2019-03-02,1.462445,1.641788
2,2019-03-03,1.457159,1.621832
3,2019-03-04,1.448739,1.552492
4,2019-03-05,1.457315,1.543043
...,...,...,...
117,2019-06-26,1.394282,1.525198
118,2019-06-27,1.393957,1.530668
119,2019-06-28,1.386412,1.555708
120,2019-06-29,1.388419,1.634292


In [21]:
# Long form keyed by `date`. The value column is named `count` even though it
# holds the avg2020/avg2019 values; the chart that follows reads it by that name.
yellow_avg_long_form = pd.melt(
    yellow_avg,
    id_vars='date',
    var_name='year',
    value_name='count',
)
yellow_avg_long_form

Unnamed: 0,date,year,count
0,2019-03-01,avg2020,1.556861
1,2019-03-02,avg2020,1.462445
2,2019-03-03,avg2020,1.457159
3,2019-03-04,avg2020,1.448739
4,2019-03-05,avg2020,1.457315
...,...,...,...
239,2019-06-26,avg2019,1.525198
240,2019-06-27,avg2019,1.530668
241,2019-06-28,avg2019,1.555708
242,2019-06-29,avg2019,1.634292


In [22]:
# One line per value of `year` (avg2020 / avg2019), plotted against `date`.
(
    alt.Chart(yellow_avg_long_form, title="Yellow Taxi Average Passengers Comparison Between 2019 & 2020")
    .mark_line()
    .encode(
        x=alt.X("date:T"),
        y=alt.Y("count:Q"),
        color=alt.Color('year:N'),
        tooltip=yellow_avg_long_form.columns.tolist(),
    )
    .properties(width=500, height=250)
    .interactive()
)