# Persisting Time Series Data to Files

In [34]:
import pandas as pd
from pathlib import Path

## Writing to a pickle file using Pandas

In [41]:
# Wide-format CSV of global confirmed COVID-19 counts: one row per
# country/province entry and one column per calendar date.
file = Path('../../datasets/Ch4/time_series_covid19_confirmed_global.csv')
df = pd.read_csv(filepath_or_buffer=file)

In [42]:
# Preview the first five rows to confirm the wide layout (one column per date).
df.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,9/19/21,9/20/21,9/21/21,9/22/21,9/23/21,9/24/21,9/25/21,9/26/21,9/27/21,9/28/21
0,,Afghanistan,33.93911,67.709953,0,0,0,0,0,0,...,154487,154585,154712,154757,154800,154960,154960,154960,155072,155093
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,162953,163404,164276,165096,165864,166690,167354,167893,168188,168782
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,201600,201766,201948,202122,202283,202449,202574,202722,202877,203045
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,15124,15140,15140,15153,15156,15167,15167,15167,15189,15192
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,52307,52644,52968,53387,53840,54280,54795,55121,55583,56040


In [49]:
# Keep only the row(s) whose Country/Region is 'US'.
is_us = df['Country/Region'] == 'US'
df_usa = df.loc[is_us]
# Label-based column slice, inclusive on both ends:
# 1 June 2020 through 30 September 2020.
summer_columns = slice('6/1/20', '9/30/20')
df_usa_summer = df_usa.loc[:, summer_columns]
# Still wide format at this point (one column per date); display it
# before it gets reshaped.
df_usa_summer

Unnamed: 0,6/1/20,6/2/20,6/3/20,6/4/20,6/5/20,6/6/20,6/7/20,6/8/20,6/9/20,6/10/20,...,9/21/20,9/22/20,9/23/20,9/24/20,9/25/20,9/26/20,9/27/20,9/28/20,9/29/20,9/30/20
254,1816679,1837948,1857728,1879463,1904375,1925849,1944117,1960815,1978823,2000019,...,6860254,6900324,6939235,6986604,7034971,7079902,7117964,7150631,7194195,7233295


In [62]:
# unpivot using pd.melt()
df_usa_summer_unpivoted = pd.melt(df_usa_summer,
                                  value_vars=df_usa_summer.columns,
                                  value_name='cases',
                                  var_name='date').set_index('date')
df_usa_summer_unpivoted.index = pd.to_datetime(df_usa_summer_unpivoted.index)
df_usa_summer_unpivoted.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 122 entries, 2020-06-01 to 2020-09-30
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   cases   122 non-null    int64
dtypes: int64(1)
memory usage: 1.9 KB


In [64]:
# Preview the first five rows of the long-format frame (date index, 'cases' column).
df_usa_summer_unpivoted.head()

Unnamed: 0_level_0,cases
date,Unnamed: 1_level_1
2020-06-01,1816679
2020-06-02,1837948
2020-06-03,1857728
2020-06-04,1879463
2020-06-05,1904375


In [65]:
# Destination for the serialized frame, in the same directory as the source CSV.
output = Path('../../datasets/Ch4/covid_usa_summer_2020.pkl')
# Serialize the long-format frame (DatetimeIndex + 'cases') with pandas' pickle writer.
df_usa_summer_unpivoted.to_pickle(path=output)

In [66]:
# Round-trip check: load the pickle written to `output` and inspect it to
# confirm the DatetimeIndex and the int64 'cases' column came back intact.
# NOTE: unpickling can execute arbitrary code; only load files you trust.
unpickled_df = pd.read_pickle(filepath_or_buffer=output)
unpickled_df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 122 entries, 2020-06-01 to 2020-09-30
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   cases   122 non-null    int64
dtypes: int64(1)
memory usage: 1.9 KB


## Writing a pickle file using the pickle library

In [68]:
import pickle

In [None]:
pickle