# Time series data Conversion for different requrements

In [1]:
import numpy as np
import pandas as pd
import datetime

In [2]:
# My full dataset is on daily riverflow or river discharge 
df = pd.read_csv('dailyRiverflow.csv', parse_dates = ['Date'])
df.head(2)

Unnamed: 0,Date,River_flow
0,1987-01-01,5810.0
1,1987-02-01,5680.0


In [3]:
#Setting date as index
df_ts = df.set_index('Date')
df_ts.sort_index(inplace = True)
df_ts

Unnamed: 0_level_0,River_flow
Date,Unnamed: 1_level_1
1987-01-01,5810.000000
1987-01-02,4190.000000
1987-01-03,3480.000000
1987-01-04,4790.000000
1987-01-05,9900.000000
...,...
2019-12-27,5720.905088
2019-12-28,5605.507045
2019-12-29,5576.854780
2019-12-30,5463.033799


## Slicing the year from 2001-jan-01 to 2001-dec-31

In [4]:
# slicing the yaer from 2001-jan-01 to 2001-dec-31
newdata = df_ts['2001-01-01':'2001-12-31']

In [5]:
newdata.head()

Unnamed: 0_level_0,River_flow
Date,Unnamed: 1_level_1
2001-01-01,6399.53
2001-01-02,4480.7
2001-01-03,3972.81
2001-01-04,2702.43
2001-01-05,10100.54


In [6]:
newdata.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 365 entries, 2001-01-01 to 2001-12-31
Data columns (total 1 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   River_flow  365 non-null    float64
dtypes: float64(1)
memory usage: 5.7 KB


In [6]:
# Now I'm converting daily data to monthly data but there is a point we have to give attention which is river flow showing 
## accross the last date of the each month.

newdata_monthly = newdata.asfreq('M', how='end')
newdata_monthly

Unnamed: 0_level_0,River_flow
Date,Unnamed: 1_level_1
2001-01-31,4204.58
2001-02-28,3975.88
2001-03-31,3110.66
2001-04-30,7192.21
2001-05-31,13072.43
2001-06-30,27752.03
2001-07-31,36839.03
2001-08-31,46294.7
2001-09-30,21901.98
2001-10-31,14413.46


In [7]:
# in monthly data I removed daily info from data index by to_period().
newdata_monthly.to_period()

Unnamed: 0_level_0,River_flow
Date,Unnamed: 1_level_1
2001-01,4204.58
2001-02,3975.88
2001-03,3110.66
2001-04,7192.21
2001-05,13072.43
2001-06,27752.03
2001-07,36839.03
2001-08,46294.7
2001-09,21901.98
2001-10,14413.46


## Converting daily to monthly by average

In [8]:
# from yearly data I converted it to monthly by taking whole month average.
newdata.resample('1M',closed='right', label='right').mean()

Unnamed: 0_level_0,River_flow
Date,Unnamed: 1_level_1
2001-01-31,9275.214839
2001-02-28,9378.952857
2001-03-31,8931.178065
2001-04-30,10845.589
2001-05-31,13752.598065
2001-06-30,22528.425667
2001-07-31,23860.266452
2001-08-31,26732.638387
2001-09-30,26011.170667
2001-10-31,19216.980323


In [9]:
# and finally, I removed the monthly last date from date index. It does not effect any values. Only for good look and making periodical.
newdata.resample('1M',closed='right', label='right').mean().to_period()

Unnamed: 0_level_0,River_flow
Date,Unnamed: 1_level_1
2001-01,9275.214839
2001-02,9378.952857
2001-03,8931.178065
2001-04,10845.589
2001-05,13752.598065
2001-06,22528.425667
2001-07,23860.266452
2001-08,26732.638387
2001-09,26011.170667
2001-10,19216.980323


## Converting daily to monthly by average

In [10]:
# We can take Montly data by whole month sum values.
newdata.resample('1M',closed='right', label='right').sum()

Unnamed: 0_level_0,River_flow
Date,Unnamed: 1_level_1
2001-01-31,287531.66
2001-02-28,262610.68
2001-03-31,276866.52
2001-04-30,325367.67
2001-05-31,426330.54
2001-06-30,675852.77
2001-07-31,739668.26
2001-08-31,828711.79
2001-09-30,780335.12
2001-10-31,595726.39


In [11]:
# whole month some to period data.
newdata.resample('1M',closed='right', label='right').sum().to_period()

Unnamed: 0_level_0,River_flow
Date,Unnamed: 1_level_1
2001-01,287531.66
2001-02,262610.68
2001-03,276866.52
2001-04,325367.67
2001-05,426330.54
2001-06,675852.77
2001-07,739668.26
2001-08,828711.79
2001-09,780335.12
2001-10,595726.39


In [12]:
# for just test purpose, checking  Only first month of the 2001 for mean and avaerage which is correct or not.
ts = df_ts['2001-01-01':'2001-01-31']

In [13]:
# Its actually matched up with above mean value calculation.
ts.mean()

River_flow    9275.214839
dtype: float64

In [14]:
# Its alos perfectly matched !
ts.sum()

River_flow    287531.66
dtype: float64

In [15]:
# Now I will check again my 2001-01-01  to 2001-12-31 which contain 12 months
newdata

Unnamed: 0_level_0,River_flow
Date,Unnamed: 1_level_1
2001-01-01,6399.53
2001-01-02,4480.70
2001-01-03,3972.81
2001-01-04,2702.43
2001-01-05,10100.54
...,...
2001-12-27,5567.79
2001-12-28,5807.72
2001-12-29,6067.00
2001-12-30,6309.81


## Converting a full year of daily to quarterly. It's very important for seasonal analysis.

In [17]:
# from yearly data I converted it to QUARTERLY by taking whole QUARTER average.
# For quarter, I choose Q-DEC BC its start from jan to dec as 1 - 4 quart
newdata.resample('Q-DEC',closed='right', label='right').mean()

Unnamed: 0_level_0,River_flow
Date,Unnamed: 1_level_1
2001-03-31,9188.987333
2001-06-30,15687.373407
2001-09-30,25529.512717
2001-12-31,13611.683696


In [18]:
# And again, for just test purpose, checking  Only first QUARTER (JAN-MAR) of the 2001 for mean and avaerage which is correct or not. 
q1 = df_ts['2001-01-01':'2001-03-31']

In [19]:
q1.head()

Unnamed: 0_level_0,River_flow
Date,Unnamed: 1_level_1
2001-01-01,6399.53
2001-01-02,4480.7
2001-01-03,3972.81
2001-01-04,2702.43
2001-01-05,10100.54


In [20]:
q1.tail()

Unnamed: 0_level_0,River_flow
Date,Unnamed: 1_level_1
2001-03-27,3124.68
2001-03-28,3110.66
2001-03-29,3110.66
2001-03-30,3110.66
2001-03-31,3110.66


In [21]:
q1.mean()

River_flow    9188.987333
dtype: float64

perfect ! which is actually same as quarterly mean with q1 mean output.