In [2]:
import numpy as np
import pandas as pd

In [3]:
# Load the daily sunspot file. It has no header row and uses ';' as separator, so the
# column names are supplied explicitly. The padded placeholder strings listed in
# na_values are read as NaN for their respective columns.
col_names = ['year', 'month', 'day', 'dec_date', 'sunspots', 'standard_dev', 'nobs', 'definite']
date_parts = ['year', 'month', 'day']
sunspots_raw = pd.read_csv("data/SN_d_tot_V2.0.csv",
                           header=None,
                           sep=';',
                           names=col_names,
                           na_values={'sunspots': ['  -1'], 'standard_dev': [' -1.0']})

# Merge year/month/day into a single datetime column after loading. read_csv could do
# this via the nested-list form parse_dates=[[0, 1, 2]], but that form is deprecated in
# recent pandas releases; pd.to_datetime on the three columns gives the same result.
# The merged column is placed first and the three source columns are dropped.
# NOTE: this yields an ordinary column, not a DatetimeIndex; the index is unchanged here.
sunspots = sunspots_raw.drop(columns=date_parts)
sunspots.insert(0, 'year_month_day', pd.to_datetime(sunspots_raw[date_parts]))



In [4]:
# Preview the first rows to check the merged date column and the NaN placeholders.
sunspots.head()

Unnamed: 0,year_month_day,dec_date,sunspots,standard_dev,nobs,definite
0,1818-01-01,1818.001,,,0,1
1,1818-01-02,1818.004,,,0,1
2,1818-01-03,1818.007,,,0,1
3,1818-01-04,1818.01,,,0,1
4,1818-01-05,1818.012,,,0,1


In [5]:
# Label the rows by date so they can be selected with date strings; drop=False keeps
# 'year_month_day' available as a regular column as well.
sunspots = sunspots.set_index('year_month_day', drop=False)

In [6]:
# Preview again: the row labels are now the dates, and the date column is still present.
sunspots.head()

Unnamed: 0_level_0,year_month_day,dec_date,sunspots,standard_dev,nobs,definite
year_month_day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1818-01-01,1818-01-01,1818.001,,,0,1
1818-01-02,1818-01-02,1818.004,,,0,1
1818-01-03,1818-01-03,1818.007,,,0,1
1818-01-04,1818-01-04,1818.01,,,0,1
1818-01-05,1818-01-05,1818.012,,,0,1


In [10]:
# Select every row of August 1818 using a partial date string.
# Row selection by date string has to go through .loc: the bare sunspots['August 1818']
# form is treated as a column lookup in current pandas and raises KeyError.
sunspots.loc['August 1818'].head()

Unnamed: 0_level_0,year_month_day,dec_date,sunspots,standard_dev,nobs,definite
year_month_day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1818-08-01,1818-08-01,1818.582,,,0,1
1818-08-02,1818-08-02,1818.585,47.0,8.7,1,1
1818-08-03,1818-08-03,1818.588,28.0,6.7,1,1
1818-08-04,1818-08-04,1818.59,17.0,5.2,1,1
1818-08-05,1818-08-05,1818.593,17.0,5.2,1,1


In [11]:
# Label-based selection with .loc: a partial date string picks all rows of that month.
sunspots.loc['August 1818'].head()

Unnamed: 0_level_0,year_month_day,dec_date,sunspots,standard_dev,nobs,definite
year_month_day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1818-08-01,1818-08-01,1818.582,,,0,1
1818-08-02,1818-08-02,1818.585,47.0,8.7,1,1
1818-08-03,1818-08-03,1818.588,28.0,6.7,1,1
1818-08-04,1818-08-04,1818.59,17.0,5.2,1,1
1818-08-05,1818-08-05,1818.593,17.0,5.2,1,1


In [13]:
# The index and date capabilities of pandas are remarkable. It can even understand things like
# a slice between two month names; count() then reports the non-null entries per column:
sunspots.loc['August 1818': 'September 1818'].count()

year_month_day    61
dec_date          61
sunspots          38
standard_dev      38
nobs              61
definite          61
dtype: int64

In [14]:
# or even an explicit day-to-day range written as ISO date strings:
sunspots.loc['1818-08-01': '1818-08-15']

Unnamed: 0_level_0,year_month_day,dec_date,sunspots,standard_dev,nobs,definite
year_month_day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1818-08-01,1818-08-01,1818.582,,,0,1
1818-08-02,1818-08-02,1818.585,47.0,8.7,1,1
1818-08-03,1818-08-03,1818.588,28.0,6.7,1,1
1818-08-04,1818-08-04,1818.59,17.0,5.2,1,1
1818-08-05,1818-08-05,1818.593,17.0,5.2,1,1
1818-08-06,1818-08-06,1818.596,22.0,5.9,1,1
1818-08-07,1818-08-07,1818.599,32.0,7.1,1,1
1818-08-08,1818-08-08,1818.601,63.0,10.1,1,1
1818-08-09,1818-08-09,1818.604,58.0,9.7,1,1
1818-08-10,1818-08-10,1818.607,67.0,10.3,1,1


### Reindexing the Index ###
Reindexing is useful in preparation for adding or otherwise combining two timeseries datasets. To reindex the data, we provide a new index and ask pandas to try and match the old data to the new index.<br>
If data is unavailable for one of the new index dates, pandas will fill with `NaN` by default - or we can tell pandas how to fill those gaps using the `method` argument.

### Resampling Pandas time series ###
Resampling is similar to groupby for timeseries. Downsampling aggregates time-level data to a 'coarser' level, e.g. aggregating hourly data to daily. The opposite, upsampling, is also possible, i.e. going to a 'finer' time level.


In [16]:
# Pull the temperature series out once; both resamples below start from it.
temperature = df['Temperature']

# df1: mean temperature over consecutive 6-hour bins
df1 = temperature.resample('6h').mean()

# df2: number of non-null temperature readings per calendar day
df2 = temperature.resample('D').count()

### Rolling mean and frequency ###
Rolling means - also called moving averages - can be easily calculated using the `rolling()` method. The call to `rolling()` must be chained with an aggregation method (e.g. `.mean()`) to indicate the type of aggregation over the specified time window:

In [None]:
# Temperature readings for the first half of August 2010, selected in a single
# label-based .loc call (row date range, then column).
unsmoothed = df.loc['2010-08-01':'2010-08-15', 'Temperature']

# Moving average over a window of 24 consecutive observations.
smoothed = unsmoothed.rolling(window=24).mean()