# Loading Data from CSV file

* The Pandas library in Python provides excellent, built-in support for time series data.
* Pandas represents time series datasets as a **Series**. 
* A Series is a one-dimensional array with a time label for each row.
* A DataFrame is a collection of Series that share a common index.

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Download the CSV file from the course resources and place it in the working
# directory before running this cell.
# Load it with the first row (header=0) as column names; no date parsing is
# requested here, so the 'date' column is left as read by the default parser.
dataframe = pd.read_csv('daily-total-female-births-CA.csv', header=0)

#### First five records

In [None]:
# Display the first rows of the raw frame (head() defaults to five rows).
dataframe.head()

#### Data Type

In [None]:
# Inspect the dtype of the 'date' column when the file is loaded without
# parse_dates (presumably a generic object/string dtype - confirm by running).
dataframe['date'].dtype

### Loading data with parse_dates

In [None]:
# Load the same file again, this time asking pandas to parse the first column
# (position 0) as dates.
df2 = pd.read_csv('daily-total-female-births-CA.csv', header=0 , parse_dates=[0])

#### First five records

In [None]:
# Display the first rows of the date-parsed frame (head() defaults to five rows).
df2.head()

#### Data Type

In [None]:
# Inspect the dtype of 'date' after parse_dates; compare with the dtype shown
# for the frame loaded without date parsing.
df2['date'].dtype

#### Note
from datetime import datetime

dateparse = lambda x: datetime.strptime(x, '%Y-%m-%d %H:%M:%S')

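df = pd.read_csv(.........,parse_dates=[0], date_parser=dateparse)

In recent pandas versions (2.0 and later) `date_parser` is deprecated; pass `date_format='%Y-%m-%d %H:%M:%S'` to `read_csv` instead.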

https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior

### Loading Data as a series

In [None]:
# Load the file as a Series: parse the first column as dates, use it as the
# index, then collapse the remaining single-column frame into a Series.
# DataFrame.squeeze('columns') replaces the read_csv(squeeze=True) keyword,
# which is no longer accepted by current pandas releases.
series = pd.read_csv(
    'daily-total-female-births-CA.csv',
    header=0,
    parse_dates=[0],
    index_col=0,
).squeeze('columns')

#### First 5 records

In [None]:
# Display the first five entries of the date-indexed Series.
series.head()

# Exploring Time Series Data

#### Size

In [None]:
# Shape of the Series: a one-element tuple holding the number of observations.
series.shape

In [None]:
# Shape of the DataFrame as (rows, columns), for comparison with the Series.
df2.shape

### Querying by time

In [None]:
# Partial-string indexing on the date index: select every observation whose
# timestamp falls in January 1959.
print(series['1959-01'])

In [None]:
# Boolean-mask query on the 'date' column: rows strictly after 1959-01-01 and
# up to and including 1959-01-21.
df2[(df2['date'] > '1959-01-01') & (df2['date'] <= '1959-01-21')]

### Descriptive Statistics

In [None]:
# Summary statistics of the Series values.
series.describe()

In [None]:
# Summary statistics of the DataFrame columns.
df2.describe()

# Feature Engineering

* Date time features
* Lag features
* Window features
* Expanding features


### Date time features

In [None]:
# Reminder of what the date-parsed frame looks like before adding features.
df2.head(5)

In [None]:
# Work on a copy so that the engineered columns do not modify df2.
features = df2.copy()

In [None]:
# Calendar year extracted from the 'date' column via the .dt accessor.
features['year'] = df2['date'].dt.year

In [None]:
# Month number extracted from the 'date' column.
features['month'] = df2['date'].dt.month

In [None]:
# Day of the month extracted from the 'date' column.
features['day'] = df2['date'].dt.day

In [None]:
# Keep only the calendar-component columns of `features`.
# Column selection takes a list of labels; the earlier dict-based key is
# unhashable and makes DataFrame.__getitem__ raise TypeError.
df8 = features[['year', 'month', 'day']]

In [None]:
# Display the day-of-month values on their own.
df2['date'].dt.day

In [None]:
# Add/overwrite three calendar columns, one assignment per column.
# Assigning a tuple of Series to a list of column labels is interpreted as a
# 2-D block of the wrong shape (and the year Series was additionally sliced to
# a different length), so that form fails; per-column assignment aligns each
# Series on the index instead.
# NOTE(review): the original sliced the year values with [0:364]; the slice is
# dropped here on the assumption that it was unintentional - confirm.
features['new'] = df2['date'].dt.month
features['year'] = df2['date'].dt.year
features['Date'] = df2['date'].dt.day

In [None]:
# Display the last five rows to check the newly added columns.
features.tail(5)

For more detail : https://pandas.pydata.org/pandas-docs/stable/reference/series.html#datetimelike-properties

### Lag features

To create a lag feature (here with a lag of 2 time steps)

In [None]:
# Lag feature: each row receives the 'births' value from two rows earlier;
# the first two rows have no predecessor and are left missing.
features['lag2'] =  df2['births'].shift(2)

In [None]:
# Lag of 365 rows, stored under its own name so it no longer overwrites the
# 'lag2' column created from shift(2).
# NOTE(review): a lag that is as long as the dataset yields only missing
# values; this file appears to hold about one year of daily data - confirm
# that a 365-step lag is really wanted.
features['lag365'] = df2['births'].shift(365)

In [None]:
# Display the first seven rows to see where the lagged values start.
features.head(7)

For more detail : https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.shift.html#pandas.Series.shift

### Window features

In [None]:
# Window feature: mean of 'births' over a rolling window of two consecutive rows.
features['Roll_mean'] = df2['births'].rolling(window = 2).mean()

In [None]:
# Display the first five rows to check the rolling mean column.
features.head(5)

In [None]:
# Window feature: maximum of 'births' over a rolling window of three consecutive rows.
features['Roll_max'] = df2['births'].rolling(window = 3).max()

In [None]:
# Display the first five rows to check the rolling max column.
features.head(5)

For more detail : https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.rolling.html#pandas.Series.rolling

### Expanding features

In [None]:
# Expanding feature: running maximum of 'births' from the first row up to the
# current row.
features['Expand_max'] = df2['births'].expanding().max()

In [None]:
# Display the first ten rows to follow how the expanding maximum evolves.
features.head(10)

For more detail : https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.expanding.html#pandas.Series.expanding

# Time Series - Data Visualization

In [None]:
from matplotlib import pyplot as plt
%matplotlib inline

In [None]:
# Separate copy of the date-parsed frame for the plotting examples.
Dataviz_df = df2.copy()

In [None]:
# Display the first ten rows of the plotting frame.
Dataviz_df.head(10)

In [None]:
# Line plot of 'births' against the frame's current index (the index has not
# been set to the dates yet at this point).
Dataviz_df['births'].plot()

In [None]:
# Use the 'date' column as the index so later plots are drawn against dates;
# the 'date' column itself is kept.
Dataviz_df.index = Dataviz_df['date'] 

In [None]:
# Same line plot again, now with the date index on the x-axis.
Dataviz_df['births'].plot()

### Zooming In

In [None]:
# Zoom in: keep rows strictly after 1959-01-01 and up to and including
# 1959-01-10; .copy() makes the subset independent of Dataviz_df.
Dataviz_df2 = Dataviz_df[(Dataviz_df['date'] > '1959-01-01') & (Dataviz_df['date'] <= '1959-01-10')].copy()

In [None]:
# Display the zoomed-in subset.
Dataviz_df2

In [None]:
# Line plot of 'births' for the zoomed-in date range only.
Dataviz_df2['births'].plot()

### Trendline

In [None]:
import seaborn as sns

In [None]:
# Scatter plot with a fitted regression line (trend line); the x values are
# the index values of df2 rather than the dates themselves.
sns.regplot(x= df2.index.values, y=df2['births'])

In [None]:
# Same plot with a polynomial fit of order 7 instead of a straight line.
sns.regplot(x= df2.index.values, y=df2['births'], order =7)

In [None]:
# Load the airline miles dataset, parsing the first column (position 0) as
# dates. The file must be present in the working directory.
miles_df = pd.read_csv('us-airlines-monthly-aircraft-miles-flown.csv', header=0 , parse_dates=[0])

In [None]:
# Display the first five rows of the miles dataset.
miles_df.head()

In [None]:
# Line plot of the 'MilesMM' column against the frame's index.
miles_df['MilesMM'].plot()

In [None]:
# Trend line for 'MilesMM': regression of the values on the index values.
sns.regplot(x= miles_df.index.values, y=miles_df['MilesMM'])

### Removing Seasonality

In [None]:
# Calendar year of each observation, taken from the parsed 'Month' column.
miles_df['year'] = miles_df['Month'].dt.year

In [None]:
# Display the first five rows, now including the 'year' column.
miles_df.head()

In [None]:
# Average 'MilesMM' per calendar year; averaging over each year smooths out
# the within-year pattern.
miles_df.groupby('year')['MilesMM'].mean()

In [None]:
# Plot the per-year averages.
miles_df.groupby('year')['MilesMM'].mean().plot()

### Creating Lag plots

In [None]:
# Lag-1 feature: the 'MilesMM' value of the previous row.
miles_df['lag1'] =  miles_df['MilesMM'].shift(1)

In [None]:
# Display the first five rows, now including the 'lag1' column.
miles_df.head()

In [None]:
# Manual lag plot: each value plotted against the value of the previous row.
sns.scatterplot(x=miles_df['lag1'], y=miles_df['MilesMM'])

In [None]:
from pandas.plotting import lag_plot

In [None]:
# Lag plot produced by the pandas plotting helper, using its default lag.
lag_plot(miles_df['MilesMM'])

### Autocorrelation Plots

In [None]:
from pandas.plotting import autocorrelation_plot

In [None]:
# Autocorrelation plot of 'MilesMM' produced by the pandas plotting helper.
autocorrelation_plot(miles_df['MilesMM'])

## Downsampling and Upsampling

In [None]:
# Reload the miles dataset from disk; rebinding miles_df discards the extra
# columns added to the previous frame.
miles_df = pd.read_csv('us-airlines-monthly-aircraft-miles-flown.csv', header=0 , parse_dates=[0])

In [None]:
# Display the first five rows of the freshly loaded frame.
miles_df.head()

### Downsampling

In [None]:
# Downsample to quarterly frequency, grouping on the 'Month' column and
# taking the mean of each quarter.
# NOTE(review): recent pandas releases deprecate the 'Q' alias in favour of
# 'QE' - confirm against the installed pandas version.
quarterly_miles_df = miles_df.resample('Q', on='Month').mean()

In [None]:
# Display the first five quarterly averages.
quarterly_miles_df.head()

In [None]:
# Downsample to yearly frequency, grouping on the 'Month' column and summing
# the values of each year.
# NOTE(review): recent pandas releases deprecate the 'A' alias in favour of
# 'YE' - confirm against the installed pandas version.
yearly_total_miles_df = miles_df.resample('A', on='Month').sum()

In [None]:
# Display the first five yearly totals.
yearly_total_miles_df.head()

| Alias  | Description           |
|--------|-----------------------|
| B      | Business day          |
| D      | Calendar day          |
| W      | Weekly                |
| M      | Month end             |
| Q      | Quarter end           |
| A      | Year end              |
| BA     | Business year end     |
| AS     | Year start            |
| H      | Hourly frequency      |
| T, min | Minutely frequency    |
| S      | Secondly frequency    |
| L, ms  | Millisecond frequency |
| U, us  | Microsecond frequency |
| N, ns  | Nanosecond frequency  |

### Upsampling

In [None]:
# Upsample to daily frequency, grouping on the 'Month' column. Days without an
# observation have nothing to average and are left missing, to be filled by
# interpolation afterwards.
upsampled_miles_df = miles_df.resample('D', on='Month').mean()

In [None]:
# Display the first 35 daily rows, enough to span more than one month.
upsampled_miles_df.head(35)

In [None]:
# Plot the upsampled frame before any interpolation.
upsampled_miles_df.plot()

In [None]:
# Fill the missing daily values by linear interpolation between known points.
interpolated_miles_df = upsampled_miles_df.interpolate(method='linear')

In [None]:
# Display the first 32 rows of the linearly interpolated frame.
interpolated_miles_df.head(32)

In [None]:
# Display the first 35 rows of the linearly interpolated frame.
interpolated_miles_df.head(35)

In [None]:
# Plot the linearly interpolated daily values.
interpolated_miles_df.plot()

In [None]:
# Fill the missing daily values with a spline of order 2 instead of straight
# lines (presumably requires SciPy to be installed - confirm).
poly_interpolated_miles_df = upsampled_miles_df.interpolate(method='spline', order=2)

In [None]:
# Plot the spline-interpolated daily values.
poly_interpolated_miles_df.plot()

| Method  | Description                                               |
|---------|-----------------------------------------------------------|
| bfill   | Backward fill                                             |
| count   | Count of values                                           |
| ffill   | Forward fill                                              |
| first   | First valid data value                                    |
| last    | Last valid data value                                     |
| max     | Maximum data value                                        |
| mean    | Mean of values in time range                              |
| median  | Median of values in time range                            |
| min     | Minimum data value                                        |
| nunique | Number of unique values                                   |
| ohlc    | Opening value, highest value, lowest value, closing value |
| pad     | Same as forward fill                                      |
| std     | Standard deviation of values                              |
| sum     | Sum of values                                             |
| var     | Variance of values                                        |

## Decomposing Time Series

**Additive Model** 

y(t) = Level + Trend + Seasonality + Noise

**Multiplicative Model** 

y(t) = Level * Trend * Seasonality * Noise

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose

In [None]:
# Load a fresh copy of the miles dataset for the decomposition examples,
# parsing the first column (position 0) as dates.
miles_decomp_df = pd.read_csv('us-airlines-monthly-aircraft-miles-flown.csv', header=0 , parse_dates=[0])

In [None]:
# Display the first five rows of the decomposition frame.
miles_decomp_df.head()

In [None]:
# Index the frame by the 'Month' dates so that the series passed to
# seasonal_decompose carries a date index.
miles_decomp_df.index = miles_decomp_df['Month'] 

In [None]:
# Decompose 'MilesMM' using the additive model.
result = seasonal_decompose(miles_decomp_df['MilesMM'], model='additive')

In [None]:
# Plot the decomposition; the trailing semicolon keeps the notebook from also
# echoing the returned object.
result.plot();

In [None]:
# Decompose the same series using the multiplicative model.
result2 = seasonal_decompose(miles_decomp_df['MilesMM'], model='multiplicative')

In [None]:
# Plot the multiplicative decomposition; the trailing semicolon keeps the
# notebook from also echoing the returned object.
result2.plot();

### Differencing

In [None]:
# Reload the miles dataset for the differencing examples; rebinding miles_df
# discards any columns added to the previous frame.
miles_df = pd.read_csv('us-airlines-monthly-aircraft-miles-flown.csv', header=0 , parse_dates=[0])

In [None]:
# Display the first five rows of the freshly loaded frame.
miles_df.head()

In [None]:
# Previous-row value of 'MilesMM', kept alongside the difference for comparison.
miles_df['lag1'] =  miles_df['MilesMM'].shift(1)

In [None]:
# First-order difference: each value minus the value one row earlier.
miles_df['MilesMM_diff_1'] = miles_df['MilesMM'].diff(periods=1)

In [None]:
# Display the first five rows with the 'lag1' and 'MilesMM_diff_1' columns.
miles_df.head()

In [None]:
# Index the frame by the 'Month' dates, then decompose the original
# (undifferenced) 'MilesMM' series with the additive model as a baseline.
miles_df.index = miles_df['Month'] 
result_a = seasonal_decompose(miles_df['MilesMM'], model='additive')
# Trailing semicolon keeps the notebook from also echoing the returned object.
result_a.plot();

In [None]:
# Display the whole frame, now indexed by 'Month'.
miles_df

In [None]:
# Decompose the first-differenced series with the additive model.
# The index assignment is repeated so this cell can be run on its own.
miles_df.index = miles_df['Month']
# Select the series by column name instead of by column position, so the cell
# keeps working if columns are added or reordered. The first row is skipped
# because a first-order difference has no value there.
result_b = seasonal_decompose(miles_df['MilesMM_diff_1'].iloc[1:], model='additive')
# Trailing semicolon keeps the notebook from also echoing the returned object.
result_b.plot();

In [None]:
# Plot the original series for comparison with the differenced versions.
miles_df['MilesMM'].plot()

In [None]:
# Plot the first-differenced series.
miles_df['MilesMM_diff_1'].plot() 

In [None]:
# Difference the already first-differenced series again at a lag of 12 rows
# (one year apart for monthly data).
miles_df['MilesMM_diff_12'] = miles_df['MilesMM_diff_1'].diff(periods=12)

In [None]:
# Plot the series after both the lag-1 and the lag-12 differencing.
miles_df['MilesMM_diff_12'].plot() 

In [None]:
# Decompose the twice-differenced series with the additive model.
# Select the series by column name instead of by column position, so the cell
# keeps working if columns are added or reordered. The first 13 rows are
# skipped: one has no lag-1 difference and the next twelve have no lag-12
# difference.
result_c = seasonal_decompose(miles_df['MilesMM_diff_12'].iloc[13:], model='additive')
# Trailing semicolon keeps the notebook from also echoing the returned object.
result_c.plot();