[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/HIL-HK/lets-plot-examples/master?filepath=demo%2Fdelhi_climate.ipynb)

[<img alt="nbviewer" src="https://raw.githubusercontent.com/jupyter/design/master/logos/Badges/nbviewer_badge.png" width="109" height="20">](https://nbviewer.jupyter.org/github/HIL-HK/lets-plot-examples/blob/master/demo/delhi_climate.ipynb)

In [1]:
import pandas as pd

from lets_plot import *; LetsPlot.setup_html()

# Time Series Visualizations

This notebook demonstrates how to use Lets-Plot to investigate time series.

The data is provided by [Kaggle](https://www.kaggle.com/sumanthvrao/daily-climate-time-series-data).

### Preparation

In [2]:
# Location of the raw CSV; it is fetched over HTTP each time this cell runs.
DATA_URL = 'https://raw.githubusercontent.com/HIL-HK/lets-plot-examples/master/data/delhi_climate.csv'
df = pd.read_csv(DATA_URL)

In [3]:
# Parse the date column once and derive the calendar fields from it.
# The raw 'date' column is not kept, and only rows with year < 2017 remain.
# NOTE: this cell rebinds `df`, so it cannot be re-run without re-loading the
# data first (the 'date' column it reads is gone after the first run).
timestamps = pd.to_datetime(df.date)
df = (
    df
    .rename(columns={'meantemp': 'mean_temp', 'meanpressure': 'mean_pressure'})
    .assign(
        day=timestamps.dt.day,
        month=timestamps.dt.month,
        year=timestamps.dt.year,
        day_of_year=timestamps.dt.dayofyear,
    )
    .drop(columns=['date'])
    .loc[lambda frame: frame.year < 2017]
)

### Boxplot: General Information

In [4]:
# Top panel: one box per year, coloured and filled by year.
yearly_mapping = aes(x='year', y='mean_temp', color='year', fill='year')
p1 = (
    ggplot()
    + geom_boxplot(yearly_mapping, data=df, size=2, alpha=.5)
    + scale_x_discrete(name='year')
    + scale_color_discrete()
    + scale_fill_discrete()
    + ylab('mean temperature')
    + ggtitle('Mean Temperature Aggregated')
    + theme(legend_position='bottom')
)

# Bottom panel: boxes per month, faceted horizontally by year; legend hidden.
monthly_mapping = aes(x='month', y='mean_temp', color='year', fill='year')
p2 = (
    ggplot()
    + geom_boxplot(monthly_mapping, data=df, size=.75, alpha=.5)
    + scale_color_discrete()
    + scale_fill_discrete()
    + facet_grid(x='year')
    + ggtitle('Mean Temperature by Month')
    + theme(legend_position='none')
)

# Stack the two panels vertically: (plot, y offset, height), all 1000 wide at x=0.
bunch = GGBunch()
for panel, top, height in ((p1, 0, 500), (p2, 500, 300)):
    bunch.add_plot(panel, 0, top, 1000, height)
bunch.show()

### Year-to-Year Temperature Comparison

In [5]:
# One line per year (grouped and coloured by year), with a facet row per month.
daily_lines = geom_line(
    aes(x='day', y='mean_temp', group='year', color='year'),
    data=df,
    size=2,
)
(
    ggplot()
    + daily_lines
    + scale_x_discrete()
    + scale_color_discrete()
    + facet_grid(y='month')
    + ggtitle('Mean Temperature for Each Month')
    + theme(legend_position='bottom')
)

### Most Common Temperature Values

In [6]:
# 15-bin histograms of the mean temperature in a year (columns) x month (rows) grid.
temperature_bins = geom_histogram(
    aes(x='mean_temp', group='year', color='year', fill='year'),
    data=df,
    bins=15,
    size=1,
    alpha=.5,
)
(
    ggplot()
    + temperature_bins
    + scale_color_discrete()
    + scale_fill_discrete()
    + facet_grid(x='year', y='month')
    + ggtitle('Most Common Temperature')
    + ggsize(1000, 2000)
)

### Heatmap of Temperatures

In [7]:
# Cast the temperatures to int so that points with the same integer
# temperature and month share one position in the plot.
int_mean_temp_df = df[['mean_temp', 'month', 'year']].assign(
    mean_temp=lambda frame: frame.mean_temp.astype(int)
)

# Points with low alpha are drawn at (month, integer temperature); axis text,
# ticks and lines are blanked, leaving only the legend for reading values.
heat_cells = geom_point(
    aes(x='month', y='mean_temp', color='mean_temp'),
    data=int_mean_temp_df,
    shape=15,
    size=4,
    alpha=.2,
)
(
    ggplot()
    + heat_cells
    + scale_color_gradient(name='mean temperature', low='#abd9e9', high='#d7191c')
    + facet_grid(x='year')
    + coord_fixed(ratio=1)
    + xlab('month')
    + ylab('')
    + ggtitle('Heatmap of Temperatures by Year')
    + ggsize(1000, 500)
    + theme(legend_position='bottom', axis_text='blank', axis_ticks='blank', axis_line='blank')
)

### Observing Mean Temperature and Wind Speed Correlation

In [8]:
# The same gradient is applied to both the outline colour and the fill.
temperature_gradient = dict(name='', low='#abd9e9', high='#d7191c')

# Wind speed against mean temperature, one facet per year.
wind_points = geom_point(
    aes(x='wind_speed', y='mean_temp', color='mean_temp', fill='mean_temp'),
    data=df,
    shape=21,
    size=3,
    alpha=.2,
)
(
    ggplot()
    + wind_points
    + scale_color_gradient(**temperature_gradient)
    + scale_fill_gradient(**temperature_gradient)
    + facet_grid(x='year')
    + xlab('wind speed')
    + ylab('mean temperature')
    + ggtitle('Relation Between Mean Temperature and Wind Speed')
    + ggsize(1000, 300)
)

### Observing Mean Temperature and Humidity Correlation

In [9]:
# Here the gradient encodes humidity (not temperature) for outline and fill.
humidity_gradient = dict(name='', low='#fdae61', high='#2c7bb6')

# Humidity against mean temperature, one facet per year.
humidity_points = geom_point(
    aes(x='humidity', y='mean_temp', color='humidity', fill='humidity'),
    data=df,
    shape=21,
    size=3,
    alpha=.2,
)
(
    ggplot()
    + humidity_points
    + scale_color_gradient(**humidity_gradient)
    + scale_fill_gradient(**humidity_gradient)
    + facet_grid(x='year')
    + xlab('humidity')
    + ylab('mean temperature')
    + ggtitle('Relation Between Mean Temperature and Humidity')
    + ggsize(1000, 300)
)

### In Search of Correlation on Lag Scatter Plots

In [10]:
def _lagged_temperatures(frame, lag):
    """Pair each row's mean temperature with the value `lag` rows further down.

    Parameters
    ----------
    frame : DataFrame with 'mean_temp' and 'year' columns.
    lag : number of rows to look ahead. The shift runs over the whole frame,
        so pairs may cross year boundaries.
        NOTE(review): a row lag equals a day lag only if there is exactly one
        row per consecutive day - confirm against the dataset.

    Returns
    -------
    A new DataFrame with columns 'mean_temp', 'year' and 'mean_temp_shifted';
    rows containing any missing value (e.g. the trailing rows that have no
    partner after shifting) are dropped.
    """
    lagged = frame[['mean_temp', 'year']].copy()
    lagged['mean_temp_shifted'] = frame.mean_temp.shift(-lag)
    return lagged.dropna()


def _lag_scatter(data, title):
    """Build a per-year faceted scatter of 'mean_temp' vs 'mean_temp_shifted'.

    `data` is a frame as returned by `_lagged_temperatures`; `title` is the
    plot title. Points are coloured and filled by 'mean_temp', and the
    coordinate ratio is fixed at 1.
    """
    return (
        ggplot()
        + geom_point(
            aes(x='mean_temp', y='mean_temp_shifted', color='mean_temp', fill='mean_temp'),
            data=data, shape=21, size=3, alpha=.2,
        )
        + scale_color_gradient(name='', low='#abd9e9', high='#d7191c')
        + scale_fill_gradient(name='', low='#abd9e9', high='#d7191c')
        + facet_grid(x='year')
        + coord_fixed(ratio=1)
        + xlab('mean temperature') + ylab('shifted mean temperature')
        + ggtitle(title)
    )


# Lags are expressed in rows: 1 (day), 30 (used as "one month"), 365 (year).
df_shifted_by_day = _lagged_temperatures(df, 1)
p1 = _lag_scatter(df_shifted_by_day, 'One Day Lag Scatter Plot')

df_shifted_by_month = _lagged_temperatures(df, 30)
p2 = _lag_scatter(df_shifted_by_month, 'One Month Lag Scatter Plot')

# The last remaining row is discarded for the yearly lag only.
# NOTE(review): presumably that row belongs to the final year and would
# otherwise produce a facet holding a single point - confirm.
df_shifted_by_year = _lagged_temperatures(df, 365)[:-1]
p3 = _lag_scatter(df_shifted_by_year, 'One Year Lag Scatter Plot')

# Stack the three panels vertically, each 1000 x 300.
bunch = GGBunch()
bunch.add_plot(p1, 0, 0, 1000, 300)
bunch.add_plot(p2, 0, 300, 1000, 300)
bunch.add_plot(p3, 0, 600, 1000, 300)
bunch.show()

### Annual Path of Mean Temperature and Humidity

In [11]:
# Monthly averages of temperature and humidity for every (year, month) pair.
# The two columns are selected *before* aggregating so that no other column
# is averaged needlessly, and so that a non-numeric column elsewhere in `df`
# cannot make `.mean()` fail. The resulting frame is the same as aggregating
# everything and selecting afterwards.
mean_df = (
    df.groupby(by=['year', 'month'])[['mean_temp', 'humidity']]
    .mean()
    .reset_index()
)

# The path joins the monthly averages in row order (groupby sorts its keys by
# default, so rows run by year, then month); points are filled by month
# number. One facet per year.
ggplot(mean_df) + \
    geom_path(aes(x='humidity', y='mean_temp'), color='#99d8c9', size=1) + \
    geom_point(aes(x='humidity', y='mean_temp', fill='month'), shape=21, size=3, color='#00441b') + \
    scale_fill_gradient(name='', low='#e5f5f9', high='#2ca25f') + \
    facet_grid(x='year') + \
    ylab('mean temperature') + \
    ggtitle('Annual Path of Mean Temperature and Humidity') + \
    ggsize(1000, 300)

### Autocorrelation Plots for Mean Temperature, Wind Speed and Humidity

In [12]:
# Output column label -> source column in `df`.
acf_sources = {
    'mean temperature acf': 'mean_temp',
    'wind speed acf': 'wind_speed',
    'humidity acf': 'humidity',
}
lags = range(365 * 3)

# Wide table: one row per lag, one autocorrelation column per series.
acf_wide = pd.DataFrame({
    'lag': list(lags),
    **{
        label: [df[column].autocorr(lag=lag) for lag in lags]
        for label, column in acf_sources.items()
    },
})

# Long format (lag, acf_type, acf_value) so each series gets its own facet row.
acf_df = acf_wide.melt(
    id_vars=['lag'],
    value_vars=list(acf_sources),
    var_name='acf_type',
    value_name='acf_value',
)

acf_points = geom_point(aes(x='lag', y='acf_value', color='acf_value'), data=acf_df, size=3)
(
    ggplot()
    + acf_points
    + scale_color_gradient(low='#fc8d59', high='#91cf60')
    + facet_grid(y='acf_type')
    + ylab('ACF value')
    + ggtitle('Autocorrelation Functions')
    + ggsize(1000, 600)
    + theme(legend_position='none')
)