# Pandas

## Resampling
https://www.w3resource.com/pandas/series/series-resample.php

In [None]:
from numpy import poly1d
import numpy as np
from scipy import linalg, sparse, misc
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import neighbors, datasets, preprocessing
# Preprocessing
from sklearn.model_selection import train_test_split
# Metrics
from sklearn.metrics import accuracy_score, mean_squared_error, r2_score
# Model
from sklearn import linear_model

# ARIMA
from pandas.plotting import autocorrelation_plot
from statsmodels.tsa.arima_model import ARIMA

# Load the UFO reports with the 'Time' column parsed as dates and used as the
# index, so rows can be selected with date strings below.
# Alternative load that keeps 'Time' as an ordinary column:
# ufo = pd.read_csv('http://bit.ly/uforeports')
ufo = pd.read_csv(
    'http://bit.ly/uforeports',
    index_col='Time',
    parse_dates=True,
)

# Container type of the loaded data
type(ufo)


# Pull out a single column; it shares the frame's index
cities = ufo.City
cities.index
type(cities)

# Partial-string indexing: a single day, then a span of months
cities['2000-12-01']
cities['September 2000':'December 2000']

# Number of rows per distinct index value (timestamp) within December 2000
(
    cities['December 2000']
    .groupby(level=0)
    .size()
)

# Resampling

# Period: a PeriodIndex with one entry per day between the two dates
pd.period_range('12-12-2010', '12-10-2011', freq='D')

# Aggregation on resampling: count the entries per day from June 2000 onward.
# `resample(..., kind='period')` is deprecated since pandas 2.2, so resample on
# the datetime index and convert the result to a daily PeriodIndex explicitly.
diary = cities['2000-06':].resample('D').count().to_period('D')

# Plot the daily counts (Series.plot draws on the figure created just before)
plt.figure(figsize=(14, 8))
diary.plot()

# Autocorrelation of the daily counts, on its own figure
plt.figure(figsize=(14, 8))
autocorrelation_plot(diary)

# ARIMA
# The legacy `statsmodels.tsa.arima_model.ARIMA` was deprecated in statsmodels
# 0.12 and raises NotImplementedError from 0.13 on. Rebind ARIMA to the
# supported replacement here so this cell works on current statsmodels.
from statsmodels.tsa.arima.model import ARIMA

# order=(p, d, q): 5 autoregressive lags, first-order differencing, no MA terms
model = ARIMA(diary, order=(5, 1, 0))
# The replacement's fit() has no `disp` argument, so the old `disp=0` is dropped.
model_fit = model.fit()
print(model_fit.summary())

# plot residual errors
residuals = pd.DataFrame(model_fit.resid)
residuals.plot()

# Show types
ufo.head()
ufo.dtypes

# The frame was loaded with 'Time' as its index, so `ufo.Time` would raise
# AttributeError. Everything below needs 'Time' as a regular column, so move
# it back out of the index when it is not already a column.
if 'Time' not in ufo.columns:
    ufo = ufo.reset_index()
ufo['Time'] = pd.to_datetime(ufo.Time)

# Attributes exposed by the `.dt` accessor of a datetime column
ufo.Time.dt.hour
ufo.Time.dt.weekday
ufo.Time.dt.dayofyear

# search on pandas api doc ".dt."

# The string is day/month/year; say so explicitly instead of relying on the
# parser guessing the order (which also emits a warning on recent pandas).
ts = pd.to_datetime("30/1/1999", dayfirst=True)

# Rows at or after the cutoff timestamp
ufo.loc[ufo.Time >= ts, :].head()

# Max
ufo.Time.max()

# Time delta: whole days between the earliest and latest entry
(ufo.Time.max() - ufo.Time.min()).days

# some plotting: number of rows per year
# (the accessor attribute is lowercase `year`; `.dt.Year` does not exist)
ufo['Year'] = ufo.Time.dt.year
ufo.Year.value_counts().sort_index().plot()

# Working with `datetime`

In [None]:
# built-in functions: the standard-library `datetime` module

from datetime import datetime, timedelta
date1 = datetime(2019,1,1)  # year, month, day; time defaults to midnight

# Show me
print(date1)
print(type(date1))
date1

# Help (IPython `?` introspection; this line is not valid plain Python)
datetime?

date2 = datetime(2019,1,1,1)  # same day as date1, with hour=1
# Subtracting two datetimes gives a timedelta; date1 is the earlier instant,
# so this difference is negative.
difference = date1 - date2

# Show me
type(difference)

# Help (IPython `?` introspection; this line is not valid plain Python)
timedelta?

# formatting dates as day/month/year
date1.strftime('%d/%m/%Y')