# Introduction to the time series dataset

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import io

In [None]:
# Print the current working directory (the base for any relative file path).
import os
print(os.getcwd())

In [None]:
# Load the international airline passengers CSV directly from its GitHub URL.
airline_data = pd.read_csv('https://raw.githubusercontent.com/Sangmann/lg/main/international-airline-passengers.csv')

In [None]:
# Preview the first five rows.
airline_data.head(5)

In [None]:
# List the column names exactly as they were read from the CSV.
airline_data.columns

In [None]:
# Show the name of the second column on its own.
airline_data.columns[1]

In [None]:
# Rename the second column to 'passengers' (modifies the frame in place),
# then display the column name again to confirm the change.
airline_data.rename(columns={airline_data.columns[1]: 'passengers'}, inplace=True)
airline_data.columns[1]

In [None]:
# Boolean frame of the same shape: True wherever a cell is missing (NaN).
airline_data.isnull()

In [None]:
# Count the missing (NaN) values in each column.
airline_data.isnull().sum()

In [None]:
# Look at the last five rows before any rows are dropped.
airline_data.tail(5)

In [None]:
# Drop every row that contains a NaN (modifies the frame in place),
# then re-count the missing values per column to check the result.
airline_data.dropna(inplace=True)
airline_data.isnull().sum()

In [None]:
# Last five rows again, this time after the NaN rows were dropped.
airline_data.tail(5)

In [None]:
# Move the 'Month' column into the index (in place) and parse it as datetimes,
# so the frame is indexed by timestamp from here on.
airline_data.set_index('Month', inplace=True)
airline_data.index=pd.to_datetime(airline_data.index)
airline_data  # display the re-indexed frame

In [None]:
# Line chart of the 'passengers' column on a 12x8-inch figure.
fig = plt.figure(figsize=(12, 8))
plt.plot(airline_data['passengers'])

# Axis labels and title are independent of each other; all use font size 12.
plt.xlabel('Month', fontsize=12)
plt.ylabel('Passengers', fontsize=12)
plt.title('International airline passengers', fontsize=12)
plt.show()

# Decomposition of time series data

In [None]:
pip install statsmodels

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose

In [None]:
# Multiplicative decomposition of the passenger series into trend, seasonal
# and residual components. 'multiplicative' is the documented model name.
multiple_results = seasonal_decompose(airline_data['passengers'], model='multiplicative')

In [None]:
# Draw the multiplicative decomposition result.
# The default figure size is set first so the figure created by .plot() uses it.
plt.rc('figure', figsize=[12, 8])
multiple_results.plot()
plt.show()

In [None]:
# Plot only the trend component of the multiplicative decomposition.
multiple_results.trend.plot()

In [None]:
# Plot only the seasonal component of the multiplicative decomposition.
multiple_results.seasonal.plot()

In [None]:
# Plot only the residual component of the multiplicative decomposition.
multiple_results.resid.plot()

In [None]:
# Additive decomposition of the same passenger series.
addict_results = seasonal_decompose(airline_data['passengers'], model='additive')

In [None]:
# Draw the additive decomposition result.
# The default figure size is set first so the figure created by .plot() uses it.
plt.rc('figure', figsize=[12, 8])
addict_results.plot()
plt.show()

# Evaluation metrics

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error 

In [None]:
# Load the small example data set used to demonstrate the error metrics.
example_data = pd.read_csv('https://raw.githubusercontent.com/Sangmann/lg/main/example-mad-mae.csv')

In [None]:
# Preview the first five rows.
example_data.head(5)

In [None]:
# List the column names (the metric cells read 'actual_y' and 'pred_y').
example_data.columns

In [None]:
def MAPE(y_true, y_pred):
    """Return the mean absolute percentage error, expressed in percent.

    Args:
        y_true: Array-like of actual values (list, tuple, ndarray or Series).
        y_pred: Array-like of predicted values, same length as ``y_true``.

    Returns:
        ``mean(|(y_true - y_pred) / y_true|) * 100``. Values are compared
        position by position. A zero in ``y_true`` makes the result ``inf``
        or ``nan`` because each error is divided by the actual value.
    """
    # Convert up front so plain Python sequences work, not only NumPy/pandas.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

def MAD(y_true, y_pred):
    """Return the mean absolute deviation (mean absolute error).

    Args:
        y_true: Array-like of actual values (list, tuple, ndarray or Series).
        y_pred: Array-like of predicted values, same length as ``y_true``.

    Returns:
        ``mean(|y_true - y_pred|)``, with values compared position by position.
    """
    # Convert up front so plain Python sequences work, not only NumPy/pandas.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.mean(np.abs(y_true - y_pred))

def MSE(y_true, y_pred):
    """Return the mean squared error.

    Args:
        y_true: Array-like of actual values (list, tuple, ndarray or Series).
        y_pred: Array-like of predicted values, same length as ``y_true``.

    Returns:
        ``mean((y_true - y_pred) ** 2)``, with values compared position by
        position.
    """
    # Convert up front so plain Python sequences work, not only NumPy/pandas.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.mean(np.square(y_true - y_pred))

#### MAD

In [None]:
# Calculate MAD (also called MAE) twice: once with the hand-written MAD()
# function and once with scikit-learn's mean_absolute_error, and print both.
mad1 = MAD(example_data['actual_y'], example_data['pred_y'])
print(f'MAD by coding = {mad1:.2f}')
mad2 = mean_absolute_error(example_data['actual_y'], example_data['pred_y'])
print(f'MAD in package = {mad2:.2f}')

#### MSE

In [None]:
# Calculate MSE twice: once with the hand-written MSE() function and once
# with scikit-learn's mean_squared_error, and print both.
mse1 = MSE(example_data['actual_y'], example_data['pred_y'])
print(f'MSE by coding = {mse1:.2f}')
mse2 = mean_squared_error(example_data['actual_y'], example_data['pred_y'])
print(f'MSE in package = {mse2:.2f}')

#### MAPE

In [None]:
# Calculate MAPE with the hand-written MAPE() function (result is in percent).
mape = MAPE(example_data['actual_y'], example_data['pred_y'])
print(f'MAPE by coding = {mape:.2f}')

# No-trend, linear-trend, quadratic-trend model

#### 1. no-trend model

In [None]:
# Load the data set for the no-trend example.
no_trend_data = pd.read_csv('https://raw.githubusercontent.com/Sangmann/lg/main/no-trend-data.csv')

In [None]:
# Preview the first five rows.
no_trend_data.head(5)

In [None]:
# No-trend model: the single forecast value is the average of the two
# per-year means ('year1' and 'year2').
pred_no_trend = 0.5 * (no_trend_data['year1'].mean() + no_trend_data['year2'].mean())
print(pred_no_trend)

In [None]:
# x: month numbers for both years on one axis; the second year is shifted by 12
#    (assumes 'Month' runs 1..12 within a year -- TODO confirm against the CSV).
x = pd.concat([no_trend_data['Month'], no_trend_data['Month']+12], axis=0)
# y1: observed values, year1 followed by year2, in the same order as x.
y1 = pd.concat([no_trend_data['year1'], no_trend_data['year2']], axis=0)
# y2: the constant prediction repeated once per point. It is sized from x
#     instead of a hard-coded 24 so it always matches the number of rows loaded.
y2 = np.full(len(x), pred_no_trend)

In [None]:
# Observed values (blue dots) against the constant no-trend prediction (red line).
fig = plt.figure(figsize=(12, 8))
plt.scatter(x, y1, color='b')
plt.plot(x, y2, color='r')

# Axis labels and title are independent of each other; all use font size 12.
plt.xlabel('Month', fontsize=12)
plt.ylabel('tons', fontsize=12)
plt.title('Prediction of "No-trend model"', fontsize=12)

# Tick label size for both axes, set through rcParams in one call.
plt.rcParams.update({'xtick.labelsize': 10, 'ytick.labelsize': 10})
plt.show()

#### 2. Linear-trend model

In [None]:
# Load the data set for the linear-trend example.
linear_data = pd.read_csv('https://raw.githubusercontent.com/Sangmann/lg/main/linear-trend-data.csv')

In [None]:
# Preview the first five rows.
linear_data.head(5)

In [None]:
from statsmodels.formula.api import ols
import seaborn as sns

In [None]:
# Linear-trend model: ordinary least squares fit of 'Sales' on 'Time'.
linear_model = ols('Sales ~ Time', data=linear_data).fit()

In [None]:
# Show the fitted coefficients of the linear-trend model.
linear_model.params

In [None]:
# Show the full regression summary for the linear-trend model.
linear_model.summary()

In [None]:
# In-sample predictions: evaluate the fitted model at the observed 'Time' values.
pred_linear = linear_model.predict(linear_data['Time'])
pred_linear

In [None]:
# Predict for a single value, Time = 25, supplied as a one-row frame whose
# column name matches the 'Time' term in the model formula.
new_time = pd.DataFrame({'Time': [25]})
pred_new_time = linear_model.predict(new_time)
print(f'predition value when Time = 25 : {pred_new_time[0]:.2f}')

In [None]:
# Seaborn regression plot of 'Sales' against 'Time' on a 12x8-inch figure.
plt.rc('figure', figsize=(12, 8))
sns.regplot(data=linear_data, x='Time', y='Sales')
plt.show()

In [None]:
# Observed sales (green dots) against the linear-trend predictions (orange line).
fig = plt.figure(figsize=(12, 8))
plt.scatter(linear_data['Time'], linear_data['Sales'], color='g')
plt.plot(linear_data['Time'], pred_linear, color='orange')

# Axis labels and title are independent of each other; all use font size 12.
plt.xlabel('Time', fontsize=12)
plt.ylabel('Sales', fontsize=12)
plt.title('Prediction of "Linear-trend model"', fontsize=12)

# Tick label size for both axes, set through rcParams in one call.
plt.rcParams.update({'xtick.labelsize': 10, 'ytick.labelsize': 10})
plt.show()

#### 3. Quadratic-trend model

In [None]:
# Load the data set for the quadratic-trend example and preview the first five rows.
quadratic_data = pd.read_csv('https://raw.githubusercontent.com/Sangmann/lg/main/quadratic-trend-data.csv')
quadratic_data.head(5)

In [None]:
# List the column names (the model below uses 'Loan_request', 'Time' and 'Time_squared').
quadratic_data.columns

In [None]:
# Quadratic-trend model: ordinary least squares fit of 'Loan_request' on
# 'Time' and 'Time_squared' (presumably Time**2, precomputed in the CSV -- verify).
quadratic_model = ols('Loan_request ~ Time+Time_squared', data=quadratic_data).fit()

In [None]:
# Show the full regression summary for the quadratic-trend model.
quadratic_model.summary()

In [None]:
# In-sample predictions: evaluate the fitted model at the observed
# 'Time' / 'Time_squared' values.
pred_quadra = quadratic_model.predict(quadratic_data[['Time', 'Time_squared']])
pred_quadra

In [None]:
# Plot the observed values (blue dots) against the quadratic-trend
# predictions (red line).
fig = plt.figure(figsize=(12,8))
plt.scatter(quadratic_data['Time'], quadratic_data['Loan_request'], color='royalblue')
plt.plot(quadratic_data['Time'], pred_quadra, color='red')
plt.title('Prediction of "Quadratic-trend model"', fontsize=12)
plt.xlabel('Time', fontsize=12)
# The y-axis shows the 'Loan_request' column, so it is labelled accordingly.
plt.ylabel('Loan_request', fontsize=12)
plt.rc('xtick', labelsize=10)
plt.rc('ytick', labelsize=10)
plt.show()

In [None]:
# Fit a straight-line model to the same loan data so it can be compared
# with the quadratic fit, and compute its in-sample predictions.
linear_model_Loan = ols('Loan_request ~ Time', data=quadratic_data).fit()
pred_linear_Loan = linear_model_Loan.predict(quadratic_data['Time'])

# Plot the observed values together with both fitted curves.
# Each series is labelled so the legend tells the two models apart.
fig = plt.figure(figsize=(12,8))
plt.scatter(quadratic_data['Time'], quadratic_data['Loan_request'], color='royalblue', label='Actual')
plt.plot(quadratic_data['Time'], pred_quadra, color='red', label='Quadratic trend')
plt.plot(quadratic_data['Time'], pred_linear_Loan, color='green', label='Linear trend')
plt.title('Prediction of "Quadratic-trend model"', fontsize=12)
plt.xlabel('Time', fontsize=12)
plt.ylabel('Loan_request', fontsize=12)
plt.legend(fontsize=10)
plt.rc('xtick', labelsize=10)
plt.rc('ytick', labelsize=10)
plt.show()