# Time Series Modelling

In [1]:
# import libraries
import os
import sys
import urllib.request
import json
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import plotly.express as px
import plotly.graph_objects as go
from pytrends.request import TrendReq
import statsmodels.api as sm
import statsmodels.formula.api as smf

## 1. Importing Data & Cleaning

* Google Trends
* Count of articles in five newspapers over time
* Covid-19 data
* Hyundai EV, HEV, and PHEV sales data

In [2]:
# google trends

# Search terms to compare: "전기차" (electric vehicle) and "친환경" (eco-friendly).
kw_list = ["전기차", "친환경"]

# Korean-language client; tz=540 is presumably the UTC+9 offset in minutes -- confirm against pytrends docs.
pytrends = TrendReq(hl='ko', tz=540)
pytrends.build_payload(kw_list, cat=0, timeframe="2020-02-05 2022-11-01", geo="KR")

# Interest-over-time table, with its index moved into a regular column.
google = pytrends.interest_over_time().reset_index()

# Related queries for the same payload; keep the "top" table for the EV keyword.
trends = pytrends.related_queries()
top1 = trends["전기차"]["top"]

In [3]:
# naver news

# NOTE(review): machine-specific absolute path -- the notebook only runs where this file exists.
news = pd.read_csv("/Users/2hyunie/Documents/qmss_thesis/Datasets/raw/articles_ev.csv")
news['date'] = pd.to_datetime(news['date'])

# Select the five newspaper columns by name rather than by position, so a
# change in the CSV column order (or an extra column) raises a KeyError
# instead of silently summing the wrong fields.
outlet_cols = ['khan', 'hani', 'chosun', 'donga', 'joongang']
tot_count = news.loc[:, outlet_cols]

# Row-wise article count across all five outlets.
news['total'] = tot_count.sum(axis=1)
news.head()

Unnamed: 0,date,khan,hani,chosun,donga,joongang,total
0,2018-01-01,28,27,0,63,0,118
1,2018-02-01,30,20,0,60,0,110
2,2018-03-01,16,24,0,57,0,97
3,2018-04-01,23,20,0,49,0,92
4,2018-05-01,30,18,0,35,0,83


In [4]:
# covid data

# Position of each kept column in the raw CSV -> name used in this notebook.
# The raw headers are not shown here, so the selection stays positional.
covid_fields = {
    0: 'date',
    1: 'seoul_pat',
    2: 'seoul_new',
    6: 'seoul_death',
    11: 'nat_pat',
    12: 'nat_new',
    15: 'nat_death',
}

covid = pd.read_csv(
    "/Users/2hyunie/Documents/qmss_thesis/Datasets/raw/covid_data.csv"
).iloc[:, list(covid_fields)]
covid.columns = list(covid_fields.values())

# Parse the date strings into datetime64 values.
covid['date'] = pd.to_datetime(covid['date'])
covid.head()

Unnamed: 0,date,seoul_pat,seoul_new,seoul_death,nat_pat,nat_new,nat_death
0,2022-11-01,5003593,13017,5608,25615667.0,58379,29209
1,2022-10-31,4990582,3099,5603,25557309.0,18510,29176
2,2022-10-30,4987483,6708,5598,25538799.0,34511,29158
3,2022-10-29,4980775,7578,5597,25504288.0,37321,29131
4,2022-10-28,4973197,7480,5594,25466992.0,35924,29100


In [5]:
# ev sales data

# Load the sales table; missing cells are replaced with 0 before the date is parsed.
hyundai = pd.read_csv('/Users/2hyunie/Documents/qmss_thesis/Datasets/raw/hyundai.csv').fillna(0)
hyundai['date'] = pd.to_datetime(hyundai['date'])

# Column groups, declared once and reused below.
# Per the section comments of this cell: `_d` = domestic sales, `_e` = export sales.
ev_d_cols = ['ioniq_ev_d','g80_ev_d','kona_ev_d','gv70_ev_d']
ev_e_cols = ['ioniq_ev_e','g80_ev_e','kona_ev_e','gv70_ev_e']
hev_d_cols = ['avante_hev_d','ioniq_hev_d','sonata_hev_d','sonata_dn8_hev_d','grandeur_hev_d',
              'kona_hev_d','tuscon_hev_d','santa_hev_d']
hev_e_cols = ['avante_hev_e','ioniq_hev_e','sonata_hev_e','sonata_dn8_hev_e','grandeur_hev_e',
              'kona_hev_e','tuscon_hev_e','santa_hev_e']

# Sub-frames for each group (domestic, export, and both combined).
ev_d = hyundai[ev_d_cols]
ev_e = hyundai[ev_e_cols]
ev = hyundai[ev_d_cols + ev_e_cols]

hev_d = hyundai[hev_d_cols]
hev_e = hyundai[hev_e_cols]
hev = hyundai[hev_d_cols + hev_e_cols]

# Append the row-wise sums as new columns, in this order.
hyundai = hyundai.assign(
    ev_d=ev_d.sum(axis=1),
    ev_e=ev_e.sum(axis=1),
    ev=ev.sum(axis=1),
    hev_d=hev_d.sum(axis=1),
    hev_e=hev_e.sum(axis=1),
    hev=hev.sum(axis=1),
)

# total sales for ev, hev, phev
# Positional slice over columns 1..30; presumably these are the per-model
# sales columns that follow `date` in the CSV -- TODO confirm the column count.
cols = hyundai.iloc[:, 1:31]
hyundai['total'] = cols.sum(axis=1)
hyundai.head()

Unnamed: 0,date,avante_hev_d,ioniq_hev_d,ioniq_phev_d,ioniq_ev_d,sonata_hev_d,sonata_phev_d,sonata_dn8_hev_d,grandeur_hev_d,g80_ev_d,...,santa_hev_e,santa_phev_e,gv70_ev_e,ev_d,ev_e,ev,hev_d,hev_e,hev,total
0,2018-01-01,0.0,207.0,5.0,1086.0,289.0,1.0,0.0,1939,0.0,...,0.0,0.0,0.0,1086.0,873.0,1959.0,2435.0,4722.0,7157.0,10169.0
1,2018-02-01,0.0,322.0,11.0,949.0,294.0,4.0,0.0,1625,0.0,...,0.0,0.0,0.0,949.0,588.0,1537.0,2241.0,3657.0,5898.0,8517.0
2,2018-03-01,0.0,201.0,10.0,886.0,304.0,4.0,0.0,1945,0.0,...,0.0,0.0,0.0,886.0,1155.0,2041.0,2450.0,6469.0,8919.0,11918.0
3,2018-04-01,0.0,353.0,14.0,485.0,333.0,1.0,0.0,1728,0.0,...,0.0,0.0,0.0,485.0,655.0,1140.0,2414.0,6882.0,9296.0,11604.0
4,2018-05-01,0.0,259.0,7.0,548.0,347.0,4.0,0.0,2521,0.0,...,0.0,0.0,0.0,852.0,937.0,1789.0,3127.0,4276.0,7403.0,9845.0


## 2. Simple OLS

In [6]:
# trendline

# hyundai.plot(x = 'date', y = 'total', grid = True)
# sns.regplot(x = 'date', y = 'total', data = hyundai, scatter = False, ci = None, fit_reg = True, color = 'm')

In [7]:
# fit regression model

# lm_ev = smf.ols('total ~ date', data = hyundai).fit()
# lm_ev.summary()

In [8]:
# Correlation of two time series

# news['tot_pct'] = news['total'].pct_change()

# google.resample(rule='M').last()
# google['ev_pct'] = google['전기차'].pct_change()

# plt.scatter(news['tot_pct'], google['ev_pct'])
# plt.show()

# correlation = news['tot_pct'].corr(google['ev_pct'])
# print(correlation)

## 3. Lags?

## 4. TODO: section title