# Import Libraries

In [5]:
import pandas as pd
import talib
import os
import sqlalchemy as sqla
import pymysql
from sqlalchemy import text

# Data Processing

## Connect to DataBase

In [6]:
# Build the SQLAlchemy engine for the MySQL database that holds the index time series.
# The URL can be overridden with the QF5214_DB_URL environment variable, so the
# notebook can target another server without editing this cell; when the variable
# is unset, the original URL is used unchanged.
# NOTE(review): the fallback URL embeds a plaintext password and a server address --
# rotate the credential and remove it from the notebook once the variable is in use.
# NOTE(review): the database name contains a space ('qf5214 test'); confirm that this
# is the real schema name.
db_url = os.environ.get(
    'QF5214_DB_URL',
    'mysql+pymysql://root:123456789@34.124.177.28:3306/qf5214 test',
)
db = sqla.create_engine(db_url)
# The connection is left open because the query cell below reads through `conn`.
conn = db.connect()

## Get Stock Time Series Data

In [7]:
# Fetch id, date and close for every row of the daily index table,
# then order the rows chronologically.
query = text('select id, date, close from timeseries_daily_index')
raw_close = pd.read_sql(query, con=conn)
stock_close = raw_close.sort_values(by='date')

## Save DataBase Data

In [9]:
# Cache the query result on disk so later cells can run without the database.
# `exist_ok=True` creates the folder when it is missing and does nothing when it
# already exists, which avoids the separate existence check (and the window
# between checking and creating).
os.makedirs('../db_data/', exist_ok=True)
# `id` becomes the first CSV column; `date` and `close` follow.
stock_close.set_index('id').to_csv('../db_data/stock_close.csv')

## Process Saved Data

In [10]:
# Reload the cached closes; the header row of the file is replaced by the
# column names used in the rest of the notebook.
stock_close = pd.read_csv(
    '../db_data/stock_close.csv',
    header=0,
    names=['order_book_id', 'date', 'close'],
)

In [11]:
# Reshape to one row per date and one column per index, keeping the close price
# of 4 indices: '000016.XSHG', '000300.XSHG', '000852.XSHG', '000905.XSHG'.
# Passing values='close' yields flat column labels directly, so there is no
# MultiIndex level to strip afterwards.
stock_data = stock_close.pivot(index='date', columns='order_book_id', values='close')
# Carry the last known close forward over missing entries. `.ffill()` replaces
# `fillna(method='ffill')`, whose `method` argument is deprecated in recent pandas.
stock_data = stock_data[['000016.XSHG', '000300.XSHG', '000852.XSHG', '000905.XSHG']].ffill()

# Stock Indicators

In [12]:
# 1- Simple moving averages of the close over 5, 10 and 20 rows.
stock_data_MA5, stock_data_MA10, stock_data_MA20 = (
    stock_data.rolling(window=span).mean() for span in (5, 10, 20)
)

In [13]:
# 2- Relative Strength Index (RSI), 14-period, computed per index column with TA-Lib.
stock_RSI = pd.DataFrame(
    {
        code: talib.RSI(stock_data[code].values, timeperiod=14)
        for code in stock_data.columns
    },
    index=stock_data.index,
)

In [14]:
# 3- Moving Average Convergence/Divergence (MACD), 12/26/9 periods.
# talib.MACD returns three arrays per column; the first goes to `macd`, the
# second to `MACD_signal`, and the third is not used.
macd_parts = {
    code: talib.MACD(stock_data[code].values, fastperiod=12, slowperiod=26, signalperiod=9)
    for code in stock_data.columns
}
macd = pd.DataFrame(
    {code: parts[0] for code, parts in macd_parts.items()},
    index=stock_data.index,
)
MACD_signal = pd.DataFrame(
    {code: parts[1] for code, parts in macd_parts.items()},
    index=stock_data.index,
)

In [15]:
# 4- Bollinger Bands: rolling mean plus/minus two rolling standard deviations.
n = 20
rolling_n = stock_data.rolling(window=n)
# Middle band (n-row simple moving average)
SMA_n = rolling_n.mean()
# Rolling standard deviation over the same window
stock_std = rolling_n.std()
# Upper and lower bands sit two standard deviations away from the middle band
band_offset = 2 * stock_std
Upperband = SMA_n + band_offset
Lowerband = SMA_n - band_offset

In [16]:
# 5- Daily return: fractional change of each close versus the previous row.
stock_ret = stock_data.pct_change(periods=1)


In [17]:
# 6- 20-day volatility: rolling standard deviation over a 20-row window.
# NOTE(review): this is taken on the close levels in `stock_data`, not on the
# returns, so it equals the Bollinger `stock_std` -- confirm this is intended.
vol_window = 20
stock_vol = stock_data.rolling(window=vol_window).std()

In [18]:
# Concatenate the indicator frames side by side and rename the columns to
# '<index>' for the raw close and '<index>_<indicator>' for everything else.
# The names are generated rather than typed out by hand; the hand-written list
# ended in 'CSI50_vol' where 'CSI500_vol' was meant.
# Short index names, in the same order as the columns of `stock_data`.
index_names = ['SSE50', 'CSI300', 'CSI1000', 'CSI500']
# Column-name suffix -> indicator frame, in output column order.
indicator_frames = {
    '': stock_data,
    '_MA5': stock_data_MA5,
    '_MA10': stock_data_MA10,
    '_MA20': stock_data_MA20,
    '_RSI': stock_RSI,
    '_MACD': macd,
    '_MACD_signal': MACD_signal,
    '_upperband': Upperband,
    '_lowerband': Lowerband,
    '_return': stock_ret,
    '_vol': stock_vol,
}
indicator_list = list(indicator_frames.values())
all_indicators = pd.concat(indicator_list, axis=1)
# One block of four names per indicator, matching the concatenation order above.
all_indicators.columns = [
    name + suffix for suffix in indicator_frames for name in index_names
]

# Save Result

In [19]:
# Save the indicator result.
# `exist_ok=True` creates the folder when it is missing and does nothing when it
# already exists, which avoids the separate existence check.
os.makedirs('../result/', exist_ok=True)
# Rows with any missing indicator (the warm-up rows of the rolling windows and
# TA-Lib look-backs) are dropped before writing.
all_indicators.dropna(axis=0, how='any').to_csv('../result/all_indicators_stock.csv')

# Check Result

In [20]:
# Reload the saved indicators, index them by date, and parse the index as datetimes.
saved_indicators = pd.read_csv('../result/all_indicators_stock.csv')
s_df = saved_indicators.set_index('date')
s_df.index = pd.to_datetime(s_df.index)

# Plot

In [21]:
# Show the working directory, against which the relative '../' paths used in
# this notebook are resolved.
working_dir = os.getcwd()
print(working_dir)

/Users/jiming/Desktop/Project/code


In [22]:
%load_ext autoreload
%autoreload 2

from plot_line import plot_line

In [23]:
# Express the daily returns in percent. The columns are picked by name (the same
# 'return' match the returns chart uses) instead of the positional slice 36:40,
# so the scaling still hits the right columns if indicators are added or reordered.
# NOTE: this scales in place -- re-running the cell multiplies by 100 again.
is_return = s_df.columns.str.contains('return')
s_df.loc[:, is_return] = s_df.loc[:, is_return] * 100

## Basic Chart - Index

In [27]:
# The first four columns are the index close levels; later cells reuse the
# column labels of `df0` as series names.
df0 = s_df.iloc[:, :4]
title = 'Stock Index'
params_dict = dict(
    title=title,
    df=df0,
    col_dict={0: df0.columns},
    freq='d',
)

plot_line(**params_dict)

<pyecharts.charts.basic_charts.line.Line at 0x1bf7dcd90>

## MA5 of Stock Index

In [28]:
# Select the MA5 columns and relabel them with the plain index names from `df0`.
mask = s_df.columns.str.contains('MA5')
df = s_df.loc[:, mask]
df.columns = df0.columns
title = 'MA5 of Stock Index'
params_dict = dict(
    title=title,
    df=df,
    col_dict={0: df.columns},
    freq='d',
)

plot_line(**params_dict)

<pyecharts.charts.basic_charts.line.Line at 0x1bfc0b9a0>

## MA10 of Stock Index

In [29]:
# Select the MA10 columns and relabel them with the plain index names from `df0`.
mask = s_df.columns.str.contains('MA10')
df = s_df.loc[:, mask]
df.columns = df0.columns
title = 'MA10 of Stock Index'
params_dict = dict(
    title=title,
    df=df,
    col_dict={0: df.columns},
    freq='d',
)

plot_line(**params_dict)

<pyecharts.charts.basic_charts.line.Line at 0x1bf506670>

## MA20 of Stock Index

In [30]:
# Select the MA20 columns and relabel them with the plain index names from `df0`.
mask = s_df.columns.str.contains('MA20')
df = s_df.loc[:, mask]
df.columns = df0.columns
title = 'MA20 of Stock Index'
params_dict = dict(
    title=title,
    df=df,
    col_dict={0: df.columns},
    freq='d',
)

plot_line(**params_dict)

<pyecharts.charts.basic_charts.line.Line at 0x1bf874580>

## RSI of Stock Index

In [31]:
# Select the RSI columns and relabel them with the plain index names from `df0`.
mask = s_df.columns.str.contains('RSI')
df = s_df.loc[:, mask]
df.columns = df0.columns
title = 'RSI of Stock Index'
params_dict = dict(
    title=title,
    df=df,
    col_dict={0: df.columns},
    freq='d',
)

plot_line(**params_dict)

<pyecharts.charts.basic_charts.line.Line at 0x1bfbf9910>

## MACD of Stock Index

In [32]:
# Select the MACD line columns: names containing 'MACD' but not 'signal',
# which excludes the '*_MACD_signal' columns.
has_macd = s_df.columns.str.contains('MACD')
has_signal = s_df.columns.str.contains('signal')
df = s_df.loc[:, has_macd & ~has_signal]
df.columns = df0.columns
title = 'MACD of Stock Index'
params_dict = dict(
    title=title,
    df=df,
    col_dict={0: df.columns},
    freq='d',
)

plot_line(**params_dict)

<pyecharts.charts.basic_charts.line.Line at 0x1bf8746a0>

## MACD Signal of Stock Index

In [33]:
# Select the MACD signal-line columns and relabel them with the plain index names.
mask = s_df.columns.str.contains('MACD_signal')
df = s_df.loc[:, mask]
df.columns = df0.columns
title = 'MACD Signal of Stock Index'
params_dict = dict(
    title=title,
    df=df,
    col_dict={0: df.columns},
    freq='d',
)

plot_line(**params_dict)

<pyecharts.charts.basic_charts.line.Line at 0x1bfc15d00>

## Bollinger Bands of Stock Index

In [34]:
# Select the upper- and lower-band columns (names containing 'band').
df = s_df.loc[:, s_df.columns.str.contains('band')]
# Keep only the index name (the part before the first '_'), so an upper-band
# column and its lower-band counterpart share one label.
df.columns = [label.split('_')[0] for label in df.columns]
# First four columns: upper bands, rows in their original order.
df_1 = df.iloc[:, :4]
# Last four columns: lower bands, with the row order reversed.
# NOTE(review): presumably so each series runs forward along the upper band and
# back along the lower band when drawn -- confirm against plot_line.
df_2 = df.iloc[:, 4:][::-1]
# Stack the two pieces vertically so each index has a single column.
df = pd.concat([df_1, df_2], axis=0)
# Chart title spelling corrected ('Bollingar' -> 'Bollinger').
title = 'Bollinger Bands of Stock Index'
params_dict = {
    'title': title,
    'df': df,
    'col_dict': {0: df.columns},
    'freq': 'd'
}

plot_line(**params_dict)

<pyecharts.charts.basic_charts.line.Line at 0x1bfa66670>

## Returns of Stock Index

In [35]:
# Select the return columns and label each series '<index>%'.
mask = s_df.columns.str.contains('return')
df = s_df.loc[:, mask]
df.columns = [name + '%' for name in df0.columns]
title = 'Returns of Stock Index'
params_dict = dict(
    title=title,
    df=df,
    col_dict={0: df.columns},
    freq='d',
)

plot_line(**params_dict)

<pyecharts.charts.basic_charts.line.Line at 0x1bfa66a30>

## Volatility of Stock Index

In [36]:
# Select the volatility columns and label each series '<index>%'.
# NOTE(review): unlike the return columns, the '*_vol' columns are not scaled
# by 100 anywhere in this notebook -- confirm that the '%' label is appropriate.
mask = s_df.columns.str.contains('vol')
df = s_df.loc[:, mask]
df.columns = [name + '%' for name in df0.columns]
title = 'Volatilities of Stock Index'
params_dict = dict(
    title=title,
    df=df,
    col_dict={0: df.columns},
    freq='d',
)

plot_line(**params_dict)

<pyecharts.charts.basic_charts.line.Line at 0x1bf7dcac0>