In [56]:
import numpy as np
import pandas as pd

## 外部指標

In [57]:
transaction_data_path = '/Users/wangchangzai/Desktop/巨量資料應用專題/Nomurafunds/Merging/transaction/'

# read transaction table
feature = pd.read_csv(transaction_data_path + 'hyper_level_trans.csv', encoding='utf-8-sig')
feature.rename(columns={'ALLOT_DATE': 'Allot_Date', 'ALLOT_AMT': 'Allot_Amount'}, inplace=True)

In [58]:
# read TAIEX table
TAIEX = pd.read_csv('/Users/wangchangzai/Desktop/巨量資料應用專題/Nomurafunds/Merging/indices/TAIEX.csv', encoding='utf-8-sig')
# rename columns
TAIEX.rename(columns={'日期': 'Allot_Date', '三日平均':'TAIEX_index'}, inplace=True)
# turn TAIEX_index into float
per_TAIEX = []
for value in TAIEX['TAIEX_index']:
    tmp = value[:-1]
    per_TAIEX.append(tmp)

TAIEX['TAIEX_index'] = per_TAIEX
# drop unnecessary columns
TAIEX.drop(columns=['更改%'], inplace=True)
# merge TAIEX into feature table
feature = feature.merge(TAIEX, how='left', on='Allot_Date')

In [59]:
# read VIX table
VIX = pd.read_csv('/Users/wangchangzai/Desktop/巨量資料應用專題/Nomurafunds/Merging/indices/VIX.csv', encoding='utf-8-sig')
# rename columns
VIX.rename(columns={'日期': 'Allot_Date', '三日平均':'VIX_index'}, inplace=True)
# drop unnecessary columns
VIX.drop(columns=['更改%數'], inplace=True)
# merge TAIEX into feature table
feature = feature.merge(VIX, how='left', on='Allot_Date')

In [60]:
# read SnP_500 table
SnP_500 = pd.read_csv('/Users/wangchangzai/Desktop/巨量資料應用專題/Nomurafunds/Merging/indices/S&P_500.csv', encoding='utf-8-sig')
# rename columns
SnP_500.rename(columns={'Date': 'Allot_Date', '三日平均':'S&P_500_index'}, inplace=True)
# drop unnecessary columns
SnP_500.drop(columns=['變動率'], inplace=True)
# merge TAIEX into feature table
feature = feature.merge(SnP_500, how='left', on='Allot_Date')

In [61]:
feature

Unnamed: 0,Allot_Date,Fund_Name,Allot_Amount,TAIEX_index,VIX_index,S&P_500_index
0,2021-10-14,(未核備) 荷寶資本成長基金-荷寶中國股票,0,-0.51,-0.043700,0.005890
1,2021-12-06,(未核備) 荷寶資本成長基金-荷寶中國股票,-120524,-0.12,0.038333,0.011731
2,2021-10-01,(未核備) 荷寶資本成長基金-荷寶中國股票,-220000,-1.19,-0.007533,0.000384
3,2021-10-14,(未核備) 荷寶資本成長基金-荷寶中國股票,0,-0.51,-0.043700,0.005890
4,2021-12-06,(未核備) 荷寶資本成長基金-荷寶中國股票,-120524,-0.12,0.038333,0.011731
...,...,...,...,...,...,...
2391770,2022-01-20,駿利亨德森資產管理基金 - 駿利亨德森美國創業基金,500,-0.55,0.046400,-0.013038
2391771,2021-10-05,駿利亨德森資產管理基金 - 駿利亨德森美國創業基金,139140,-0.94,-0.015967,-0.005149
2391772,2022-01-20,駿利亨德森資產管理基金 - 駿利亨德森美國創業基金,500,-0.55,0.046400,-0.013038
2391773,2021-10-05,駿利亨德森資產管理基金 - 駿利亨德森美國創業基金,139140,-0.94,-0.015967,-0.005149


## 情緒分數

In [62]:
# read multiple csv as one dataframe
import glob

# get data file names
path = r'/Users/wangchangzai/Desktop/巨量資料應用專題/Nomurafunds/Merging/senti_score'
filenames = glob.glob(path + "/*.csv")

dfs = []
for filename in filenames:
    dfs.append(pd.read_csv(filename))

# Concatenate all data into one DataFrame
senti_score_df = pd.concat(dfs, ignore_index=True)

In [63]:
# rename columns of senti_score_df and merge then fill missing score with 0
senti_score_df.rename(columns={'fund': 'Fund_Name', 'publishAt': 'Allot_Date', 'positive': 'sentiment_score_pos', 'negative': 'sentiment_score_neg'}, inplace=True)
feature = feature.merge(senti_score_df, how='left', on=['Allot_Date', 'Fund_Name'])
feature.fillna(0, inplace=True)

In [64]:
feature = feature.reindex([
    'Allot_Date', 
    'Fund_Name', 
    'TAIEX_index', 
    'VIX_index', 
    'S&P_500_index', 
    'sentiment_score_pos', 
    'sentiment_score_neg', 
    'Allot_Amount'
    ], axis=1)

In [65]:
feature

Unnamed: 0,Allot_Date,Fund_Name,TAIEX_index,VIX_index,S&P_500_index,sentiment_score_pos,sentiment_score_neg,Allot_Amount
0,2021-10-14,(未核備) 荷寶資本成長基金-荷寶中國股票,-0.51,-0.043700,0.005890,0.0,0.0,0
1,2021-12-06,(未核備) 荷寶資本成長基金-荷寶中國股票,-0.12,0.038333,0.011731,0.0,0.0,-120524
2,2021-10-01,(未核備) 荷寶資本成長基金-荷寶中國股票,-1.19,-0.007533,0.000384,0.0,0.0,-220000
3,2021-10-14,(未核備) 荷寶資本成長基金-荷寶中國股票,-0.51,-0.043700,0.005890,0.0,0.0,0
4,2021-12-06,(未核備) 荷寶資本成長基金-荷寶中國股票,-0.12,0.038333,0.011731,0.0,0.0,-120524
...,...,...,...,...,...,...,...,...
2397312,2022-01-20,駿利亨德森資產管理基金 - 駿利亨德森美國創業基金,-0.55,0.046400,-0.013038,0.0,0.0,500
2397313,2021-10-05,駿利亨德森資產管理基金 - 駿利亨德森美國創業基金,-0.94,-0.015967,-0.005149,0.0,0.0,139140
2397314,2022-01-20,駿利亨德森資產管理基金 - 駿利亨德森美國創業基金,-0.55,0.046400,-0.013038,0.0,0.0,500
2397315,2021-10-05,駿利亨德森資產管理基金 - 駿利亨德森美國創業基金,-0.94,-0.015967,-0.005149,0.0,0.0,139140


In [66]:
feature.to_csv('/Users/wangchangzai/Desktop/巨量資料應用專題/Nomurafunds/FeatureTable/hyper_level_users.csv', index=False)