In [79]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm import tqdm

In [256]:
companies = ['Cap_63', 'Cap_126', 'Ele_63', 'Ele_126', 'Fin_63', 'Fin_126', 'FinEle_63', 'FinEle_126']

for company in companies:
    print(company)
    sector, ws = company.split('_')[0], int(company.split('_')[1])
    input_path = f'../01 Raw Data/{sector}'

    macro = pd.read_csv('../01 Raw Data/Macro/macro.csv', index_col='Date')
    stock_return = pd.read_csv(f'{input_path}/Stock returns.csv', index_col='Date')
    stock_return = stock_return[stock_return.index.isin(macro.index)]
    stock_rank = pd.read_csv(f'{input_path}/Top rank company.csv', index_col='Date', parse_dates=['Date'])
    stock_rank = stock_rank[stock_rank.index.isin(macro.index)]
    stock_select = pd.read_csv(f'{input_path}/Selected company.csv', index_col='Date', parse_dates=['Date'])
    stock_select = stock_select[stock_select.index.isin(macro.index)]

    # Lambda
    full_lambda = np.zeros((stock_return.shape[0] - int(ws), len(stock_return.columns)))
    for i in range(len(stock_return.columns)):
        lambda_firm = pd.read_csv(f'{company}/lambda_{i+1}.csv')
        full_lambda[:, i] = lambda_firm.iloc[:, 1].to_numpy()
    full_lambda = pd.DataFrame(full_lambda)
    full_lambda.index = stock_return.index[ws:]
    full_lambda.index = pd.to_datetime(full_lambda.index)
    full_lambda.index.name = None
    full_lambda.columns = stock_return.columns

    for date in full_lambda.index:
        top_company = stock_rank.loc[date].values
        top_company = [str(stock) for stock in top_company]
        full_lambda.loc[date, ~full_lambda.columns.isin(top_company)] = np.nan
    full_lambda.to_csv(f'{company}/full_lambda.csv', index=True, index_label='Date')

    # Check point
    if stock_select.iloc[ws:,:].equals(~full_lambda.isna()):
        print('Success!')
    else:
        print('WARNING! Mismatch with the selected companies data!!!')

    # New beta
    full_variables = stock_return.columns.append(macro.columns)
    for i in tqdm(range(0, len(stock_return.columns))):
        beta = pd.read_csv(f'{company}/beta_{i+1}.csv', index_col=0)
        new_beta = pd.DataFrame(np.nan, index=stock_return.index[ws:], columns=full_variables)
        new_beta.drop(full_variables[i], axis=1, inplace=True)

        j = 0
        for date in stock_return.index[ws:]:
            k = 0
            for column in new_beta.columns:
                if column in str(stock_rank.iloc[j].values):
                    new_beta.loc[date, column] = beta.iloc[j,k]
                    k += 1
            j += 1
        new_beta.iloc[:,-len(macro.columns):] = beta.iloc[:,-len(macro.columns):]
        new_beta.to_csv(f'{company}/new_beta_{i+1}.csv', index=True, index_label='Date')


    # Coefficient matrix
    p = stock_return.shape[1] + macro.shape[1] - 1
    full_beta = np.zeros((stock_return.shape[0] - ws, stock_return.shape[1] * p))

    for i in range(stock_return.shape[1]):
        beta_coin = pd.read_csv(f'{company}/new_beta_{i+1}.csv', index_col='Date')
        full_beta[:, p*i:p*(i+1)] = beta_coin.values[:, :]

    coef_list = []
    for i in tqdm(range(stock_return.shape[0]-ws)):
        day_coef = np.zeros((stock_return.shape[1], stock_return.shape[1]))
        for j in range(stock_return.shape[1]):
            coef_coin = full_beta[i, p*j:(p*(j+1)-macro.shape[1])]
            coef_coin = np.insert(coef_coin, j, 0)
            day_coef[j, :] = coef_coin
        day_coef = pd.DataFrame(day_coef, columns=stock_return.columns)
        nan_positions = np.argwhere(np.isnan(day_coef.values))
        for pos in nan_positions:
            day_coef.iloc[pos[1], pos[0]] = np.nan
        coef_list.append(day_coef)
    coef = pd.concat(coef_list, axis=1)
    coef.index = stock_return.columns
    coef.to_csv(f'{company}/coefficient matrix.csv', index=True, index_label='Company')

Cap_63
Success!


100%|██████████| 49/49 [07:58<00:00,  9.76s/it]
100%|██████████| 4143/4143 [01:16<00:00, 54.34it/s]


Cap_126
Success!


100%|██████████| 49/49 [07:54<00:00,  9.68s/it]
100%|██████████| 4080/4080 [01:09<00:00, 58.44it/s]


Ele_63
Success!


100%|██████████| 43/43 [06:23<00:00,  8.91s/it]
100%|██████████| 4143/4143 [00:49<00:00, 83.56it/s]


Ele_126
Success!


100%|██████████| 43/43 [06:15<00:00,  8.74s/it]
100%|██████████| 4080/4080 [00:48<00:00, 83.35it/s]


Fin_63
Success!


100%|██████████| 29/29 [03:20<00:00,  6.93s/it]
100%|██████████| 4143/4143 [00:13<00:00, 306.27it/s]


Fin_126
Success!


100%|██████████| 29/29 [03:18<00:00,  6.83s/it]
100%|██████████| 4080/4080 [00:13<00:00, 304.22it/s]


FinEle_63
Success!


100%|██████████| 42/42 [06:09<00:00,  8.79s/it]
100%|██████████| 4143/4143 [00:46<00:00, 89.06it/s]


FinEle_126
Success!


100%|██████████| 42/42 [06:04<00:00,  8.68s/it]
100%|██████████| 4080/4080 [00:45<00:00, 89.02it/s]
