In [1]:
import numpy as np
import pandas as pd
import math
import copy
import json

In [2]:
# Number of days to include: only the first SIZE rows of the price table
# are kept for the analysis below.
SIZE = 175 

In [3]:
# Load the whitespace-separated price table; the file has no header row, so
# columns are labelled 0..N-1. `sep=r"\s+"` is the documented replacement for
# the deprecated `delim_whitespace=True` and parses the file the same way.
prices = pd.read_csv("prices.txt", sep=r"\s+", header=None)

In [4]:
# Keep only the first SIZE rows (explicit positional slice), then show the shape
prices_partial = prices.iloc[:SIZE]
prices_partial.shape

(175, 50)

In [5]:
# Return for each stock, each day: price[t+1] / price[t].
# Computed in one vectorized step on the underlying array instead of one
# label-based Series lookup per element; positional slicing also means the
# result does not depend on the frame's row/column labels.
price_values = prices_partial.to_numpy(dtype=float)

# Transpose so the outer list is indexed by stock and the inner list by day,
# which is the nested-list layout the following cells iterate over.
prices_ret = (price_values[1:] / price_values[:-1]).T.tolist()
        

In [6]:
# Log return: natural log of every ratio, in a new nested list with the same
# stock-major layout (prices_ret itself is left untouched)
prices_ret_log = [
    [math.log(ratio) for ratio in stock_series]
    for stock_series in prices_ret
]

In [7]:
# Sign return: True where the price went up (ratio strictly above 1),
# False otherwise, in a new nested list with the same stock-major layout
prices_ret_sign = [
    [ratio > 1 for ratio in stock_series]
    for stock_series in prices_ret
]
        

In [8]:
# Correlation matrices
# First turn each stock-major nested list into a frame with one column per
# stock (rows are days) ...
prices_ret = pd.DataFrame(prices_ret).transpose()
prices_ret_log = pd.DataFrame(prices_ret_log).transpose()
prices_ret_sign = pd.DataFrame(prices_ret_sign).transpose()

# ... then compute the pairwise column correlations of each frame.
corr_matrix = prices_ret.corr()
corr_matrix_log = prices_ret_log.corr()
corr_matrix_sign = prices_ret_sign.corr()

In [9]:
# Convert each correlation matrix into a dictionary of sorted rankings

def _rank_correlations(corr):
    """Rank, for every stock, all other stocks by correlation (highest first).

    Parameters
    ----------
    corr : pandas.DataFrame
        Square correlation matrix; entry (j, i) is read positionally as the
        correlation between stock i and stock j.

    Returns
    -------
    dict
        Maps "Stock <i>" to a list of (j, correlation) tuples covering every
        j != i, sorted by correlation in descending order.
    """
    n_stocks = corr.shape[0]
    ranked = {}
    for i in range(n_stocks):
        # Skip the diagonal: a stock's correlation with itself is not a peer.
        peers = [(j, corr.iat[j, i]) for j in range(n_stocks) if j != i]
        ranked["Stock " + str(i)] = sorted(
            peers, key=lambda pair: pair[1], reverse=True
        )
    return ranked


# Same ranking applied to the plain, log and sign return correlations.
ret_corr_dict = _rank_correlations(corr_matrix)
ret_corr_dict_log = _rank_correlations(corr_matrix_log)
ret_corr_dict_sign = _rank_correlations(corr_matrix_sign)
    
    


In [10]:
# Display the rankings built from the plain-return correlations: for each
# "Stock i", the (other stock, correlation) pairs in descending order.
ret_corr_dict

{'Stock 0': [(12, 0.18662394110931232),
  (41, 0.18127652331828806),
  (19, 0.15689815457737716),
  (14, 0.14195796479592604),
  (48, 0.13161125116986558),
  (40, 0.12771293918613652),
  (31, 0.11731657073593874),
  (30, 0.10751783471201228),
  (17, 0.09604877459025159),
  (8, 0.0871557808152043),
  (37, 0.08266491933174011),
  (33, 0.08243257304053685),
  (7, 0.08002497820847577),
  (10, 0.06980700828417087),
  (3, 0.06751223727866185),
  (24, 0.061437256740833494),
  (39, 0.05332821323850711),
  (46, 0.05255914154217563),
  (16, 0.05247786836469869),
  (11, 0.050485281525268835),
  (5, 0.041616810006319616),
  (1, 0.04058792357042951),
  (13, 0.040156787166813576),
  (9, 0.03821947344903504),
  (22, 0.03677836700694499),
  (15, 0.03655398304966223),
  (21, 0.03604384987516981),
  (42, 0.03410280638335495),
  (29, 0.02233993086288093),
  (28, 0.01966636085607703),
  (18, 0.01943766713242254),
  (23, 0.0016173376318757567),
  (25, 0.0006168953783082921),
  (49, -0.004457624067439568),


In [11]:
# Display the rankings built from the log-return correlations: for each
# "Stock i", the (other stock, correlation) pairs in descending order.
ret_corr_dict_log

{'Stock 0': [(12, 0.18657880337566027),
  (41, 0.1821262442416924),
  (19, 0.15683149438101848),
  (14, 0.14216315471824453),
  (48, 0.13128876358296826),
  (40, 0.1278758051964943),
  (31, 0.11770794157210941),
  (30, 0.10774010579628218),
  (17, 0.09562949683134243),
  (8, 0.08755809367625028),
  (33, 0.08263985186038764),
  (37, 0.08240144259247667),
  (7, 0.08039117410061168),
  (10, 0.06988144633610291),
  (3, 0.06763950256021074),
  (24, 0.0614854578718202),
  (39, 0.05368556249485106),
  (46, 0.05260126051690274),
  (16, 0.051851166163128994),
  (11, 0.05031266965369685),
  (5, 0.04108537352395025),
  (1, 0.04069760119547909),
  (13, 0.04032841934100449),
  (9, 0.03799537770429847),
  (22, 0.03671506298823425),
  (21, 0.03635479694092153),
  (15, 0.03577511480157441),
  (42, 0.034033825993252206),
  (29, 0.02200156281862641),
  (18, 0.020145274670796836),
  (28, 0.019832292746297026),
  (23, 0.0014646543489680208),
  (25, 0.0002645160566258218),
  (49, -0.004217305174290564),
  

In [12]:
# Display the rankings built from the sign-return correlations: for each
# "Stock i", the (other stock, correlation) pairs in descending order.
ret_corr_dict_sign

{'Stock 0': [(30, 0.173524024524938),
  (19, 0.16765930160795364),
  (16, 0.150849119675992),
  (8, 0.14992069785329945),
  (14, 0.12162667158167924),
  (24, 0.11479408805370399),
  (31, 0.11454274533406635),
  (28, 0.11437769912749948),
  (39, 0.10332980972515853),
  (10, 0.10301591497498684),
  (40, 0.10281760447612939),
  (48, 0.09124673029192824),
  (37, 0.08954394269430195),
  (46, 0.07965091850778183),
  (13, 0.06801616926835383),
  (17, 0.057346723044397516),
  (49, 0.057346723044397474),
  (3, 0.05689740776670607),
  (12, 0.05222823352602996),
  (11, 0.04655779025126021),
  (15, 0.0451141736697794),
  (34, 0.044530132955217405),
  (41, 0.03490217304521256),
  (22, 0.03461945031712469),
  (7, 0.03388040270533628),
  (20, 0.033346601424786376),
  (44, 0.020028613271904677),
  (21, 0.010361464016974319),
  (25, 0.0002643230079032435),
  (38, -0.001874197512055761),
  (36, -0.011363636363636364),
  (18, -0.011627906976744162),
  (23, -0.011898468083595211),
  (1, -0.012175769722230

In [13]:
# Persist the log-return correlation rankings as JSON
with open('../data/train_logret_corr.json', 'w') as out_file:
    json.dump(ret_corr_dict_log, out_file)