# Correlation
- https://www.investopedia.com/terms/c/correlation.asp
- How two variables move in relation to each other
- Measures association, not causation
- 1: perfect positive correlation
- 0: no linear correlation
- -1: perfect negative (inverse) correlation

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib notebook
from yahoofinancials import YahooFinancials
from http.client import IncompleteRead
import numpy as np

In [2]:
# Load the screening results, indexed by the first CSV column (the ticker
# symbols), and keep only the first five rows for the correlation study.
data = pd.read_csv('results/focus.csv', index_col=0)
df = data.iloc[:5]

In [3]:
# The frame's index holds the ticker symbols, so read them straight from
# the index instead of materialising every row with iterrows() only to
# throw the row away.
top_companies = df.index.tolist()
for ticker in top_companies:
    print(ticker)

ADMCM.HE
ICP1V.HE
VIAFIN.HE
NESTE.HE
GOFORE.HE


In [4]:
def price_history(company, timeline, start_date='2018-04-12',
                  end_date='2021-04-12', max_try=5):
    """Download the price history of ``company`` and save it as a CSV in ``results/``.

    Args:
        company: Ticker symbol as understood by YahooFinancials, e.g.
            ``'NESTE.HE'``. Only the part before the first ``.`` is used in
            the output file name.
        timeline: Interval passed to ``get_historical_price_data``.
            ``'weekly'`` and ``'monthly'`` are written to
            ``price_history_weekly_*`` / ``price_history_monthly_*``; any
            other value (e.g. ``'daily'``) is written to ``price_history_*``.
        start_date: First date of the requested range (``YYYY-MM-DD``).
        end_date: Last date of the requested range (``YYYY-MM-DD``).
        max_try: Maximum number of download attempts when the response is
            cut short (``IncompleteRead``).

    Returns:
        The path of the CSV that was written, or ``None`` when no data could
        be downloaded (in which case nothing is written).
    """
    prices = None
    for attempt in range(1, max_try + 1):
        try:
            yf = YahooFinancials(company)
            history = yf.get_historical_price_data(start_date, end_date, timeline)
            prices = history[company]['prices']
        except IncompleteRead:
            # Truncated HTTP response: retry, and report only once the
            # last allowed attempt has failed as well.
            if attempt == max_try:
                print(f'Failed {company}')
        except ValueError as e:
            # Retrying cannot fix a ValueError, so report it and stop.
            print(f'{e}: {company}')
            break
        else:
            break

    if prices is None:
        # Every attempt failed; there is nothing to save.
        return None

    # Weekly and monthly data get their own file-name infix so they do not
    # overwrite the default (daily) file of the same company.
    infix = f'{timeline}_' if timeline in ('weekly', 'monthly') else ''
    out_path = f"results/price_history_{infix}{company.split('.')[0]}.csv"
    pd.DataFrame.from_dict(prices).to_csv(out_path)
    return out_path

In [5]:
# Download the daily price history of every selected company and remember
# where each CSV was written.
top_prices = []
for ticker in top_companies:
    price_history(ticker, 'daily')
    csv_path = f"results/price_history_{ticker.split('.')[0]}.csv"
    top_prices.append(csv_path)
    print(csv_path)

results/price_history_ADMCM.csv
results/price_history_ICP1V.csv
results/price_history_VIAFIN.csv
results/price_history_NESTE.csv
results/price_history_GOFORE.csv


In [6]:
# Load every saved price history, indexed by its parsed date column.
# The CSVs were written together with their integer row index, which comes
# back as an 'Unnamed: 0' column and is dropped here. Building the list in
# a comprehension avoids overwriting the notebook-level `data` variable and
# avoids mutating the loaded frames in place.
df_data = [
    pd.read_csv(csv_path, index_col='formatted_date', parse_dates=True)
      .drop(columns='Unnamed: 0')
    for csv_path in top_prices
]

In [7]:
# Take the adjusted close of every company and line the series up side by
# side on their shared date index, one column per ticker.
companies = [prices['adjclose'] for prices in df_data]
df = pd.concat(companies, axis=1)
df.columns = top_companies
df

Unnamed: 0_level_0,ADMCM.HE,ICP1V.HE,VIAFIN.HE,NESTE.HE,GOFORE.HE
formatted_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-04-12,13.777140,6.000000,,17.098488,8.859752
2018-04-13,13.491439,5.920000,,16.907612,8.859752
2018-04-16,13.789043,5.980000,,16.735214,8.803500
2018-04-17,13.902134,6.000000,,16.876831,8.625367
2018-04-18,13.967607,6.060000,,16.618223,8.812875
...,...,...,...,...,...
2021-04-01,97.000000,24.400000,17.049999,46.389999,21.799999
2021-04-06,95.400002,24.750000,17.350000,45.470001,21.700001
2021-04-07,98.000000,25.200001,17.400000,45.549999,21.500000
2021-04-08,97.199997,25.799999,17.350000,45.040001,21.700001


In [10]:
# Daily log returns: ln(P_t / P_{t-1}). The first row is NaN because it has
# no previous price to compare against.
previous_close = df.shift(periods=1)
data = np.log(df.div(previous_close))
data

Unnamed: 0_level_0,ADMCM.HE,ICP1V.HE,VIAFIN.HE,NESTE.HE,GOFORE.HE
formatted_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-04-12,,,,,
2018-04-13,-0.020955,-0.013423,,-0.011226,0.000000
2018-04-16,0.021819,0.010084,,-0.010249,-0.006369
2018-04-17,0.008168,0.003339,,0.008427,-0.020442
2018-04-18,0.004699,0.009950,,-0.015442,0.021506
...,...,...,...,...,...
2021-04-01,0.012448,0.004107,-0.002928,0.024660,0.009217
2021-04-06,-0.016632,0.014242,0.017442,-0.020031,-0.004598
2021-04-07,0.026889,0.018019,0.002878,0.001758,-0.009259
2021-04-08,-0.008197,0.023530,-0.002878,-0.011260,0.009259


In [11]:
# Pairwise Pearson correlation of the log returns (pandas' default method).
data.corr(method='pearson')

Unnamed: 0,ADMCM.HE,ICP1V.HE,VIAFIN.HE,NESTE.HE,GOFORE.HE
ADMCM.HE,1.0,0.224499,0.250884,0.310234,0.223337
ICP1V.HE,0.224499,1.0,0.221908,0.17144,0.158463
VIAFIN.HE,0.250884,0.221908,1.0,0.159749,0.200921
NESTE.HE,0.310234,0.17144,0.159749,1.0,0.211714
GOFORE.HE,0.223337,0.158463,0.200921,0.211714,1.0


In [12]:
# Same correlation matrix, shaded so that stronger correlations are darker.
corr_matrix = data.corr()
corr_matrix.style.background_gradient(cmap='Blues')

Unnamed: 0,ADMCM.HE,ICP1V.HE,VIAFIN.HE,NESTE.HE,GOFORE.HE
ADMCM.HE,1.0,0.224499,0.250884,0.310234,0.223337
ICP1V.HE,0.224499,1.0,0.221908,0.17144,0.158463
VIAFIN.HE,0.250884,0.221908,1.0,0.159749,0.200921
NESTE.HE,0.310234,0.17144,0.159749,1.0,0.211714
GOFORE.HE,0.223337,0.158463,0.200921,0.211714,1.0
