In [None]:
import pandas as pd
import numpy as np
import scipy.stats.mstats

# GET HISTORICAL DATA

In [None]:
# Load the raw candlestick history and parse the dates.
# NOTE(review): absolute, machine-specific path -- consider a configurable DATA_DIR.
histoStocks = pd.read_csv("/home/khll/School/ENSAE/CIAM/data/candlestick_fund_And_SX5E.csv")
histoStocks["Date"] = pd.to_datetime(histoStocks["Date"])

# We'll only use the last price for vol/cor calculations at this point.
lastPriceColumns = [colname for colname in histoStocks.columns if "Last" in colname]
histoData = histoStocks[["Date"] + lastPriceColumns]
# Strip the ".Last" suffix from the column names.
histoData.columns = [colname.replace(".Last", "") for colname in histoData.columns]

histoData = histoData.set_index('Date')
# Keep only the dates that have at least 20 non-missing prices.
histoData = histoData.dropna(thresh=20)

# Carry the last known price forward over gaps.
# .ffill() replaces fillna(method='ffill'), whose `method` argument is deprecated.
histoData = histoData.ffill()
histoData

# Define utility functions

In [None]:
def ciRollingSum(a, n):
    """Rolling sum of ``a`` over ``n`` consecutive entries along axis 0.

    Parameters
    ----------
    a : array-like
        Input values; the sum runs along the first axis.
    n : int
        Window length, must be >= 1.

    Returns
    -------
    For ``n == 1`` the input ``a`` itself (no copy, no dtype conversion).
    Otherwise a float array with ``len(a) - n + 1`` entries along axis 0,
    entry ``i`` being ``a[i] + ... + a[i + n - 1]`` (empty when ``n > len(a)``).

    Raises
    ------
    ValueError
        If ``n < 1``. Without this check a negative ``n`` would silently
        return meaningless values through negative slicing.
    """
    if n < 1:
        raise ValueError("n must be >= 1, got %r" % (n,))
    if n == 1:
        return a
    # Cumulative-sum trick: window sum = cumsum[i] - cumsum[i - n].
    ret = np.cumsum(a, axis=0, dtype=float)
    ret[n:] = ret[n:] - ret[:-n]
    # The first n - 1 entries are partial windows; drop them.
    return ret[n - 1:]

# GET DAY RETURN STOCKS & INDEX

In [None]:
# Horizon of the returns, in rows of histoData.
# NOTE(review): later cells also aggregate return1D with ciRollingSum(..., nbDayReturn);
# this is only equivalent while nbDayReturn == 1 -- confirm before changing it.
nbDayReturn = 1
# Log return: ln(P_t) - ln(P_{t - nbDayReturn}).
return1D = np.log(histoData) - np.log(histoData.shift(nbDayReturn))
# Drop the dates where no column has a return (e.g. the first nbDayReturn rows).
return1D = return1D.dropna(how='all')
# Fill remaining gaps with the previous return.
# .ffill() replaces fillna(method='ffill'), whose `method` argument is deprecated.
return1D = return1D.ffill()
return1D

# GET realised volatility (annualised)

In [None]:
# Realised volatility without winsorization: rolling standard deviation of the
# returns over 126 rows (6M) and 63 rows (3M), scaled by sqrt(252 / nbDayReturn).
volData_Realised_6M_NoWinsor = (
    return1D.rolling(window=126).std().mul(np.sqrt(252 / nbDayReturn))
)
volData_Realised_3M_NoWinsor = (
    return1D.rolling(window=63).std().mul(np.sqrt(252 / nbDayReturn))
)

# GET vol Realised Winsorized

In [None]:
def _winsorizedStd(window, pctWinsor):
    """Standard deviation of one rolling window after winsorizing it.

    window    : 1-D NumPy array supplied by ``rolling().apply(raw=True)``.
    pctWinsor : fraction of observations flattened on EACH side by
                ``scipy.stats.mstats.winsorize``.

    The winsorized returns are aggregated with ``ciRollingSum`` over
    ``nbDayReturn`` rows before taking the standard deviation.
    ``ddof=1`` is passed explicitly: NumPy's default (ddof=0) is the population
    estimator, whereas the non-winsorized volatilities in this notebook come
    from pandas ``rolling().std()``, which uses ddof=1. Using the same
    estimator keeps both sets of volatilities comparable.
    """
    flattened = scipy.stats.mstats.winsorize(window, pctWinsor).data
    return ciRollingSum(flattened, nbDayReturn).std(ddof=1)


#Remove/flatten 2 observations on each side (63 * 4% = 2.52 -> 2 on each side)
volData_Realised_Winsor_3M = return1D.rolling(63 + nbDayReturn - 1).apply(
    lambda s: _winsorizedStd(s, .04), raw=True
) * np.sqrt(252 / nbDayReturn)

#Remove/flatten 3 observations on each side (126 * 2.5% = 3.15 -> 3 on each side)
volData_Realised_Winsor_6M = return1D.rolling(126 + nbDayReturn - 1).apply(
    lambda s: _winsorizedStd(s, .025), raw=True
) * np.sqrt(252 / nbDayReturn)

# GET correl Realised

In [None]:
# Rolling correlation of every column of return1D against the 'SX5E Index'
# column, over 126 rows (6M) and 63 rows (3M). One Series per ticker is built
# first and the frames are assembled in a single step.
correl_Realised_6M = pd.DataFrame({
    tickerLoop: return1D[tickerLoop].rolling(126).corr(return1D['SX5E Index'])
    for tickerLoop in return1D.columns
})
correl_Realised_3M = pd.DataFrame({
    tickerLoop: return1D[tickerLoop].rolling(63).corr(return1D['SX5E Index'])
    for tickerLoop in return1D.columns
})


# GET correl Realised Winsorized

## 3M

In [None]:
# 3M rolling correlation of each ticker against the index, where the ticker's
# returns are winsorized inside every window (the index returns are not).
#63
NDay = 63
nWinsor = 4
pctWinsor = .04


# Window length in rows: NDay, plus the rows needed to aggregate
# nbDayReturn-row returns, plus nWinsor extra observations.
nDailyObs = NDay + (nbDayReturn - 1) + nWinsor
startDate = '2018-01-02'
indexTicker = 'SX5E Index'

# Loop invariants: the evaluation dates and the index return series.
dates = return1D.loc[startDate:].index
idxReturns = return1D[indexTicker]

# One record per (ticker, date); the frame is built once at the end.
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every call, which made the original loop quadratic.
records = []
for ticker in return1D.columns:
    print("Computing Correlation : " +ticker )
    tickerReturns = return1D[ticker]
    for date in dates:
        # Last nDailyObs observations up to and including `date`.
        stockData = tickerReturns.loc[:date].tail(nDailyObs).values
        idxData = idxReturns.loc[:date].tail(nDailyObs).values
        # Flatten pctWinsor of the stock observations on each side.
        wStockData = scipy.stats.mstats.winsorize(stockData, pctWinsor)
        stockRetData = ciRollingSum(wStockData, nbDayReturn)
        idxRetDate = ciRollingSum(idxData, nbDayReturn)
        corr = np.corrcoef(stockRetData, idxRetDate)[1,0]
        records.append({'Dates': date, 'Tickers': ticker, 'Values': corr})

correlRolling_ = pd.DataFrame(records, columns=['Dates', 'Tickers', 'Values'])
# Reshape to one row per date and one column per ticker.
correl_RealisedWinsor_3M = correlRolling_.pivot(index ='Dates', columns='Tickers', values ='Values')

## 6M

In [None]:
# 6M rolling correlation of each ticker against the index, where the ticker's
# returns are winsorized inside every window (the index returns are not).
#126
NDay = 126
nWinsor = 6
pctWinsor = .025

# Window length in rows: NDay, plus the rows needed to aggregate
# nbDayReturn-row returns, plus nWinsor extra observations.
nDailyObs = NDay + (nbDayReturn - 1) + nWinsor
startDate = '2018-01-02'
indexTicker = 'SX5E Index'

# Loop invariants: the evaluation dates and the index return series.
dates = return1D.loc[startDate:].index
idxReturns = return1D[indexTicker]

# One record per (ticker, date); the frame is built once at the end.
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every call, which made the original loop quadratic.
records = []
for ticker in return1D.columns:
    print("Computing Correlation : " +ticker )
    tickerReturns = return1D[ticker]
    for date in dates:
        # Last nDailyObs observations up to and including `date`.
        stockData = tickerReturns.loc[:date].tail(nDailyObs).values
        idxData = idxReturns.loc[:date].tail(nDailyObs).values
        # Flatten pctWinsor of the stock observations on each side.
        wStockData = scipy.stats.mstats.winsorize(stockData, pctWinsor)
        stockRetData = ciRollingSum(wStockData, nbDayReturn)
        idxRetDate = ciRollingSum(idxData, nbDayReturn)
        corr = np.corrcoef(stockRetData, idxRetDate)[1,0]
        records.append({'Dates': date, 'Tickers': ticker, 'Values': corr})

correlRolling_ = pd.DataFrame(records, columns=['Dates', 'Tickers', 'Values'])
# Reshape to one row per date and one column per ticker.
correl_RealisedWinsor_6M = correlRolling_.pivot(index ='Dates', columns='Tickers', values ='Values')