# Json_parse.py
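Read JSON data and return:
- price_df: pd.DataFrame (closing prices, one column per asset)
- esg_s: pd.Series (average ESG score per asset)
- asset_arr: np.ndarray (asset codes)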

In [83]:
# Load the test JSON payload into `dic` (run inside the Jupyter notebook)
import json

# Pass the encoding explicitly: without it open() falls back to the
# platform locale, while JSON text is UTF-8.
with open("./small_dataset.json", encoding="utf-8") as f:
    dic = json.load(f)

In [84]:
# Imports
import numpy as np
import pandas as pd

In [85]:
def Json_parse(input_data):
    """
    Convert the decoded JSON payload into price and ESG containers.

    - input_data: dict whose "data" entry maps each asset code to a record
      with "History" (date -> fields, including "Close") and
      "Sustainability" -> "esgScores" (environment / governance / social).

    Returns (price_df, esg_s, asset_arr):
    - price_df: pd.DataFrame of Close prices, dates as index, one column per asset
    - esg_s: pd.Series with the mean of the three ESG pillar scores per asset
    - asset_arr: np.ndarray holding the full asset codes, in input order

    Raises KeyError if a record lacks one of the fields listed above.
    """
    records = input_data["data"]

    price_dict = {}
    esg_dict = {}
    for asset_code, record in records.items():
        # Use Close price as we assume "hold overnight" investors
        price_dict[asset_code] = {
            date: fields["Close"] for date, fields in record["History"].items()
        }

        scores = record["Sustainability"]["esgScores"]
        environment_score = scores["environmentScore"]
        try:
            governance_score = scores["governanceScore"]
        except KeyError:
            # Legacy spelling produced when "nan" was text-replaced by "null"
            # inside the key; still accepted so older data files keep working.
            governance_score = scores["governullceScore"]
        social_score = scores["socialScore"]

        # Take an average of the three pillar scores
        esg_dict[asset_code] = (
            environment_score + governance_score + social_score
        ) / 3

    price_df = pd.DataFrame.from_dict(price_dict)
    esg_s = pd.Series(esg_dict)

    # Build the array from the finished key list so NumPy sizes the string
    # dtype to the longest code; np.empty(n, dtype=str) would allocate
    # one-character slots and truncate every code to its first letter.
    asset_arr = np.array(list(records), dtype=str)

    return price_df, esg_s, asset_arr

In [99]:
# Parse the loaded payload, then display the price table as the cell output
price_df, esg_s, asset_arr = Json_parse(dic)
price_df

Unnamed: 0,EXPN.L,SHEL.L,BA.L,CNA.L,TSCO.L,PRU.L,PSN.L,SDR.L,AHT.L,SSE.L,BATS.L,RTO.L,SMIN.L
2023-10-02,2634.752686,2467.754150,984.191223,145.137985,251.357986,834.915466,987.654846,376.513550,4830.365234,1502.599121,2302.589844,581.111450,1558.920288
2023-10-03,2643.633789,2463.453125,990.043701,145.818466,249.245743,834.138977,969.558899,373.120667,4745.053711,1471.627686,2280.755859,577.385071,1575.473511
2023-10-04,2651.528320,2409.931152,953.953430,143.047913,259.903046,831.615417,960.510986,371.989746,4640.131348,1459.529541,2241.181396,585.426086,1572.552368
2023-10-05,2683.105713,2420.444092,959.025635,147.491806,269.216125,830.062439,977.654480,371.518524,4686.218750,1465.820557,2282.120605,591.505981,1570.604858
2023-10-06,2726.524902,2467.275879,956.489502,147.884201,267.295898,842.097961,995.274170,374.534393,4801.927734,1470.659912,2258.921875,576.012268,1594.460938
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-09-24,3861.000000,2572.000000,1267.500000,119.599998,364.299988,664.799988,1681.500000,343.600006,5562.000000,1949.500000,2787.037109,360.600006,1725.000000
2024-09-25,3865.000000,2532.000000,1274.000000,117.550003,365.899994,642.200012,1641.000000,345.799988,5660.000000,1955.000000,2783.120117,376.700012,1764.000000
2024-09-26,3906.000000,2415.000000,1242.500000,117.050003,358.500000,681.599976,1651.000000,350.399994,5798.000000,1945.000000,2762.000000,375.700012,1756.000000
2024-09-27,3930.000000,2424.000000,1240.000000,117.199997,360.700012,700.000000,1662.500000,354.000000,5872.000000,1912.000000,2774.000000,377.200012,1761.000000


# Data

In [100]:
import numpy as np
import pandas as pd
import cvxpy as cp
from sklearn.covariance import LedoitWolf
import matplotlib.pyplot as plt
from matplotlib import cm
import seaborn as sns
import os
import warnings

# Evolutionary multiobjective
from pymoo.factory import get_sampling, get_crossover, get_mutation, get_problem
from pymoo.algorithms.moo.nsga2 import NSGA2
from pymoo.optimize import minimize
from pymoo.factory import get_termination
from pymoo.core.problem import ElementwiseProblem
from scipy.interpolate import griddata

In [101]:
def compute_mu_sigma(prices, lookback=252, shrinkage=True):
    """Estimate expected returns (mu) and covariance (Sigma) from price data.

    - prices: DataFrame of prices, one column per asset; consecutive rows
      are treated as consecutive periods
    - lookback: number of periods per year used to annualize (default 252)
    - shrinkage: use the Ledoit-Wolf estimator if True, else the sample
      covariance

    Returns (mu, Sigma, assets):
    - mu: annualized simple mean of per-period returns (1-D array)
    - Sigma: annualized covariance matrix (2-D array)
    - assets: list of column names, in the order used by mu and Sigma
    """
    # Forward-fill gaps explicitly before taking returns: pct_change() used
    # to do this implicitly, but that default is deprecated in pandas.
    rets = prices.ffill().pct_change(fill_method=None).dropna()

    # Annualize with the caller-supplied factor instead of a hard-coded 252
    mu = (rets.mean() * lookback).values

    if shrinkage:
        Sigma_daily = LedoitWolf().fit(rets.values).covariance_
    else:
        Sigma_daily = np.cov(rets.values.T)

    Sigma = Sigma_daily * lookback
    assets = rets.columns.tolist()
    return mu, Sigma, assets

In [102]:
def compute_mu_sigma(prices_df, lookback=252, shrinkage=True):
    """
    Estimate expected returns (mu) and covariance (Sigma) from price data.

    - prices_df: DataFrame of prices, one column per asset; consecutive rows
      are treated as consecutive periods
    - lookback: number of periods per year used to annualize (default 252)
    - shrinkage: use the Ledoit-Wolf estimator if True, else the sample
      covariance

    Returns (mu, Sigma, assets):
    - mu: annualized simple mean of per-period returns (1-D array)
    - Sigma: annualized covariance matrix (2-D array)
    - assets: list of column names, in the order used by mu and Sigma
    """
    # Percentage change between each row and the prior one. Missing prices
    # are forward-filled explicitly: pct_change() used to pad implicitly, but
    # that default is deprecated, so spelling it out keeps results stable
    # across pandas versions and avoids the FutureWarning.
    rets = prices_df.ffill().pct_change(fill_method=None).dropna()
    mu_daily = rets.mean()
    mu_annual = mu_daily * lookback

    if shrinkage:
        # this model shrinks sample covariance towards a more stable target
        lw = LedoitWolf().fit(rets.values)
        Sigma_daily = lw.covariance_
    else:
        Sigma_daily = np.cov(rets.values.T)

    Sigma_annual = Sigma_daily * lookback
    mu = mu_annual.values
    Sigma = Sigma_annual
    assets = rets.columns.tolist()

    # Test for normality of returns (disabled; would need scipy.stats as `stats`)
    # p_values = []
    # for asset in rets.columns:
    #     stat, p = stats.shapiro(rets[asset])
    #     p_values.append(p)

    return mu, Sigma, assets

In [103]:
# Sample covariance (shrinkage disabled); the returned asset list is bound to `_`
mu, Sigma, _ = compute_mu_sigma(price_df, shrinkage=False)

  rets = prices_df.pct_change().dropna()


In [105]:
# Show the asset list that the earlier unpacking bound to `_`
_

['EXPN.L',
 'SHEL.L',
 'BA.L',
 'CNA.L',
 'TSCO.L',
 'PRU.L',
 'PSN.L',
 'SDR.L',
 'AHT.L',
 'SSE.L',
 'BATS.L',
 'RTO.L',
 'SMIN.L']