<a href="https://colab.research.google.com/github/Glenmond/sgdneer/blob/main/neer_weights.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install numpy
!pip install pandas
!pip install statsmodels
!pip install scipy



In [2]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from scipy.optimize import minimize
# For splitting the dataset into training and testing sets
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import plotly.graph_objects as go
import plotly.express as px

# Metric for evaluation
from sklearn.metrics import mean_squared_error

# Statsmodels for Linear Regression
import statsmodels.api as sm

# Hiding warnings
import warnings
warnings.filterwarnings("ignore")

In [3]:
# Mount Google Drive at /content/drive so the input workbook can be read (Colab-only cell).
from google.colab import drive
# force_remount=True asks Colab to mount again even if the drive is already mounted
drive.mount('/content/drive', force_remount=True)

# Sanity check: list the mount point to confirm the drive is visible
!ls /content/drive/

Mounted at /content/drive
MyDrive


In [39]:
# Settings
# base_date: first observation kept for the train/test experiment (rows before it are dropped).
# NOTE(review): '2022-10-21' in the original comment is presumably an alternative value that was tried - confirm.
base_date = '2015-01-02' # setting base date for data, '2022-10-21'
# model_date: first observation kept for the final re-estimation and the index calculation.
model_date = '2022-10-21'
test_size = 0.3 # fraction of observations passed to train_test_split as the test share

In [40]:
# Read the MAS workbook and rename the date / index columns.
df = pd.read_excel("drive/My Drive/Neer/input/mas.xlsx").rename(
    columns={'avg_wk_end': 'date', 'index': 'neer'}
)
# Parse the dates and use them as the row index.
df = df.assign(date=pd.to_datetime(df['date'])).set_index('date')
# The first four columns are kept as they are; every column from the fifth onwards is
# divided by 100 (S$ per 100 units -> S$ per unit, MAS data limitation).
df1 = df.iloc[:, :4]
df2 = df.iloc[:, 4:] / 100
df = pd.concat([df1, df2], axis=1)
# Keep an untouched copy of the levels for the later sections.
raw_df = df.copy()
df

Unnamed: 0_level_0,neer,EUR,GBP,USD,AUD,CNY,INR,IDR,JPY,KRW,MYR,TWD,PHP,THB
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2015-01-02,122.80,1.5994,2.0633,1.3264,1.0784,0.2137,0.020910,0.000106,0.011024,0.001204,0.3779,0.041848,0.029614,0.040218
2015-01-09,122.62,1.5780,2.0175,1.3366,1.0880,0.2153,0.021403,0.000106,0.011184,0.001223,0.3760,0.041825,0.029737,0.040663
2015-01-16,122.65,1.5418,2.0119,1.3246,1.0916,0.2135,0.021313,0.000105,0.011395,0.001226,0.3710,0.041948,0.029670,0.040557
2015-01-23,122.79,1.5197,2.0077,1.3374,1.0731,0.2149,0.021775,0.000107,0.011298,0.001234,0.3720,0.042721,0.030306,0.041012
2015-01-30,122.30,1.5300,2.0363,1.3512,1.0515,0.2162,0.021853,0.000107,0.011449,0.001233,0.3729,0.042904,0.030646,0.041296
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-01-31,139.17,1.4072,1.6828,1.3552,0.8429,0.1869,0.015641,0.000083,0.008780,0.000931,0.3066,0.041129,0.023225,0.040261
2025-02-07,139.14,1.4026,1.6793,1.3512,0.8496,0.1855,0.015452,0.000083,0.008915,0.000934,0.3043,0.041189,0.023294,0.040131
2025-02-14,139.53,1.4057,1.6879,1.3440,0.8498,0.1845,0.015476,0.000082,0.008794,0.000931,0.3025,0.041021,0.023197,0.039893
2025-02-21,139.86,1.4012,1.6903,1.3342,0.8541,0.1843,0.015411,0.000082,0.008875,0.000932,0.3020,0.040771,0.023037,0.039713


In [41]:
# filter df based on dates
df = df[df.index >= base_date]
df = df / df.loc[base_date]
df = df * 100 # make it over 100%
df = df.apply(lambda x: np.log(x / x.shift(1))) # get log returns
df = df.dropna() # remove na rows for missing data
df1 = df.iloc[:, :1]
df2 = df.iloc[:, 1:]
df2 = df2.apply(lambda x: x*-1) # convert FX returns into SGD as base ccy
df = pd.concat([df1, df2], axis=1)
df

Unnamed: 0_level_0,neer,EUR,GBP,USD,AUD,CNY,INR,IDR,JPY,KRW,MYR,TWD,PHP,THB
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2015-01-09,-0.001467,0.013470,0.022448,-0.007661,-0.008863,-0.007459,-0.023304,0.003492,-0.014409,-0.015658,0.005040,0.000550,-0.004145,-0.011004
2015-01-16,0.000245,0.023208,0.002780,0.009019,-0.003303,0.008396,0.004214,0.006165,-0.018690,-0.002450,0.013387,-0.002937,0.002256,0.002610
2015-01-23,0.001141,0.014438,0.002090,-0.009617,0.017093,-0.006536,-0.021445,-0.021180,0.008549,-0.006504,-0.002692,-0.018260,-0.021209,-0.011156
2015-01-30,-0.003999,-0.006755,-0.014145,-0.010266,0.020334,-0.006031,-0.003576,0.002705,-0.013277,0.000811,-0.002416,-0.004274,-0.011156,-0.006901
2015-02-06,-0.002374,-0.007618,-0.012202,0.004673,-0.000285,0.003243,0.002933,0.004775,-0.001309,-0.003239,-0.014906,0.001703,0.006482,0.000702
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-01-31,0.000575,0.002059,-0.005841,-0.003844,0.011911,-0.004290,0.000320,0.002524,-0.011224,0.013867,-0.002613,0.004803,-0.004877,-0.007529
2025-02-07,-0.000216,0.003274,0.002082,0.002956,-0.007917,0.007519,0.012157,0.004462,-0.015259,-0.003217,0.007530,-0.001458,-0.002967,0.003234
2025-02-14,0.002799,-0.002208,-0.005108,0.005343,-0.000235,0.005405,-0.001552,0.002784,0.013666,0.003217,0.005933,0.004087,0.004173,0.005948
2025-02-21,0.002362,0.003206,-0.001421,0.007318,-0.005047,0.001085,0.004209,0.007421,-0.009169,-0.001074,0.001654,0.006113,0.006921,0.004522


# OLS Regression
Assumptions:
1. Linear relationship between independent and dependent variable
2. There should be little to no multicollinearity in the data, i.e. no high correlation among the independent variables.
3. Observations should be independent of each other.
4. The residuals - errors between the predicted values and true values - should be normally distributed.
5. Variance of residuals should be constant across all levels of the dependent variable, described as homoscedasticity.

Refer to link: https://medium.com/latinxinai/mastering-linear-regression-with-statsmodels-95233a2a602e

In [42]:
# check for multicollinearity
corr = df.corr()
corr.style.background_gradient(cmap='coolwarm')
# 'RdBu_r', 'BrBG_r', & PuOr_r are other good diverging colormaps

Unnamed: 0,neer,EUR,GBP,USD,AUD,CNY,INR,IDR,JPY,KRW,MYR,TWD,PHP,THB
neer,1.0,0.138023,0.081264,0.528778,-0.062867,0.53184,0.381694,0.196482,0.270779,0.013677,0.151007,0.433455,0.41392,0.250846
EUR,0.138023,1.0,0.37857,-0.163643,0.187369,-0.112105,-0.146653,-0.110724,0.085079,-0.059334,-0.140526,-0.202481,-0.126983,-0.06124
GBP,0.081264,0.37857,1.0,-0.169397,0.223106,-0.064191,-0.09765,-0.02154,-0.144865,-0.006718,0.00023,-0.101931,-0.089995,-0.009786
USD,0.528778,-0.163643,-0.169397,1.0,-0.438436,0.592199,0.59112,0.075401,0.035239,-0.215079,-0.051275,0.503207,0.554448,0.127708
AUD,-0.062867,0.187369,0.223106,-0.438436,1.0,-0.237686,-0.217872,0.172575,-0.142375,0.267845,0.072964,-0.157641,-0.211539,-0.015637
CNY,0.53184,-0.112105,-0.064191,0.592199,-0.237686,1.0,0.403523,0.056263,0.046731,-0.049518,0.037728,0.465706,0.375688,0.138724
INR,0.381694,-0.146653,-0.09765,0.59112,-0.217872,0.403523,1.0,0.230681,-0.087636,0.009035,0.024221,0.410338,0.456356,0.132503
IDR,0.196482,-0.110724,-0.02154,0.075401,0.172575,0.056263,0.230681,1.0,-0.102118,0.088241,0.225802,0.120988,0.176153,0.160007
JPY,0.270779,0.085079,-0.144865,0.035239,-0.142375,0.046731,-0.087636,-0.102118,1.0,-0.046044,-0.156575,0.005013,0.047302,0.119093
KRW,0.013677,-0.059334,-0.006718,-0.215079,0.267845,-0.049518,0.009035,0.088241,-0.046044,1.0,0.128245,0.269211,0.039218,0.216755


In [43]:
# Regressors: every column except 'neer'.
X = df.drop(columns='neer')
# Target: the 'neer' column.
y = df['neer']

In [44]:
# Creating training and testing sets
# Split into training and testing sets. shuffle=False keeps the rows in their original
# order, so the test set is the last `test_size` share of the sample.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=test_size,
    shuffle=False,
    random_state=42,
)

# Add the constant column used as the intercept term to both design matrices.
X_train, X_test = sm.add_constant(X_train), sm.add_constant(X_test)

In [45]:
# using training data
Y = y_train
X = X_train

# define objective function (sum of squared residuals)
def objective_function(betas):
    Estimated_Y = np.dot(X, betas)
    return np.sum((Y - Estimated_Y) ** 2)

# Defining the constraint - sum of betas equals 1
constraints = {'type': 'eq', 'fun': lambda betas: np.sum(betas) - 1}

# Initial guess for betas - use any numbers that sum to 1
initial_betas = np.zeros(X.shape[1])

# Minimize the objective function and calculating the estimated values
result = minimize(objective_function, initial_betas, constraints=constraints)
print("Optimized Coefficients:", result.x)

Optimized Coefficients: [7.14678458e-06 1.19348482e-01 4.18038065e-02 1.22402680e-01
 6.52689512e-02 1.22929881e-01 3.36438595e-02 3.23686811e-02
 9.91547083e-02 3.90562363e-02 1.16511839e-01 9.22357088e-02
 4.37207244e-02 7.15472960e-02]


In [46]:
# Coefficients returned by the optimiser, in the column order of X.
optimized_betas = result.x
# Multiply every coefficient by 100 so the weights read as percentages.
Weights = list(optimized_betas * 100)
Currency = X.columns.tolist()
# Column name -> weight, plus (name, weight) pairs ordered from largest to smallest weight.
Currency_Weights_dict = dict(zip(Currency, Weights))
sorted_Currency_Weights = sorted(Currency_Weights_dict.items(), key=lambda pair: pair[1], reverse=True)

In [47]:
# Tabulate the weights (in %) and order them from largest to smallest.
ccy_df = pd.DataFrame(Currency_Weights_dict.items(), columns=['ccy', 'weights'])
ccy_df = ccy_df.sort_values('weights', ascending=False)

# The regression intercept ('const') is not a basket currency, so it is left out of the chart.
# plotly.express is already imported as `px` in the imports cell; no re-import needed here.
fig = px.bar(ccy_df[ccy_df['ccy'] != 'const'], x='ccy', y='weights', color='weights', title="SGD Neer Basket Weights", labels={
                     "weights": "Weights (%)",
                     "ccy": "Ccy",},)
fig.show()

### Predict Test Dataset
Using the weights fitted on the training set, we predict the test set and check the out-of-sample fit (correlation and RMSE).

In [48]:
# Official returns of the test set next to the returns implied by the fitted coefficients.
temp_df = y_test.to_frame().assign(Estimated_Y=np.dot(X_test, optimized_betas))
print(f"Correlation: {temp_df['neer'].corr(temp_df['Estimated_Y'])}")
print(f"RMSE: {((temp_df['neer'] - temp_df['Estimated_Y']) ** 2).mean() ** .5}")
temp_df

Correlation: 0.6777883810849046
RMSE: 0.0016274577354312351


Unnamed: 0_level_0,neer,Estimated_Y
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-02-18,-0.001489,-0.002491
2022-02-25,-0.002670,-0.002461
2022-03-04,-0.003703,-0.002845
2022-03-11,0.001578,0.005339
2022-03-18,0.003854,0.003752
...,...,...
2025-01-31,0.000575,-0.001304
2025-02-07,-0.000216,0.001014
2025-02-14,0.002799,0.004028
2025-02-21,0.002362,0.001844


In [49]:
# Calculating cumulative log returns for deriving the official NEER (NEERo) and estimated NEER (NEERe)
cumulative_log_returnsY = np.cumsum(temp_df['neer'])
NEERo = 100 * np.exp(cumulative_log_returnsY)

cumulative_log_returnsYhat = np.cumsum(temp_df['Estimated_Y'])
NEERe = 100 * np.exp(cumulative_log_returnsYhat)

est_df = pd.concat([NEERo.to_frame(), NEERe.to_frame()], axis=1)
est_df

Unnamed: 0_level_0,neer,Estimated_Y
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-02-18,99.851190,99.751208
2022-02-25,99.584900,99.505983
2022-03-04,99.216792,99.223321
2022-03-11,99.373434,99.754474
2022-03-18,99.757206,100.129470
...,...,...
2025-01-31,108.999060,111.288246
2025-02-07,108.975564,111.401116
2025-02-14,109.281015,111.850746
2025-02-21,109.539474,112.057150


In [50]:
# Line chart of the official (green) and estimated (orange) index levels.
fig = go.Figure(
    data=[
        go.Scatter(
            x=est_df.index,
            y=est_df['neer'],
            mode='lines',
            name='Official NEER',
            line=dict(color='green'),
        ),
        go.Scatter(
            x=est_df.index,
            y=est_df['Estimated_Y'],
            mode='lines',
            name='Estimated NEER',
            line=dict(color='orange'),
        ),
    ]
)
# Title and axis labels
fig.update_layout(
    title=dict(text='SGD NEER Levels'),
    xaxis=dict(title=dict(text='Date')),
    yaxis=dict(title=dict(text='NEER Index')),
)

fig.show()

# Estimating Best Weights Based on latest data
Using the same method, we re-estimate the weights on the most recent data (from `model_date` onwards) to obtain the weights that best fit the latest period.

In [51]:
# using modelling date set at the start
df = raw_df
df = df[df.index >= model_date]
df = df / df.loc[model_date]
df = df * 100 # make it over 100%
df = df.apply(lambda x: np.log(x / x.shift(1))) # get log returns
df = df.dropna() # remove na rows for missing data
df1 = df.iloc[:, :1]
df2 = df.iloc[:, 1:]
df2 = df2.apply(lambda x: x*-1) # convert FX returns into SGD as base ccy
df = pd.concat([df1, df2], axis=1)
df

Unnamed: 0_level_0,neer,EUR,GBP,USD,AUD,CNY,INR,IDR,JPY,KRW,MYR,TWD,PHP,THB
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2022-10-28,0.002672,-0.007934,-0.019035,0.014759,-0.017965,0.012264,0.008455,0.010001,-0.014026,-0.000000,0.011356,0.011962,-0.002805,-0.003276
2022-11-04,0.002295,0.014052,0.024685,-0.008148,0.014056,-0.001028,-0.003916,0.005427,0.005420,-0.007032,-0.003019,-0.005079,0.002144,-0.006307
2022-11-11,0.001256,-0.017464,-0.016669,0.023777,-0.019317,0.008770,0.000292,0.009260,-0.018739,-0.040221,0.001676,-0.001203,0.006045,-0.023225
2022-11-18,-0.001182,-0.010797,-0.012367,0.008345,-0.007190,0.002075,0.018430,0.019695,-0.007154,0.013553,-0.011009,-0.000431,0.006374,0.002632
2022-11-25,-0.001998,-0.003224,-0.016075,0.000292,-0.008000,0.005206,0.001189,-0.000686,-0.007608,-0.007767,-0.020362,-0.010626,-0.010518,-0.005725
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-01-31,0.000575,0.002059,-0.005841,-0.003844,0.011911,-0.004290,0.000320,0.002524,-0.011224,0.013867,-0.002613,0.004803,-0.004877,-0.007529
2025-02-07,-0.000216,0.003274,0.002082,0.002956,-0.007917,0.007519,0.012157,0.004462,-0.015259,-0.003217,0.007530,-0.001458,-0.002967,0.003234
2025-02-14,0.002799,-0.002208,-0.005108,0.005343,-0.000235,0.005405,-0.001552,0.002784,0.013666,0.003217,0.005933,0.004087,0.004173,0.005948
2025-02-21,0.002362,0.003206,-0.001421,0.007318,-0.005047,0.001085,0.004209,0.007421,-0.009169,-0.001074,0.001654,0.006113,0.006921,0.004522


In [52]:
# Modelling code
X = df.drop('neer', axis = 1) # Selecting independent features
Y = df.neer # Selecting target variable

# Adding a Constant term for the Intercept - for modelling
X = sm.add_constant(X)

# define objective function (sum of squared residuals)
def objective_function(betas):
    Estimated_Y = np.dot(X, betas)
    return np.sum((Y - Estimated_Y) ** 2)

# Defining the constraint - sum of betas equals 1
constraints = {'type': 'eq', 'fun': lambda betas: np.sum(betas) - 1}

# Initial guess for betas - use any numbers that sum to 1
initial_betas = np.zeros(X.shape[1])

# Minimize the objective function and calculating the estimated values
result = minimize(objective_function, initial_betas, constraints=constraints)
print("Optimized Coefficients:", result.x)

optimized_betas = result.x
Weights = [x*100 for x in optimized_betas] # scale weights in accordance to 100
Currency=list(X.columns)
Currency_Weights_dict = dict(zip(Currency, Weights))
sorted_Currency_Weights = sorted(Currency_Weights_dict.items(), key=lambda item: item[1], reverse=True)

ccy_df = pd.DataFrame(Currency_Weights_dict.items(), columns=['ccy', 'weights'])
ccy_df = ccy_df.sort_values('weights', ascending=False)

fig = px.bar(ccy_df, x='ccy', y='weights', color='weights', title="SGD Neer Basket Weights", labels={
                     "weights": "Weights (%)",
                     "ccy": "Ccy",},)
fig.show()

Optimized Coefficients: [-3.29952668e-05  1.13766376e-01  6.43989873e-02  1.52888748e-01
  7.73123637e-02  1.57714954e-01  8.05192288e-02  1.40146061e-02
  1.06573343e-01  1.38668965e-02  7.79927533e-02  9.51116301e-02
  3.24541326e-02  1.34189753e-02]


In [53]:
# Official returns next to the returns implied by the re-estimated coefficients (in-sample).
temp_df = Y.to_frame().assign(Estimated_Y=np.dot(X, optimized_betas))
print(f"Correlation: {temp_df['neer'].corr(temp_df['Estimated_Y'])}")
print(f"RMSE: {((temp_df['neer'] - temp_df['Estimated_Y']) ** 2).mean() ** .5}")

# Turn log returns back into index levels: cumulate them, exponentiate and scale by 100.
# NEERo is built from the official returns, NEERe from the estimated ones.
cumulative_log_returnsY = temp_df['neer'].cumsum()
NEERo = np.exp(cumulative_log_returnsY) * 100

cumulative_log_returnsYhat = temp_df['Estimated_Y'].cumsum()
NEERe = np.exp(cumulative_log_returnsYhat) * 100

est_df = pd.concat([NEERo, NEERe], axis=1)

# Line chart of the official (green) and estimated (orange) index levels.
fig = go.Figure(
    data=[
        go.Scatter(
            x=est_df.index,
            y=est_df['neer'],
            mode='lines',
            name='Official NEER',
            line=dict(color='green'),
        ),
        go.Scatter(
            x=est_df.index,
            y=est_df['Estimated_Y'],
            mode='lines',
            name='Estimated NEER',
            line=dict(color='orange'),
        ),
    ]
)
# Title and axis labels
fig.update_layout(
    title=dict(text='SGD NEER Levels'),
    xaxis=dict(title=dict(text='Date')),
    yaxis=dict(title=dict(text='NEER Index')),
)

fig.show()

Correlation: 0.7336458004328841
RMSE: 0.0010771843989593943


# Calculating actual SGDNEER index
Using the SGDNEER equation, we seek to calculate the actual SGDNEER index.

In [54]:
# Levels from model_date onwards, with incomplete rows removed.
df = raw_df.loc[raw_df.index >= model_date].dropna()
# First column (neer) is kept as is; every FX column is inverted (1 / rate) to get the SGD cross.
df1 = df.iloc[:, :1]
df2 = 1 / df.iloc[:, 1:]
df = pd.concat([df1, df2], axis=1)
df

Unnamed: 0_level_0,neer,EUR,GBP,USD,AUD,CNY,INR,IDR,JPY,KRW,MYR,TWD,PHP,THB
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2022-10-21,134.56,0.717618,0.625978,0.700967,1.118944,5.078720,58.061894,10934.937124,105.407400,1008.064516,3.321156,22.562668,41.305246,26.896904
2022-10-28,134.92,0.711946,0.614175,0.711389,1.099022,5.141388,58.554866,11044.842059,103.939299,1008.064516,3.359086,22.834178,41.189554,26.808933
2022-11-04,135.23,0.722022,0.629525,0.705617,1.114579,5.136107,58.326043,11104.941699,104.504128,1001.001001,3.348962,22.718495,41.277966,26.640381
2022-11-11,135.40,0.709522,0.619118,0.722596,1.093255,5.181347,58.343057,11208.249271,102.564103,961.538462,3.354579,22.691173,41.528239,26.028788
2022-11-18,135.24,0.701902,0.611509,0.728651,1.085423,5.192108,59.428300,11431.184271,101.832994,974.658869,3.317850,22.681394,41.793789,26.097395
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-01-31,139.17,0.710631,0.594248,0.737898,1.186380,5.350455,63.934531,12033.694344,113.895216,1074.113856,3.261579,24.313745,43.057051,24.837932
2025-02-07,139.14,0.712962,0.595486,0.740083,1.177024,5.390836,64.716542,12087.513598,112.170499,1070.663812,3.286231,24.278327,42.929510,24.918392
2025-02-14,139.53,0.711389,0.592452,0.744048,1.176747,5.420054,64.616180,12121.212121,113.713896,1074.113856,3.305785,24.377758,43.109023,25.067054
2025-02-21,139.86,0.713674,0.591611,0.749513,1.170823,5.425936,64.888716,12211.503236,112.676056,1072.961373,3.311258,24.527237,43.408430,25.180671


In [55]:
# SGDNEER index
def calculateIndex(row):
    """Combine one row of FX levels with the notebook-level ``Currency_Weights_dict``.

    For every key except ``'const'`` the row value is multiplied by the weight and
    divided by 100; the products are summed and ``const / 100`` is subtracted.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must be indexable by every non-``'const'`` key of ``Currency_Weights_dict``.

    Returns
    -------
    The weighted sum minus ``Currency_Weights_dict['const'] / 100``.

    NOTE(review): this is an arithmetic weighted sum of the raw row values, so columns
    with large magnitudes dominate the result - confirm this matches the intended
    SGDNEER equation. The intercept is subtracted here whereas the fitted model adds
    it - confirm the sign.
    """
    const = Currency_Weights_dict['const']
    weighted_terms = [
        row[ccy] * weight / 100
        for ccy, weight in Currency_Weights_dict.items()
        if ccy != 'const'
    ]
    return sum(weighted_terms) - const / 100

# Evaluate calculateIndex on every row and store the result in a new 'newNeer' column.
df['newNeer'] = df.apply(calculateIndex, axis=1)
df

Unnamed: 0_level_0,neer,EUR,GBP,USD,AUD,CNY,INR,IDR,JPY,KRW,MYR,TWD,PHP,THB,newNeer
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2022-10-21,134.56,0.717618,0.625978,0.700967,1.118944,5.078720,58.061894,10934.937124,105.407400,1008.064516,3.321156,22.562668,41.305246,26.896904,188.359388
2022-10-28,134.92,0.711946,0.614175,0.711389,1.099022,5.141388,58.554866,11044.842059,103.939299,1008.064516,3.359086,22.834178,41.189554,26.808933,189.815274
2022-11-04,135.23,0.722022,0.629525,0.705617,1.114579,5.136107,58.326043,11104.941699,104.504128,1001.001001,3.348962,22.718495,41.277966,26.640381,190.591806
2022-11-11,135.40,0.709522,0.619118,0.722596,1.093255,5.181347,58.343057,11208.249271,102.564103,961.538462,3.354579,22.691173,41.528239,26.028788,191.290758
2022-11-18,135.24,0.701902,0.611509,0.728651,1.085423,5.192108,59.428300,11431.184271,101.832994,974.658869,3.317850,22.681394,41.793789,26.097395,194.612915
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-01-31,139.17,0.710631,0.594248,0.737898,1.186380,5.350455,63.934531,12033.694344,113.895216,1074.113856,3.261579,24.313745,43.057051,24.837932,206.293376
2025-02-07,139.14,0.712962,0.595486,0.740083,1.177024,5.390836,64.716542,12087.513598,112.170499,1070.663812,3.286231,24.278327,42.929510,24.918392,206.880767
2025-02-14,139.53,0.711389,0.592452,0.744048,1.176747,5.420054,64.616180,12121.212121,113.713896,1074.113856,3.305785,24.377758,43.109023,25.067054,207.580905
2025-02-21,139.86,0.713674,0.591611,0.749513,1.170823,5.425936,64.888716,12211.503236,112.676056,1072.961373,3.311258,24.527237,43.408430,25.180671,208.769053


In [56]:
# NOTE(review): this whole cell is commented-out exploratory code (statsmodels OLS /
# elastic-net attempt); none of it runs. Two things to fix before reviving it:
#   - `mean_squared_error(y_test, y_pred)` is assigned to `rmse` but is called without a
#     square root / `squared=False`, so the value labelled RMSE is the MSE;
#   - the second variant fits the model on the test split (`sm.OLS(y_test, X_test)`).
# # OLS regression, but not able to set constraints.

# # Fitting model
# model = sm.OLS(y_train, X_train) #.fit()
# # model = sm.OLS(y_train, X_train).fit_constrained(lambda betas: np.sum(betas) - 1
# res=model.fit_regularized(method='elastic_net', alpha=0.0, L1_wt=1.0, start_params=None, profile_scale=False, refit=False)
# model_fit_regularized = model.fit(params=res.params)
# print(model_fit_regularized.summary())

# model = model_fit_regularized
# y_pred = model.predict(X_test) # Running predictions
# # rmse = mean_squared_error(y_test, y_pred, squared = False) # Computing RMSE
# rmse = mean_squared_error(y_test, y_pred) # Computing RMSE

# print(f'\nRoot Mean Squared Error for Baseline Model: {rmse:.6f}')


# new_df = y_pred.to_frame()
# new_df.rename(columns={0: 'Estimated_Y'}, inplace=True)
# new_df = pd.concat([new_df, y_test.to_frame()], axis=1)
# new_df

# new_df['Estimated_Y'].corr(new_df['neer'])

# # Calculating cumulative log returns for deriving the official NEER (NEERo) and estimated NEER (NEERe)
# cumulative_log_returnsY = np.cumsum(new_df['neer'])
# NEERo = 100 * np.exp(cumulative_log_returnsY)

# cumulative_log_returnsYhat = np.cumsum(new_df['Estimated_Y'])
# NEERe = 100 * np.exp(cumulative_log_returnsYhat)

# # Plotting offical vs. estimated NEER
# plt.figure(figsize=(8, 4))
# plt.plot(NEERo, label='Official NEER')
# plt.plot(NEERe, label='Estimated NEER')
# plt.title('NEER levels, 21 Oct 2022=100')
# plt.legend()
# plt.show()


# # using latest dataset to train the model

# # Creating training and testing sets
# X_train, X_test, y_train, y_test = train_test_split(X, y,
#                                                    test_size = .3, # 30% of data for testing
#                                                    shuffle = False, # Shuffling values
#                                                    random_state = 42)

# # Adding a Constant term for the Intercept
# X_train = sm.add_constant(X_train)
# X_test = sm.add_constant(X_test)

# # Fitting model
# model = sm.OLS(y_test, X_test) #.fit()
# # model = sm.OLS(y_train, X_train).fit_constrained(lambda betas: np.sum(betas) - 1
# res=model.fit_regularized(method='elastic_net', alpha=0.0, L1_wt=1.0, start_params=None, profile_scale=False, refit=False)
# model_fit_regularized = model.fit(params=res.params)
# print(model_fit_regularized.summary())

# model_fit_regularized.params


# model = model_fit_regularized
# y_pred = model.predict(X_test) # Running predictions
# # rmse = mean_squared_error(y_test, y_pred, squared = False) # Computing RMSE
# rmse = mean_squared_error(y_test, y_pred) # Computing RMSE

# print(f'\nRoot Mean Squared Error for Baseline Model: {rmse:.6f}')

# new_df = y_pred.to_frame()
# new_df.rename(columns={0: 'Estimated_Y'}, inplace=True)
# new_df = pd.concat([new_df, y_test.to_frame()], axis=1)
# new_df['Estimated_Y'].corr(new_df['neer'])


# # Calculating cumulative log returns for deriving the official NEER (NEERo) and estimated NEER (NEERe)
# cumulative_log_returnsY = np.cumsum(new_df['neer'])
# NEERo = 100 * np.exp(cumulative_log_returnsY)

# cumulative_log_returnsYhat = np.cumsum(new_df['Estimated_Y'])
# NEERe = 100 * np.exp(cumulative_log_returnsYhat)

# # Plotting offical vs. estimated NEER
# plt.figure(figsize=(8, 4))
# plt.plot(NEERo, label='Official NEER')
# plt.plot(NEERe, label='Estimated NEER')
# plt.title('NEER levels, 21 Oct 2022=100')
# plt.legend()
# plt.show()


# weights_df = model_fit_regularized.params
# weights_df = weights_df.to_frame().rename(columns={0: 'weights'})
# weights_df['weights'] = weights_df['weights'] * 100
# weights_df = weights_df.iloc[1:, :] # remove the constant row for plotting
# weights_df

# Currency_Weights_dict = weights_df.to_dict()['weights']
# # Assigning the corresponding FX names to the Weights and sorting in descending order
# sorted_Currency_Weights = sorted(Currency_Weights_dict.items(), key=lambda item: item[1], reverse=True)

# # Plotting the estimated Weights
# import matplotlib.pyplot as plt
# plt.figure(figsize=(8, 4))
# bars=plt.bar([p[0] for p in sorted_Currency_Weights], [p[1] for p in sorted_Currency_Weights])

# for bar in bars:
#     yval = bar.get_height()
#     plt.text(bar.get_x() + bar.get_width()/2, yval, round(yval, 1), ha='center', va='bottom')

# plt.xlabel('Currency')
# plt.ylabel('Weight (%)')
# plt.title('Optimal NEER Weights')
# plt.tight_layout()

# plt.show()

In [57]:
# # Selecting feature to remove
# features_to_remove = ['Feature_2',
#                      'Feature_6']

# # Removing them
# new_X_train = X_train.drop(features_to_remove, axis =1)
# new_X_test = X_test.drop(features_to_remove, axis =1)