In [1]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import requests
from sklearn.linear_model import LinearRegression

# Statsmodels for time series analysis
from statsmodels.graphics.tsaplots import month_plot, quarter_plot, plot_acf, plot_pacf
from statsmodels.tsa.seasonal import seasonal_decompose

# Sklearn for metrics and parameter grid
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
from sklearn.model_selection import ParameterGrid

pd.set_option('display.max_rows',60000)
pd.set_option('display.max_columns',500)
pd.set_option('display.max_colwidth',200)
pd.options.display.float_format = '{:,.4f}'.format

In [2]:
# Custom RMSE function
def rmse_numpy(y_true, y_pred):
    """
    Calculate Root Mean Squared Error (RMSE).
    :param y_true: Ground truth values (actual).
    :param y_pred: Predicted values (forecast).
    :return: RMSE value.
    """
    return np.sqrt(np.mean((y_true - y_pred) ** 2))

# TFT 

In [3]:
tft = pd.read_csv('outputs/quantile_tft_0307.csv')
tft.columns = ['stock','10_pct','50_pct','90_pct']
df = pd.read_csv('dataset_250222_run.csv')

In [4]:
collect_tft_score = [] 

for t in list(tft['stock'].unique()): 
    tft_ff = tft[tft['stock']==t].reset_index(drop=True)

    ticker_frame = df[(df['ticker_id']==t)].reset_index(drop=True)
    index_1 = ticker_frame[ticker_frame['date']=='2024-10-04'].index.values[0]
    index_2 = index_1 - 79
    index_3 = index_1 + 41

    close_pctc_true_array = np.array(ticker_frame.iloc[index_1 + 1:index_3]['close_pctc'])

    tft_ff['actual'] = close_pctc_true_array
    tft_ff["within_bounds"] = (tft_ff["actual"] >= tft_ff["10_pct"]) & (tft_ff["actual"] <= tft_ff["90_pct"])
    score_ = tft_ff['within_bounds'].value_counts(normalize=True).get(True, 0)

    rmse_10 = rmse_numpy(np.array(tft_ff["actual"]), np.array(tft_ff["10_pct"]))
    rmse_90 = rmse_numpy(np.array(tft_ff["actual"]), np.array(tft_ff["90_pct"]))
    
    data_f =  {'ticker':[t], 
              'within_score':[score_], 
              'rmse_10':[rmse_10], 
             'rmse_90':[rmse_90]}

    collect_tft_score.append(pd.DataFrame(data_f))

In [5]:
tft_score = pd.concat(collect_tft_score)

In [6]:
tft_score.head()

Unnamed: 0,ticker,within_score,rmse_10,rmse_90
0,PGR,0.9,0.0261,0.0254
0,UNH,0.9,0.0329,0.0356
0,LLY,0.825,0.0398,0.0415
0,IBM,0.775,0.0218,0.0241
0,GS,0.775,0.0365,0.0294


# Morte Carlo 

In [31]:
df = pd.read_csv('dataset_250222_run.csv')
ticker_list = list(df['ticker_id'].unique())

In [32]:
collect_mc_score = [] 

for t in ticker_list: 
    mc = df[df['ticker_id']==t].reset_index(drop=True)
    pctc_all = mc[mc['date']<='2024-10-04']['close_pctc'][-435:] #one year of training -- mention this 

    # Define number of coefficients (past values used for prediction)
    num_coeffs = 80  # Can be tuned based on performance

    # Prepare data for regression
    X = np.array([pctc_all[i:i + num_coeffs] for i in range(len(pctc_all) - num_coeffs)])
    y = pctc_all[num_coeffs:]

    # Fit linear regression model to get coefficients
    model = LinearRegression()
    model.fit(X, y)
    coeffs = model.coef_

    # Compute standard deviation of residuals
    residuals = y - model.predict(X)
    std_dev = np.std(residuals)

    index_1 = mc[mc['date']=='2024-10-04'].index.values[0]
    index_2 = index_1 - 79
    index_3 = index_1 + 41

    close_pctc_array = np.array(mc.iloc[index_2:index_1 + 1]['close_pctc'])
    close_pctc_true_array = np.array(mc.iloc[index_1 +1:index_3]['close_pctc'])

    initial_values = np.array(close_pctc_array).reshape(-1,1) #2d array

    num_samples = 1000
    values = np.broadcast_to(initial_values, (80, num_samples))
    trajectory_length = 40

    for i in range(trajectory_length):
        next_prediction = coeffs @ values[-num_coeffs:,:]
        next_row = next_prediction + std_dev * np.random.randn(num_samples)
        values = np.append(values, next_row.reshape(1, num_samples), axis=0)

    # Assuming `values` is your DataFrame containing all samples
    values_df = pd.DataFrame(values)  # Convert values to DataFrame if it's not already

    # Extract the last 40 rows (predicted values)
    predicted_values = values_df.iloc[-40:]

    # Compute the 10th, 50th (median), and 90th percentile across the 1000 samples
    percentiles = predicted_values.quantile([0.1, 0.5, 0.9], axis=1).T
    percentiles.columns = ['10_pct','50_pct','90_pct']
    percentiles['actual'] = close_pctc_true_array
    percentiles["within_bounds"] = (percentiles["actual"] >= percentiles["10_pct"]) & (percentiles["actual"] <= percentiles["90_pct"])
    # Display the results
    #percentiles['within_bounds'].value_counts(normalize=True)

    score_ = percentiles['within_bounds'].value_counts(normalize=True).get(True, 0)

    rmse_10 = rmse_numpy(np.array(percentiles["actual"]), np.array(percentiles["10_pct"]))
    rmse_90 = rmse_numpy(np.array(percentiles["actual"]), np.array(percentiles["90_pct"]))

    data_f =  {'ticker':[t], 
              'within_score':[score_],
            'rmse_10':[rmse_10], 
             'rmse_90':[rmse_90]}

    collect_mc_score.append(pd.DataFrame(data_f))

In [33]:
mc_score = pd.concat(collect_mc_score)

# Total View

In [34]:
mini_ = pd.read_csv('mini_portfolios_0219.csv')
mapper_1 = {a:b for a,b in zip(mini_['Ticker'],mini_['Sector'])}

In [35]:
total_score = pd.merge(tft_score, mc_score, on='ticker', how='left',suffixes= ('_tft', '_mc'))
total_score['sector'] = total_score['ticker'].map(mapper_1)

In [36]:
total_score[['within_score_tft', 'within_score_mc']].mean()

within_score_tft   0.8493
within_score_mc    0.7542
dtype: float64

In [43]:
print(total_score.groupby('sector')[['within_score_tft', 'within_score_mc']].mean())

                        within_score_tft  within_score_mc
sector                                                   
Communication Services            0.8375           0.8125
Consumer Cyclical                 0.8000           0.6833
Energy                            0.8667           0.7500
Financial Services                0.8750           0.7833
Healthcare                        0.8542           0.7375
Technology                        0.8625           0.7583


In [44]:
print(total_score.groupby('sector')[['rmse_10_tft', 'rmse_10_mc']].mean())

                        rmse_10_tft  rmse_10_mc
sector                                         
Communication Services       0.0279      0.0280
Consumer Cyclical            0.0342      0.0336
Energy                       0.0291      0.0236
Financial Services           0.0277      0.0254
Healthcare                   0.0290      0.0260
Technology                   0.0438      0.0293


In [40]:
total_score.groupby('sector')[['rmse_90_tft', 'rmse_90_mc']].mean()

Unnamed: 0_level_0,rmse_90_tft,rmse_90_mc
sector,Unnamed: 1_level_1,Unnamed: 2_level_1
Communication Services,0.0245,0.0252
Consumer Cyclical,0.0299,0.0278
Energy,0.0276,0.0216
Financial Services,0.0249,0.0215
Healthcare,0.0302,0.0253
Technology,0.0423,0.0264


In [47]:
print(total_score[['rmse_10_tft', 'rmse_10_mc']].mean())

rmse_10_tft   0.0320
rmse_10_mc    0.0276
dtype: float64


In [48]:
print(total_score[['rmse_90_tft', 'rmse_90_mc']].mean())

rmse_90_tft   0.0299
rmse_90_mc    0.0246
dtype: float64


In [45]:
total_score

Unnamed: 0,ticker,within_score_tft,rmse_10_tft,rmse_90_tft,within_score_mc,rmse_10_mc,rmse_90_mc,sector
0,PGR,0.9,0.0261,0.0254,0.8,0.0248,0.0214,Financial Services
1,UNH,0.9,0.0329,0.0356,0.675,0.0259,0.0247,Healthcare
2,LLY,0.825,0.0398,0.0415,0.75,0.0324,0.0347,Healthcare
3,IBM,0.775,0.0218,0.0241,0.65,0.0228,0.0212,Technology
4,GS,0.775,0.0365,0.0294,0.675,0.0335,0.0275,Financial Services
5,VZ,0.725,0.0189,0.0205,0.775,0.0211,0.0223,Communication Services
6,AAPL,0.9,0.0215,0.0234,0.85,0.0211,0.0192,Technology
7,MSFT,0.95,0.0316,0.0318,0.8,0.0217,0.0219,Technology
8,CB,0.875,0.02,0.022,0.85,0.0189,0.018,Financial Services
9,CTRA,0.8,0.0261,0.0253,0.75,0.026,0.0231,Energy
