In [1]:
import datetime as dt

import pandas as pd
from sklearn.metrics import mean_absolute_percentage_error, r2_score, mean_absolute_error
from sklearn.preprocessing import (
    OneHotEncoder,
    MinMaxScaler
)
from sklearn.neighbors import KNeighborsRegressor

from analytics.machine_learning.price_prediction_with_fundamentals import utils

In [2]:
dataset = utils.get_dataset(only_value_change_columns=True)

In [3]:
train_set, test_set = utils.split_data_to_train_and_test(
    df=dataset,
    cutoff_date=dt.datetime(2023,6,1)
)

In [4]:
y_train = train_set[['avg_next_three_months_price', 'sector']]
X_train = train_set.drop(['avg_next_three_months_price'], axis=1)

y_test = test_set[['avg_next_three_months_price', 'sector']]
X_test = test_set.drop(['avg_next_three_months_price'], axis=1)

In [5]:
one_hot_encoder = OneHotEncoder()
# scaler = MinMaxScaler(feature_range=(0, 1))

X_train_transformed = utils.transform_input(
    X=X_train,
    one_hot_encoder=one_hot_encoder,
    # scaler=scaler,
    fit=True
)

X_test_transformed = utils.transform_input(
    X=X_test,
    one_hot_encoder=one_hot_encoder,
    # scaler=scaler,
    fit=False
)

In [6]:
neigh_regressor = KNeighborsRegressor(
    n_neighbors=10,
    weights='distance'
)

neigh_regressor.fit(X_train_transformed, y_train['avg_next_three_months_price'])
y_pred_neigh_regressor = pd.Series(neigh_regressor.predict(X_test_transformed))

In [7]:
print("Mean absolute error: %.2f" % mean_absolute_error(y_test['avg_next_three_months_price'], y_pred_neigh_regressor))
print("Coefficient of determination: %.2f" % r2_score(y_test['avg_next_three_months_price'], y_pred_neigh_regressor))
print("Mean absolute pct error: %.2f" % mean_absolute_percentage_error(y_test['avg_next_three_months_price'], y_pred_neigh_regressor))

Mean absolute error: 53.22
Coefficient of determination: 0.11
Mean absolute pct error: 2.32


In [8]:
from sklearn.neighbors import RadiusNeighborsRegressor

radius_regressor = RadiusNeighborsRegressor(radius=1.0, weights='distance')

radius_regressor.fit(X_train_transformed, y_train['avg_next_three_months_price'])
y_pred_radius_regressor = pd.Series(neigh_regressor.predict(X_test_transformed))

In [9]:
print("Mean absolute error: %.2f" % mean_absolute_error(y_test['avg_next_three_months_price'], y_pred_radius_regressor))
print("Coefficient of determination: %.2f" % r2_score(y_test['avg_next_three_months_price'], y_pred_radius_regressor))
print("Mean absolute pct error: %.2f" % mean_absolute_percentage_error(y_test['avg_next_three_months_price'], y_pred_radius_regressor))

Mean absolute error: 53.22
Coefficient of determination: 0.11
Mean absolute pct error: 2.32


In [42]:
from sklearn.svm import LinearSVR

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler


sv_regr = make_pipeline(
    StandardScaler(),
    LinearSVR(
        dual="auto",
        random_state=0, 
        tol=1e-20,
        C=0.1,
        max_iter=20000,
        #loss="squared_epsilon_insensitive"
        epsilon=0.5
    )
)
sv_regr.fit(X_train_transformed, y_train['avg_next_three_months_price'])

y_pred_sv_regr =  sv_regr.predict(X_test_transformed)



In [43]:
print("Mean absolute error: %.2f" % mean_absolute_error(y_test['avg_next_three_months_price'], y_pred_sv_regr))
print("Coefficient of determination: %.2f" % r2_score(y_test['avg_next_three_months_price'], y_pred_sv_regr))
print("Mean absolute pct error: %.2f" % mean_absolute_percentage_error(y_test['avg_next_three_months_price'], y_pred_sv_regr))

Mean absolute error: 6.54
Coefficient of determination: 0.99
Mean absolute pct error: 0.16


In [48]:
from sklearn.svm import SVR

svr = make_pipeline(
    StandardScaler(),
    SVR(C=10.0, epsilon=0.2, gamma=1)
)

svr.fit(X_train_transformed, y_train['avg_next_three_months_price'])

y_pred_svr =  sv_regr.predict(X_test_transformed)

In [49]:
print("Mean absolute error: %.2f" % mean_absolute_error(y_test['avg_next_three_months_price'], y_pred_svr))
print("Coefficient of determination: %.2f" % r2_score(y_test['avg_next_three_months_price'], y_pred_svr))
print("Mean absolute pct error: %.2f" % mean_absolute_percentage_error(y_test['avg_next_three_months_price'], y_pred_svr))

Mean absolute error: 6.54
Coefficient of determination: 0.99
Mean absolute pct error: 0.16


Mean absolute error: 6.54
Coefficient of determination: 0.99
Mean absolute pct error: 0.16

In [51]:
utils.calculate_avg_pct_loss_per_sector(
    y_pred=y_test['avg_next_three_months_price'],
    y_actual=y_pred_svr,
    sector_series=y_test['sector']
)

{'MANUFACTURING': 14.393864563411435,
 'LIFE SCIENCES': 21.954672141013774,
 'TRADE & SERVICES': 15.369934686894256,
 'TECHNOLOGY': 15.930359353501624,
 'FINANCE': 13.336949526436971,
 'ENERGY & TRANSPORTATION': 14.565012829236514,
 'REAL ESTATE & CONSTRUCTION': 15.77024103385572}