In [92]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.discriminant_analysis import StandardScaler
from sklearn.linear_model import Ridge
from sklearn.linear_model  import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_squared_error


In [93]:
data = pd.read_csv('Sub_Oil_VLCC_Monthly.csv')
data

Unnamed: 0.1,Unnamed: 0,542644,548869,93219,534737,36820,542661,10066,542456,24820,...,543880+543332+542784+530997+530785,535034+535038+542800+543348+543896,543924+543376+542828+535086,542832+542696+543380+543928,543932+542700+542836+543384,547724+543360+547728+543908+547720+542812+547716+531033,543888+542792+543340+531025,543372+543368+543920+543916+542692+542688+542824+542820,543364+543912+542684+542816,8852013+8852220+10743960+10778824+10778836+10743901+10778899+10779132
0,19910101,216.0400,404.7017,146.25000,281.3800,155.250,214.4300,268.750,204.9100,312.130,...,33122.130000,45839.530000,52842.860000,48596.370000,49887.600000,60081.570000,39845.120000,52658.090000,31296.190000,13.03000
1,19910201,149.0800,421.4927,115.66667,276.2600,95.750,140.6300,280.000,136.6500,307.280,...,56664.570000,73207.630000,89338.320000,80358.930000,85764.820000,81404.760000,68741.690000,80129.440000,54080.590000,13.15000
2,19910301,119.0500,368.1324,86.33333,190.7900,80.800,107.5100,199.200,106.0300,226.210,...,35121.180000,48163.420000,55941.780000,51293.410000,52934.020000,52744.530000,42298.800000,54990.750000,33230.870000,13.20000
3,19910401,121.9400,387.0115,79.66667,200.2700,77.000,110.7000,191.375,108.9800,235.200,...,19576.480000,30092.690000,31844.400000,30321.090000,29244.860000,36829.640000,23218.850000,36851.860000,18186.700000,13.51000
4,19910501,125.7900,462.6200,82.80000,199.6400,78.600,114.9600,197.200,112.9100,234.610,...,34175.280000,47063.810000,54475.450000,50017.240000,51492.530000,58368.520000,41137.780000,53887.000000,32315.440000,13.05000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
392,20230901,684.4500,576.0000,568.45000,1024.2000,603.600,711.9000,1054.800,659.7000,1022.900,...,30863.310778,25898.544058,48481.681142,45368.326005,50994.194898,55121.678414,32958.539815,54409.884701,36524.003032,11.51250
393,20231001,660.3750,656.1875,524.37500,970.3125,556.625,700.1250,1014.750,664.6875,973.000,...,42638.283715,50128.701163,63854.584045,65398.069490,70789.307587,65391.097779,45001.265013,72728.810396,49016.415023,11.68875
394,20231101,629.7500,688.1875,489.75000,919.5625,521.875,713.3125,936.125,686.7500,880.875,...,61926.342563,67212.546340,87067.677940,82487.096193,89553.720260,88069.251739,65533.770453,87299.379274,60576.883930,11.88625
395,20231201,588.4500,589.7000,494.65000,868.7500,503.700,681.8500,881.800,615.7000,804.650,...,52011.115747,59195.962123,70954.699113,67922.393093,71746.666697,72573.820342,54569.016270,73118.989011,47718.064695,11.77250


In [94]:
features = ['542236', '67321', '549295', '41108', '541982']

In [95]:
mlp_params = {'hidden_layer_sizes': (200, 180, 140, 120, 100, 90, 80, 60, 50, 40, 30, 20, 10, 5),
              'activation': 'relu',
              'solver': 'adam',
              'max_iter': 50,
              }
lr_params = {
    StandardScaler(),
    Ridge(alpha=0.1)
}

In [96]:
from sklearn.neural_network import MLPRegressor


models = [
    ('MLP', MLPRegressor(**mlp_params)),
    ('Linear Regression', make_pipeline(StandardScaler(),
    Ridge(alpha=0.2))),
]

best_model = ""
best_accuracy = float('inf')

In [97]:

for feature in features:
    X = data[feature].values.reshape(-1, 1)
    y = data[feature]
    
    X_train = X[:36]
    y_train = y[:36]

    X_test = X[36:]
    y_test = y[36:]
    
    for name, model in models:
        # Train
        model.fit(X_train, y_train)
        
        y_pred = model.predict(X_test)
        
        accuracy = 100 * (1-abs((y_test - y_pred)/ y_test)).mean()
        mse = mean_squared_error(y_test, y_pred)
        
        print(f'\n {name} Accuracy for Feature {feature}: {accuracy.mean()}')
        print("Mean Squared Error:", mse)
        
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_model = (name, feature, model, X_train, y_train, X_test, y_test, y_pred)
    if best_model:
        best_name, best_feature, best_model, X_train, y_train, X_test, y_test, y_pred = best_model
        print(f'\n =====Best Model: {best_name} (Feature: {best_feature})======')  
    else:
        print("No model achieved an accuracy greater than the initial accuracy.")

        
        
        
    


 MLP Accuracy for Feature 542236: 74.52229522827703
Mean Squared Error: 129349079.98127429

 Linear Regression Accuracy for Feature 542236: 99.85393635349678
Mean Squared Error: 10092.247531788542
No model achieved an accuracy greater than the initial accuracy.

 MLP Accuracy for Feature 67321: 73.76813585279636
Mean Squared Error: 724.5312814965175

 Linear Regression Accuracy for Feature 67321: 99.87054810546859
Mean Squared Error: 0.018066372614975474
No model achieved an accuracy greater than the initial accuracy.





 MLP Accuracy for Feature 549295: 88.96741404864906
Mean Squared Error: 0.796833666547773

 Linear Regression Accuracy for Feature 549295: 99.89032211948518
Mean Squared Error: 0.00015003912642586653
No model achieved an accuracy greater than the initial accuracy.

 MLP Accuracy for Feature 41108: 74.18976720754156
Mean Squared Error: 334.6897398411977

 Linear Regression Accuracy for Feature 41108: 99.85344904344062
Mean Squared Error: 0.03132353743516184
No model achieved an accuracy greater than the initial accuracy.

 MLP Accuracy for Feature 541982: 94.31162755854862
Mean Squared Error: 17.38096981710708

 Linear Regression Accuracy for Feature 541982: 99.82767798556787
Mean Squared Error: 0.054516426621616966
No model achieved an accuracy greater than the initial accuracy.
