In [None]:
import pandas as pd

In [None]:
# Relative location of the input CSV file.
data_path = '../final_data.csv'
# Load the full dataset; split_data() later indexes this frame by its 'Time' column.
df = pd.read_csv(data_path)

## Data preparation:

### Splitting data into train and test:

In [4]:
def split_data(df, year_split=2022, time_column='Time'):
    """
    Split the data into training and test sets based on a year column.

    Rows whose value in ``time_column`` is less than or equal to ``year_split``
    go to the training set; rows with a strictly greater value go to the test
    set. Rows where the comparison is False on both sides (e.g. a missing
    year) end up in neither set.

    Parameters:
    - df: DataFrame containing the full dataset.
    - year_split: The last year included in the training set (default is 2022).
    - time_column: Name of the column holding the year (default is 'Time').

    Returns:
    - train_data: DataFrame containing the training data.
    - test_data: DataFrame containing the test data.

    Raises:
    - KeyError: If ``time_column`` is not a column of ``df``.
    """
    # Look the column up once so both masks are built from the same Series.
    years = df[time_column]

    train_data = df[years <= year_split]
    test_data = df[years > year_split]

    return train_data, test_data

In [5]:
# Split with the default year_split (2022): rows with Time <= 2022 train, later rows test.
train_data, test_data = split_data(df)
# Print the (rows, columns) shape of each split as a quick sanity check.
print(f"train_data's shape: {train_data.shape}")
print(f"test_data's shape: {test_data.shape}")

train_data's shape: (324, 33)
test_data's shape: (72, 33)


### Scaling data using StandardScaler:

In [6]:
import joblib

def scale_data(data, scaler_path):
    """
    Scale the feature columns of ``data`` with a scaler loaded from disk.

    The columns 'Company name', 'TSR' and 'Time' are left untouched; every
    other column is passed through the loaded scaler's ``transform``.

    Parameters:
    - data: DataFrame containing 'Company name', 'TSR', 'Time' and the
      columns to scale.
    - scaler_path: Path of the joblib file holding the scaler.

    Returns:
    - DataFrame with the three untouched columns first, followed by the
      scaled columns, on the same index as ``data``.
    """
    passthrough = ['Company name', 'TSR', 'Time']

    # NOTE(review): joblib.load unpickles the file, so only load scaler files
    # from a trusted source.
    fitted_scaler = joblib.load(scaler_path)

    features = data.drop(columns=passthrough)
    scaled_values = fitted_scaler.transform(features)

    # Re-attach the original index and column names so the join lines up row by row.
    scaled_features = pd.DataFrame(
        scaled_values,
        index=data.index,
        columns=features.columns,
    )
    return data[passthrough].join(scaled_features)

In [8]:
# Relative path of the joblib file that scale_data() loads the scaler from.
scaler_path = './scaler_folder/standard_scaler.joblib'

In [11]:
# Scale both splits with the same persisted scaler; scale_data() only calls
# transform(), so nothing is refit on either split here.
train_data_scaled = scale_data(train_data, scaler_path)
test_data_scaled = scale_data(test_data, scaler_path)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


## Model definitions:

### Support Vector Regression definition:

In [None]:
# Placeholder for the Support Vector Regression model definition.
def svr():
    """Stub: not implemented yet, currently returns None."""
    return None

### Recurrent Support Vector Machine Regression definition:

In [None]:
# Placeholder for the Recurrent Support Vector Machine Regression model definition.
def rsvm():
    """Stub: not implemented yet, currently returns None."""
    return None

## Model Evaluation definition:

In [None]:
from sklearn.metrics import mean_squared_error

def evaluate_model(model, test_data, target_column, feature_columns):
    """
    Evaluate a model on ``test_data`` using the root mean squared error.

    Parameters:
    - model: Object exposing ``predict(X)``; its string form is embedded in
      the returned message.
    - test_data: DataFrame containing the feature and target columns.
    - target_column: Name of the column holding the true target values.
    - feature_columns: List of column names passed to ``model.predict``.

    Returns:
    - rmse: Root mean squared error between the targets and the predictions.
    - message: Human-readable sentence reporting the RMSE.
    """
    X_test = test_data[feature_columns]
    y_test = test_data[target_column]
    y_pred = model.predict(X_test)

    # `squared=False` was deprecated in scikit-learn 1.4 and removed in 1.6.
    # Taking the square root explicitly works on every version and gives the
    # same value for a single target column.
    rmse = mean_squared_error(y_test, y_pred) ** 0.5

    return rmse, f"The RMSE of the {model} is {rmse}"

### Saving models to disk:

In [None]:
import joblib
def save_file(scaler, filename):
    """
    Persist an object to disk with joblib.

    Parameters:
    - scaler: The object to dump; it is handed to ``joblib.dump`` unchanged.
    - filename: Destination path passed to ``joblib.dump``.

    Returns:
    - A confirmation message naming ``filename``.
    """
    joblib.dump(scaler, filename)
    confirmation = f"Saving {filename} successfully executed"
    return confirmation

## Model training:

In [None]:
# Model inputs: every column except the target ('TSR') and the two columns that
# scale_data() leaves unscaled. Excluding only 'TSR' would pass 'Company name'
# and the raw 'Time' values to the models as features.
# NOTE(review): confirm 'Time' is not meant to be a model input.
feature_columns = [
    col
    for col in train_data_scaled.columns
    if col not in ("Company name", "TSR", "Time")
]

### Support Vector Regression:

### Recurrent Support Vector Machine Regression: