# GRU and LSTM
https://www.tejwin.com/insight/gru-%E8%88%87-lstm/

In [None]:
# Mount Google Drive (Colab only) so the files under /content/drive used by
# the cells below (dataset CSV, saved checkpoints) are reachable.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import plotly.graph_objects as go
import os
import time
import math
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader, TensorDataset


# Use the GPU when CUDA is available, otherwise the CPU; tensors and models
# created below are moved to this device with `.to(device)`.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Get Org Dataset
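Load the training and test data set, replace infinite values with NaN, fill missing values with each column's maximum, and print checks confirming that no infinite or NaN values remain. Standardization is not applied in this cell; it is done later with `StandardScaler` when the meta-features are generated, and the logarithmic transformation is currently disabled (only commented-out code remains).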

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

# Read the CSV file
org_df = pd.read_csv('/content/drive/MyDrive/CUNY/Capstone/Data/model/Data_set_for_training_and_testing_final.csv')

# Convert 'Date' to datetime and use it as the index
org_df['Date'] = pd.to_datetime(org_df['Date'])
org_df = org_df.set_index('Date')

# Split data into features (X) and the two targets (y)
X = org_df.drop(["Close", "up_down"], axis=1).copy()
y = org_df[["Close", "up_down"]].copy()

# Report infinite values BEFORE replacing them: running this check after the
# replacement (as the cell used to) can only ever print an empty list.
inf_columns = X.columns[X.isin([np.inf, -np.inf]).any()].tolist()
print("Columns with infinite values:", inf_columns)

# Replace infinite values with NaN, then fill every NaN with its column maximum.
# NOTE(review): this also maps -inf and genuinely missing values to the column
# maximum -- confirm that is intended before changing it, since the saved
# models were presumably trained on data prepared this way.
X = X.replace([np.inf, -np.inf], np.nan)
X = X.fillna(X.max())

# Check the count of NaN values in each column (a column that is entirely NaN
# has no maximum and would still show up here)
print("NaN counts per column:")
print(X.isnull().sum())

# No scaling happens in this cell; the features are standardized later, per
# feature subset, right before the models are evaluated.
X_df = X.copy()

# Combine each target with the (unscaled) features
org_linear_regression_df = pd.concat([X_df, y['Close']], axis=1)
org_binary_classification_df = pd.concat([X_df, y['up_down']], axis=1)


# # Revert transformations
# # 1. Inverse Scaling
# # Restore to original scale
# X_return = scaler.inverse_transform(X_scaled_df)

# # Create a DataFrame for the restored values
# X_return_df = pd.DataFrame(X_return, columns=X.columns, index=X.index)

# linear_regression_return_df = pd.concat([y['Close'], X_return_df], axis=1)

# # 2. Inverse Log Transformation for 'Close'
# linear_regression_return_df['Close'] = np.expm1(org_linear_regression_df['Close'])

# # Create a new dataframe with the original values
# original_values_df = pd.concat([y['Close'], X_return_df], axis=1)

# # Compare to verify the transformations
# comparison_df = original_values_df - linear_regression_return_df

# print(comparison_df.head())


Columns with infinite values: []
NaN counts per column:
previous_date_close_1    0
previous_date_close_2    0
previous_date_close_3    0
previous_date_close_4    0
previous_date_close_5    0
                        ..
WILLR_7_delta            0
CCI_14_delta             0
ROC_14_delta             0
WILLR_14_delta           0
Change_abs               0
Length: 231, dtype: int64


In [None]:
def select_dataset(org_linear_regression_df, org_binary_classification_df, model_type, analysis):
    """Return the feature subset for one analysis type together with its target column.

    Parameters
    ----------
    org_linear_regression_df : pandas.DataFrame
        Frame holding every feature column plus the 'Close' target.
    org_binary_classification_df : pandas.DataFrame
        Frame holding every feature column plus the 'up_down' target.
    model_type : str
        'linear' selects from the regression frame (target 'Close', last column);
        'binary' selects from the classification frame (target 'up_down', last column).
    analysis : str
        Key of the feature subset: 'FA', 'FA1', 'SA', 'TA', 'CA', 'PA' or 'PAS'.

    Returns
    -------
    pandas.DataFrame
        The selected feature columns, in the order listed below, followed by the target.

    Raises
    ------
    ValueError
        If ``analysis`` or ``model_type`` is not one of the supported values.
    """

    # Define a dictionary for column selections.
    # NOTE: 'Close_delta' and 'Open_delta' appear twice in 'FA' and 'FA1'. The
    # duplicates are kept on purpose: selecting with this list yields duplicated
    # columns, and the feature count (e.g. 45 for 'FA1') is what the saved
    # checkpoints were trained with. Do not reorder or de-duplicate without retraining.
    columns_dict = {
        'FA': [
            'Open', 'High', 'Low', 'Vol.', 'Change %', 'Close_delta', 'Open_delta', 'High_delta', 'Low_delta', 'Vol._delta', 'Change %_delta',
            'SOFR', 'HQMCB10YR', 'WM1NS', 'WM2NS', 'CC4WSA', 'CPIAUCSL', 'CSUSHPINSA', 'DCOILWTICO', 'FYFSD', 'GFDEBTN', 'GDPC1', 'ICSA', 'PAYEMS',
            'USSLIND', 'USALOLITONOSTSAM', 'DTWEXBGS', 'DEXUSEU', 'DEXJPUS', 'DEXCHUS', 'DEXUSUK', 'BOGMBASE', 'DGORDER', 'PMI_Actual', 'PMI_Forecast',
            'PMI_Previous', 'VIXCLS', 'Close_delta', 'Open_delta',
            'SOFR_delta', 'HQMCB10YR_delta', 'WM1NS_delta', 'WM2NS_delta', 'CC4WSA_delta', 'CPIAUCSL_delta', 'CSUSHPINSA_delta', 'DCOILWTICO_delta',
            'FYFSD_delta', 'GFDEBTN_delta', 'GDPC1_delta', 'ICSA_delta', 'PAYEMS_delta', 'USSLIND_delta', 'USALOLITONOSTSAM_delta', 'DTWEXBGS_delta',
            'DEXUSEU_delta', 'DEXJPUS_delta', 'DEXCHUS_delta', 'DEXUSUK_delta', 'BOGMBASE_delta', 'DGORDER_delta', 'PMI_Actual_delta', 'PMI_Forecast_delta',
            'PMI_Previous_delta', 'VIXCLS_delta',
            'previous_date_close_1', 'previous_date_close_2', 'previous_date_close_3', 'previous_date_close_4', 'previous_date_close_5'
        ],

        'FA1': [
            'Open', 'High', 'Low', 'Vol.', 'Change %', 'Close_delta', 'Open_delta', 'High_delta', 'Low_delta', 'Vol._delta', 'Change %_delta',
            'SOFR', 'HQMCB10YR', 'WM1NS', 'WM2NS', 'CC4WSA', 'CPIAUCSL', 'CSUSHPINSA', 'DCOILWTICO', 'FYFSD', 'GFDEBTN', 'GDPC1', 'ICSA', 'PAYEMS',
            'USSLIND', 'USALOLITONOSTSAM', 'DTWEXBGS', 'DEXUSEU', 'DEXJPUS', 'DEXCHUS', 'DEXUSUK', 'BOGMBASE', 'DGORDER', 'PMI_Actual', 'PMI_Forecast',
            'PMI_Previous', 'VIXCLS', 'Close_delta', 'Open_delta',
            'VIXCLS_delta',
            'previous_date_close_1', 'previous_date_close_2', 'previous_date_close_3', 'previous_date_close_4', 'previous_date_close_5'
        ],

        'SA': [
            'Open', 'High', 'Low', 'Vol.', 'Change %', 'Close_delta', 'Open_delta', 'High_delta', 'Low_delta', 'Vol._delta', 'Change %_delta',
            'previous_date_close_1', 'previous_date_close_2', 'previous_date_close_3', 'previous_date_close_4', 'previous_date_close_5',
            'negative', 'neutral', 'positive', 'negative_weight', 'neutral_weight', 'positive_weight',
            'SA_simple_score', 'SA_weighted_score',
            'Positive_count', 'Neutral_count', 'Negative_count',
            'positive_score_percentage_simple', 'negative_score_percentage_simple',
            'positive_negative_score_ratio',
            'positive_score_percentage_weighted', 'negative_score_percentage_weighted',
            'positive_negative_score_ratio_weighted',
            'positive_count_percentage', 'negative_count_percentage',
            'positive_negative_count_percentage_ratio',
            'Positive_count_weighted', 'Neutral_count_weighted', 'Negative_count_weighted',
            'positive_count_percentage_weighted', 'negative_count_percentage_weighted',
            'positive_negative_count_percentage_ratio_weighted'
        ],

        'TA': [
            'Open', 'High', 'Low', 'Vol.', 'Change %', 'Close_delta', 'Open_delta', 'High_delta', 'Low_delta', 'Vol._delta', 'Change %_delta',
            'VIXCLS', 'SMA_5', 'SMA_12', 'SMA_26', 'EMA_5', 'EMA_12', 'EMA_26', 'SMA_50', 'SMA_200',
            'macd_12_26_9', 'macd_h_12_26_9', 'macd_s_12_26_9', 'macd_5_9_3', 'macd_h_5_9_3', 'macd_s_5_9_3',
            'macd_24_52_18', 'macd_h_24_52_18', 'macd_s_24_52_18',
            'STOCHk_9_3', 'STOCHd_9_3', 'STOCHk_18_4', 'STOCHd_18_4', 'STOCHk_35_8', 'STOCHd_35_8',
            'RSI_7', 'RSI_14',
            'BB_lower_band_5', 'BB_middle_band_5', 'BB_upper_band_5', 'BB_Width_5', 'BB_Percent_5',
            'BB_lower_band_10', 'BB_middle_band_10', 'BB_upper_band_10', 'BB_Width_10', 'BB_Percent_10',
            'BB_lower_band_20', 'BB_middle_band_20', 'BB_upper_band_20', 'BB_Width_20', 'BB_Percent_20',
            'OBV',
            'ADX_14', 'DMP_14', 'DMN_14', 'ADX_30', 'DMP_30', 'DMN_30', 'ADX_50', 'DMP_50', 'DMN_50',
            'Fib_0.236_5', 'Fib_0.382_5', 'Fib_0.618_5', 'Fib_0.236_20', 'Fib_0.382_20', 'Fib_0.618_20',
            'Fib_0.236_100', 'Fib_0.382_100', 'Fib_0.618_100',
            'CCI_7', 'ROC_7', 'WILLR_7', 'CCI_14', 'ROC_14', 'WILLR_14',
            'VIXCLS_delta', 'SMA_5_delta', 'SMA_12_delta', 'SMA_26_delta', 'EMA_5_delta', 'EMA_12_delta', 'EMA_26_delta',
            'SMA_50_delta', 'SMA_200_delta',
            'macd_12_26_9_delta', 'macd_h_12_26_9_delta', 'macd_s_12_26_9_delta', 'macd_5_9_3_delta', 'macd_h_5_9_3_delta', 'macd_s_5_9_3_delta',
            'macd_24_52_18_delta', 'macd_h_24_52_18_delta', 'macd_s_24_52_18_delta',
            'STOCHk_9_3_delta', 'STOCHd_9_3_delta', 'STOCHk_18_4_delta', 'STOCHd_18_4_delta', 'STOCHk_35_8_delta', 'STOCHd_35_8_delta',
            'RSI_7_delta', 'RSI_14_delta',
            'BB_lower_band_5_delta', 'BB_middle_band_5_delta', 'BB_upper_band_5_delta', 'BB_Width_5_delta', 'BB_Percent_5_delta',
            'BB_lower_band_10_delta', 'BB_middle_band_10_delta', 'BB_upper_band_10_delta', 'BB_Width_10_delta', 'BB_Percent_10_delta',
            'BB_lower_band_20_delta', 'BB_middle_band_20_delta', 'BB_upper_band_20_delta', 'BB_Width_20_delta', 'BB_Percent_20_delta',
            'OBV_delta',
            'ADX_14_delta', 'DMP_14_delta', 'DMN_14_delta', 'ADX_30_delta', 'DMP_30_delta', 'DMN_30_delta', 'ADX_50_delta', 'DMP_50_delta', 'DMN_50_delta',
            'Fib_0.236_5_delta', 'Fib_0.382_5_delta', 'Fib_0.618_5_delta', 'Fib_0.236_20_delta', 'Fib_0.382_20_delta', 'Fib_0.618_20_delta',
            'Fib_0.236_100_delta', 'Fib_0.382_100_delta', 'Fib_0.618_100_delta',
            'CCI_7_delta', 'ROC_7_delta', 'WILLR_7_delta', 'CCI_14_delta', 'ROC_14_delta', 'WILLR_14_delta',
            'Change_abs',
            'previous_date_close_1', 'previous_date_close_2', 'previous_date_close_3', 'previous_date_close_4', 'previous_date_close_5'
        ],

        'CA': [
            'Open', 'High', 'Low', 'Vol.', 'Change %', 'Close_delta', 'Open_delta', 'High_delta', 'Low_delta', 'Vol._delta', 'Change %_delta',
            'far_month_openInterest_0', 'near_month_openInterest', 'total_month_openInterest', 'far_month_openInterest_0_delta', 'near_month_openInterest_delta', 'total_month_openInterest_delta',
            'previous_date_close_1', 'previous_date_close_2', 'previous_date_close_3', 'previous_date_close_4', 'previous_date_close_5'
        ],

        'PA': [
            'Open', 'High', 'Low', 'Vol.', 'Change %', 'Close_delta', 'Open_delta', 'High_delta', 'Low_delta', 'Vol._delta', 'Change %_delta',
            'previous_date_close_1', 'previous_date_close_2', 'previous_date_close_3', 'previous_date_close_4', 'previous_date_close_5'
        ],

        'PAS': [
            'Open', 'High', 'Low', 'Vol.',
        ]

    }

    # Check if the analysis type is valid
    if analysis not in columns_dict:
        raise ValueError(f"Invalid analysis type: {analysis}")

    selected_columns = columns_dict[analysis]

    if model_type == "linear":
        # Target column goes last; downstream code treats the last column as the label.
        return org_linear_regression_df[selected_columns + ['Close']]

    if model_type == "binary":
        # Fixed: the target must be appended as a one-element list. The previous
        # `[[selected_columns + 'up_down']]` added a str to a list (TypeError).
        return org_binary_classification_df[selected_columns + ['up_down']]

    raise ValueError(f"Invalid model type: {model_type}")




In [None]:
# Feature subsets per analysis type, used further down to look up how many
# input features a saved model expects.
# NOTE: this repeats the dictionary defined inside `select_dataset`; the two
# must be kept identical.
# NOTE: the order of the names, and the repeated 'Close_delta' / 'Open_delta'
# entries in 'FA' and 'FA1', determine the feature count (e.g. 45 for 'FA1'),
# so do not reorder or de-duplicate without retraining the saved models.
columns_dict = {
    # 'FA': price columns, macro/market series and their *_delta columns, previous closes
    'FA': [
          'Open', 'High', 'Low', 'Vol.', 'Change %', 'Close_delta', 'Open_delta', 'High_delta', 'Low_delta', 'Vol._delta', 'Change %_delta',
          'SOFR', 'HQMCB10YR', 'WM1NS', 'WM2NS', 'CC4WSA', 'CPIAUCSL', 'CSUSHPINSA', 'DCOILWTICO', 'FYFSD', 'GFDEBTN', 'GDPC1', 'ICSA', 'PAYEMS',
          'USSLIND', 'USALOLITONOSTSAM', 'DTWEXBGS', 'DEXUSEU', 'DEXJPUS', 'DEXCHUS', 'DEXUSUK', 'BOGMBASE', 'DGORDER', 'PMI_Actual', 'PMI_Forecast',
          'PMI_Previous', 'VIXCLS', 'Close_delta', 'Open_delta',
          'SOFR_delta', 'HQMCB10YR_delta', 'WM1NS_delta', 'WM2NS_delta', 'CC4WSA_delta', 'CPIAUCSL_delta', 'CSUSHPINSA_delta', 'DCOILWTICO_delta',
          'FYFSD_delta', 'GFDEBTN_delta', 'GDPC1_delta', 'ICSA_delta', 'PAYEMS_delta', 'USSLIND_delta', 'USALOLITONOSTSAM_delta', 'DTWEXBGS_delta',
          'DEXUSEU_delta', 'DEXJPUS_delta', 'DEXCHUS_delta', 'DEXUSUK_delta', 'BOGMBASE_delta', 'DGORDER_delta', 'PMI_Actual_delta', 'PMI_Forecast_delta',
          'PMI_Previous_delta', 'VIXCLS_delta',
          'previous_date_close_1',	'previous_date_close_2',	'previous_date_close_3',	'previous_date_close_4',	'previous_date_close_5'
    ],

    # 'FA1': same as 'FA' but with 'VIXCLS_delta' as the only macro *_delta column
    'FA1': [
          'Open', 'High', 'Low', 'Vol.', 'Change %', 'Close_delta', 'Open_delta', 'High_delta', 'Low_delta', 'Vol._delta', 'Change %_delta',
          'SOFR', 'HQMCB10YR', 'WM1NS', 'WM2NS', 'CC4WSA', 'CPIAUCSL', 'CSUSHPINSA', 'DCOILWTICO', 'FYFSD', 'GFDEBTN', 'GDPC1', 'ICSA', 'PAYEMS',
          'USSLIND', 'USALOLITONOSTSAM', 'DTWEXBGS', 'DEXUSEU', 'DEXJPUS', 'DEXCHUS', 'DEXUSUK', 'BOGMBASE', 'DGORDER', 'PMI_Actual', 'PMI_Forecast',
          'PMI_Previous', 'VIXCLS', 'Close_delta', 'Open_delta',
            'VIXCLS_delta',
          'previous_date_close_1',	'previous_date_close_2',	'previous_date_close_3',	'previous_date_close_4',	'previous_date_close_5'
    ],

    # 'SA': price columns, previous closes and sentiment score/count columns
    'SA': [
    'Open', 'High', 'Low', 'Vol.', 'Change %', 'Close_delta', 'Open_delta', 'High_delta', 'Low_delta', 'Vol._delta', 'Change %_delta',
    'previous_date_close_1',	'previous_date_close_2',	'previous_date_close_3',	'previous_date_close_4',	'previous_date_close_5',
    'negative', 'neutral', 'positive', 'negative_weight', 'neutral_weight', 'positive_weight',
    'SA_simple_score', 'SA_weighted_score',
    'Positive_count', 'Neutral_count', 'Negative_count',
    'positive_score_percentage_simple', 'negative_score_percentage_simple',
    'positive_negative_score_ratio',
    'positive_score_percentage_weighted', 'negative_score_percentage_weighted',
    'positive_negative_score_ratio_weighted',
    'positive_count_percentage', 'negative_count_percentage',
    'positive_negative_count_percentage_ratio',
    'Positive_count_weighted', 'Neutral_count_weighted', 'Negative_count_weighted',
    'positive_count_percentage_weighted', 'negative_count_percentage_weighted',
    'positive_negative_count_percentage_ratio_weighted'
    ],

    # 'TA': price columns, technical indicators and their *_delta columns, previous closes
    'TA': [
    'Open', 'High', 'Low', 'Vol.', 'Change %', 'Close_delta', 'Open_delta', 'High_delta', 'Low_delta', 'Vol._delta', 'Change %_delta',
    'VIXCLS', 'SMA_5', 'SMA_12', 'SMA_26', 'EMA_5', 'EMA_12', 'EMA_26', 'SMA_50', 'SMA_200',
    'macd_12_26_9', 'macd_h_12_26_9', 'macd_s_12_26_9', 'macd_5_9_3', 'macd_h_5_9_3', 'macd_s_5_9_3',
    'macd_24_52_18', 'macd_h_24_52_18', 'macd_s_24_52_18',
    'STOCHk_9_3', 'STOCHd_9_3', 'STOCHk_18_4', 'STOCHd_18_4', 'STOCHk_35_8', 'STOCHd_35_8',
    'RSI_7', 'RSI_14',
    'BB_lower_band_5', 'BB_middle_band_5', 'BB_upper_band_5', 'BB_Width_5', 'BB_Percent_5',
    'BB_lower_band_10', 'BB_middle_band_10', 'BB_upper_band_10', 'BB_Width_10', 'BB_Percent_10',
    'BB_lower_band_20', 'BB_middle_band_20', 'BB_upper_band_20', 'BB_Width_20', 'BB_Percent_20',
    'OBV',
    'ADX_14', 'DMP_14', 'DMN_14', 'ADX_30', 'DMP_30', 'DMN_30', 'ADX_50', 'DMP_50', 'DMN_50',
    'Fib_0.236_5', 'Fib_0.382_5', 'Fib_0.618_5', 'Fib_0.236_20', 'Fib_0.382_20', 'Fib_0.618_20',
    'Fib_0.236_100', 'Fib_0.382_100', 'Fib_0.618_100',
    'CCI_7', 'ROC_7', 'WILLR_7', 'CCI_14', 'ROC_14', 'WILLR_14',
    'VIXCLS_delta', 'SMA_5_delta', 'SMA_12_delta', 'SMA_26_delta', 'EMA_5_delta', 'EMA_12_delta', 'EMA_26_delta',
    'SMA_50_delta', 'SMA_200_delta',
    'macd_12_26_9_delta', 'macd_h_12_26_9_delta', 'macd_s_12_26_9_delta', 'macd_5_9_3_delta', 'macd_h_5_9_3_delta', 'macd_s_5_9_3_delta',
    'macd_24_52_18_delta', 'macd_h_24_52_18_delta', 'macd_s_24_52_18_delta',
    'STOCHk_9_3_delta', 'STOCHd_9_3_delta', 'STOCHk_18_4_delta', 'STOCHd_18_4_delta', 'STOCHk_35_8_delta', 'STOCHd_35_8_delta',
    'RSI_7_delta', 'RSI_14_delta',
    'BB_lower_band_5_delta', 'BB_middle_band_5_delta', 'BB_upper_band_5_delta', 'BB_Width_5_delta', 'BB_Percent_5_delta',
    'BB_lower_band_10_delta', 'BB_middle_band_10_delta', 'BB_upper_band_10_delta', 'BB_Width_10_delta', 'BB_Percent_10_delta',
    'BB_lower_band_20_delta', 'BB_middle_band_20_delta', 'BB_upper_band_20_delta', 'BB_Width_20_delta', 'BB_Percent_20_delta',
    'OBV_delta',
    'ADX_14_delta', 'DMP_14_delta', 'DMN_14_delta', 'ADX_30_delta', 'DMP_30_delta', 'DMN_30_delta', 'ADX_50_delta', 'DMP_50_delta', 'DMN_50_delta',
    'Fib_0.236_5_delta', 'Fib_0.382_5_delta', 'Fib_0.618_5_delta', 'Fib_0.236_20_delta', 'Fib_0.382_20_delta', 'Fib_0.618_20_delta',
    'Fib_0.236_100_delta', 'Fib_0.382_100_delta', 'Fib_0.618_100_delta',
    'CCI_7_delta', 'ROC_7_delta', 'WILLR_7_delta', 'CCI_14_delta', 'ROC_14_delta', 'WILLR_14_delta',
    'Change_abs',
    'previous_date_close_1', 'previous_date_close_2', 'previous_date_close_3', 'previous_date_close_4', 'previous_date_close_5'
    ],

    # 'CA': price columns, open-interest columns and their *_delta columns, previous closes
    'CA': [
      'Open', 'High', 'Low', 'Vol.', 'Change %', 'Close_delta', 'Open_delta', 'High_delta', 'Low_delta', 'Vol._delta', 'Change %_delta',
      'far_month_openInterest_0', 'near_month_openInterest', 'total_month_openInterest', 'far_month_openInterest_0_delta', 'near_month_openInterest_delta', 'total_month_openInterest_delta',
      'previous_date_close_1', 'previous_date_close_2', 'previous_date_close_3', 'previous_date_close_4', 'previous_date_close_5'
    ],

    # 'PA': price columns, their *_delta columns and previous closes only
    'PA': [
      'Open', 'High', 'Low', 'Vol.', 'Change %', 'Close_delta', 'Open_delta', 'High_delta', 'Low_delta', 'Vol._delta', 'Change %_delta',
      'previous_date_close_1', 'previous_date_close_2', 'previous_date_close_3', 'previous_date_close_4', 'previous_date_close_5'
    ],

    # 'PAS': the four raw price/volume columns
    'PAS': [
      'Open', 'High', 'Low', 'Vol.',
    ]

}

# Show the number of features in the 'TA' subset as the cell output.
len(columns_dict.get('TA'))

149

In [None]:
def create_dataset(dataset, lookback):
    """Turn a 2-D array into sliding-window samples for sequence models.

    Sample ``i`` uses rows ``i .. i+lookback-1`` of every column except the
    last as features, and the last column of row ``i+lookback`` as the target.

    Parameters
    ----------
    dataset : array-like of shape (n_rows, n_columns)
        Numeric data whose last column is the target.
    lookback : int
        Number of consecutive rows per sample; must be at least 1.

    Returns
    -------
    (torch.Tensor, torch.Tensor)
        Float32 features of shape (n_rows - lookback, lookback, n_columns - 1)
        and float32 targets of shape (n_rows - lookback, 1), both moved to the
        module-level ``device``. When there are not more rows than ``lookback``
        both tensors have zero samples.

    Raises
    ------
    ValueError
        If ``lookback`` is smaller than 1.
    """
    if lookback < 1:
        raise ValueError(f"lookback must be >= 1, got {lookback}")

    # Convert once to float32: building a tensor from a Python list of ndarrays
    # (the previous approach) is very slow and triggers a PyTorch warning.
    data = np.asarray(dataset, dtype=np.float32)
    n_samples = max(len(data) - lookback, 0)

    if n_samples:
        features = np.stack([data[i:i + lookback, :-1] for i in range(n_samples)])
    else:
        features = np.empty((0, lookback, data.shape[1] - 1), dtype=np.float32)

    # Row i+lookback, last column -- the same element the original reached via
    # dataset[i+1:i+lookback+1][-1][-1].
    targets = data[lookback:, -1].copy()

    X = torch.from_numpy(features).to(device)
    y = torch.from_numpy(targets).view(-1, 1).to(device)
    return X, y



In [None]:
def loss_curve(epochs, train_loss, test_loss):
    """Plot training and validation loss against the epoch index with Plotly.

    Parameters
    ----------
    epochs : int
        Number of epochs; the x axis is ``0 .. epochs-1``.
    train_loss, test_loss : sequence of float
        Per-epoch loss values for the training and validation sets.
    """
    epoch_axis = np.arange(epochs)
    curves = (
        ('Train Loss', train_loss),
        ('Validation Loss', test_loss),
    )

    fig = go.Figure()
    for label, values in curves:
        fig.add_trace(go.Scatter(x=epoch_axis, y=values, mode='lines', name=label))

    layout = dict(
        title="Loss curve for single lstm",
        xaxis_title="epochs",
        yaxis_title="rmse",
    )
    fig.update_layout(**layout)
    fig.show()
# loss_curve(epochs, train_loss, test_loss)

In [None]:
import plotly.graph_objects as go
from sklearn.preprocessing import StandardScaler


def predict_plot(model, X_train, X_val, data, model_name):
    """Plot train/validation predictions against the true series in price units.

    Relies on three notebook-level names set by the calling cell: ``lookback``
    (window length used to build the samples), ``scaler`` (the fitted
    ``StandardScaler`` that produced ``data``) and ``dataset`` (the DataFrame
    whose index supplies the x axis).

    Parameters
    ----------
    model : torch.nn.Module
        Model mapping the window tensors to one value per sample.
    X_train, X_val : torch.Tensor
        Windows built from the first 80% and the remaining rows of ``data``.
    data : numpy.ndarray
        Standardized data whose last column is the target.
    model_name : str
        Used only to pick the plot title ('GRU' or 'LSTM' in the name).
    """
    split_index = int(0.8 * len(data))

    # NaN-filled canvases as long as the data, so unpredicted positions stay blank.
    train_plot = np.full(len(data), np.nan)
    test_plot = np.full(len(data), np.nan)

    with torch.no_grad():
        # Predictions on the training set; the first `lookback` rows have no prediction.
        y_pred_train = model(X_train)
        train_plot[lookback:split_index] = y_pred_train.view(-1).cpu().numpy()

        # Predictions on the validation set
        y_pred_val = model(X_val)
        test_plot[split_index + lookback:] = y_pred_val.view(-1).cpu().numpy()

    # Fixed: the model predicts the LAST column of `data`, so the inverse
    # transform must use that column's statistics; index 0 belongs to the first
    # feature column, not to the target.
    target_scale = scaler.scale_[-1]
    target_mean = scaler.mean_[-1]
    train_price = (train_plot * target_scale) + target_mean
    test_price = (test_plot * target_scale) + target_mean
    real_price = (data[:, -1] * target_scale) + target_mean

    # Create the plot
    fig = go.Figure()
    for trace_name, series in (('Train', train_price), ('Validation', test_price), ('True', real_price)):
        fig.add_trace(go.Scatter(x=dataset.index, y=series, mode='lines', name=trace_name))

    # The plotted values are de-standardized, so the y axis is labelled as a price.
    if 'GRU' in model_name:
        fig.update_layout(
            title="Single GRU prediction of S&P 500 E-mini futures Close Price",
            xaxis_title="Dates",
            yaxis_title="Close Price"
        )
    elif 'LSTM' in model_name:
        fig.update_layout(
            title="Single LSTM prediction of S&P 500 E-mini futures Close Price",
            xaxis_title="Dates",
            yaxis_title="Close Price"
        )
    fig.show()

# predict_plot(model, X_train, X_val, data)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

import plotly.graph_objects as go

def statistics_index(model, X_val, y_val):
    """Compute and print MSE, MAE, RMSE and R^2 of ``model`` on a validation set.

    The model's train/eval mode is not changed here; call ``model.eval()``
    beforehand if layers such as dropout should be disabled.

    Parameters
    ----------
    model : callable
        Maps ``X_val`` to predictions (typically a ``torch.nn.Module``).
    X_val : torch.Tensor
        Validation inputs.
    y_val : torch.Tensor or array-like
        True targets, same shape as the predictions.

    Returns
    -------
    tuple
        ``(mse, mae, rmse, r2)``.
    """
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        y_pred = model(X_val)
    y_true = y_val

    # Convert each side on its own (previously nothing was converted unless
    # BOTH were tensors) and move to CPU in case they live on the GPU.
    if isinstance(y_pred, torch.Tensor):
        y_pred = y_pred.detach().cpu().numpy()
    if isinstance(y_true, torch.Tensor):
        y_true = y_true.detach().cpu().numpy()

    mse = mean_squared_error(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)

    # Summary
    print(f"\nMSE: {mse}")
    print(f"MAE: {mae}")
    print(f"RMSE: {rmse}")
    print(f"R^2 Score: {r2}")

    return mse, mae, rmse, r2

# Example usage:
# mse, mae, rmse, r2 = statistics_index(model, X_val, y_val)


In [None]:
import torch
import datetime



def save_model(model, model_name, save_dir="/content/drive/MyDrive/CUNY/Capstone/Data/model/S_LSTM"):
    """Save ``model.state_dict()`` to ``<save_dir>/<model_name>_<timestamp>.pth``.

    Parameters
    ----------
    model : torch.nn.Module
        Model whose state dict is written.
    model_name : str
        File-name prefix; it is also printed.
    save_dir : str, optional
        Target directory. Defaults to the directory that used to be hard-coded,
        so existing calls behave as before.

    Returns
    -------
    str
        The US/Eastern timestamp (``YYYYmmdd_HHMMSS``) embedded in the file name.
    """
    # Imported here because this cell does not import pytz itself; the function
    # previously only worked if a later cell had already been executed.
    import pytz

    # Timezone-aware "now" in US/Eastern, built the same way as in save_record.
    current_time = datetime.datetime.now(pytz.timezone('US/Eastern')).strftime("%Y%m%d_%H%M%S")
    print(model_name)

    torch.save(model.state_dict(), f"{save_dir}/{model_name}_{current_time}.pth")
    return current_time


In [None]:
import pandas as pd
import datetime
import pytz

def save_record(csv_file_path):
    """Append one row describing the current experiment to an existing CSV file.

    The row is built from notebook-level variables that must be defined before
    the call: ``analyst``, ``model_name``, ``model_task``, ``hidden_size1``,
    ``dropout1``, ``learning_rate``, ``mse``, ``mae``, ``rmse`` and ``r2``.

    Parameters
    ----------
    csv_file_path : str
        Path of the record file; it is read, extended by one row and rewritten.
    """
    # Read the existing CSV file into a DataFrame
    df = pd.read_csv(csv_file_path)

    # One record for this run, time-stamped in US/Eastern
    results = {
        'Time': datetime.datetime.now(pytz.timezone('US/Eastern')).strftime("%Y-%m-%d %H:%M:%S"),
        'analyst': analyst,
        'model_name': model_name,
        'model_task': model_task,
        'hidden_size1': hidden_size1,
        'dropout1': dropout1,
        'learning_rate': learning_rate,
        'mse': mse,
        'mae': mae,
        'rmse': rmse,
        'r2': r2,
    }

    # DataFrame.append was removed in pandas 2.0; pd.concat with a one-row
    # frame is the supported way to add the record.
    df = pd.concat([df, pd.DataFrame([results])], ignore_index=True)

    # Save the updated DataFrame back to the CSV file
    df.to_csv(csv_file_path, index=False)


# LSTM
Single-layer LSTM and GRU models with configurable hyperparameters (hidden size and dropout rate).

In [None]:
import itertools
from tensorflow.keras.callbacks import EarlyStopping

# Single-layer LSTM model
class S_LSTM(nn.Module):
    """One LSTM layer, dropout, and a linear head that outputs one value per sequence."""

    def __init__(self, input_size, hidden_size1, dropout1):
        super().__init__()
        # Layers are created in the order recurrent -> dropout -> linear so that
        # seeded parameter initialisation is unchanged.
        self.lstm1 = nn.LSTM(input_size=input_size, hidden_size=hidden_size1, num_layers=1, batch_first=True)
        self.dropout1 = nn.Dropout(p=dropout1)
        self.linear = nn.Linear(in_features=hidden_size1, out_features=1)

    def forward(self, x):
        # batch_first=True: `x` is indexed (batch, step, feature)
        sequence_out, _state = self.lstm1(x)
        regularised = self.dropout1(sequence_out)
        # Only the final time step feeds the regression head.
        last_step = regularised[:, -1, :]
        return self.linear(last_step)

    def hyperparameters(self):
        """Return the hidden size and dropout probability this model was built with."""
        return dict(
            hidden_size1=self.lstm1.hidden_size,
            dropout1=self.dropout1.p,
        )

class S_GRU(nn.Module):
    """One GRU layer, dropout, and a linear head that outputs one value per sequence."""

    def __init__(self, input_size, hidden_size1, dropout1):
        super().__init__()
        # The attribute is called `lstm1` although it holds a GRU; the name is
        # part of the state-dict keys, so it is kept as is.
        self.lstm1 = nn.GRU(input_size=input_size, hidden_size=hidden_size1, num_layers=1, batch_first=True)
        self.dropout1 = nn.Dropout(p=dropout1)
        self.linear = nn.Linear(in_features=hidden_size1, out_features=1)

    def forward(self, x):
        # batch_first=True: `x` is indexed (batch, step, feature)
        sequence_out, _state = self.lstm1(x)
        regularised = self.dropout1(sequence_out)
        # Only the final time step feeds the regression head.
        last_step = regularised[:, -1, :]
        return self.linear(last_step)

    def hyperparameters(self):
        """Return the hidden size and dropout probability this model was built with."""
        return dict(
            hidden_size1=self.lstm1.hidden_size,
            dropout1=self.dropout1.p,
        )



# Training-loop helper
def trainer(epochs, loader, X_train, y_train, X_val, y_val, model, criterion, optimizer):
    """Train ``model`` for ``epochs`` epochs and record per-epoch RMSE.

    After every epoch the model is switched to eval mode and the square root of
    ``criterion`` is computed on the full training and validation tensors.
    Progress is printed every 100 epochs.

    Returns
    -------
    (list, list)
        Per-epoch training values and per-epoch validation values.
    """
    def _root_of_criterion(inputs, targets):
        # sqrt of the criterion value (RMSE when the criterion is MSE)
        return np.sqrt(criterion(model(inputs), targets).item())

    train_loss = []
    test_loss = []
    for epoch in range(epochs):
        model.train()
        for features, targets in loader:
            batch_loss = criterion(model(features), targets)
            batch_loss.backward()
            optimizer.step()
            optimizer.zero_grad()

        model.eval()
        with torch.no_grad():
            train_rmse = _root_of_criterion(X_train, y_train)
            train_loss.append(train_rmse)
            test_rmse = _root_of_criterion(X_val, y_val)
            test_loss.append(test_rmse)
            if (epoch + 1) % 100 == 0:
                print('epoch %d train rmse %.4f test rmse %.4f' % (epoch+1, train_rmse, test_rmse))
    return train_loss, test_loss


# Generate meta_features

In [None]:
# import torch

# path = "/content/drive/MyDrive/CUNY/Capstone/Data/model/Final"
# base_name = 'FA1_S_LSTM_linear_hidden_size1_256_dropout1_0.7_lr_0.001_20240108_001558.pth'

# # Construct the full path to the saved PyTorch model
# model_path = f'{path}/{base_name}'

# # Load the model
# loaded_model = torch.load(model_path)

# # If the model is loaded successfully, print a confirmation message
# print(f"Model {base_name} loaded successfully!")


In [None]:
import os
import torch
import re
import copy

model_task = 'linear'
path = "/content/drive/MyDrive/CUNY/Capstone/Data/model/Final"
lookback = 5  # use the previous five rows to predict the next row's target

# Get all entries in the directory and keep only regular files
all_files = os.listdir(path)
files_only = [f for f in all_files if os.path.isfile(os.path.join(path, f))]

model_dict = {}      # 'model_<analysis>' -> loaded model
meta_features = []   # one (n_samples, 1) prediction array per loaded model

for model_name in files_only:

    # Construct the full path to the saved PyTorch state dict
    model_path = f'{path}/{model_name}'

    # The file name encodes the analysis type, cell type and hyper-parameters, e.g.
    # 'FA1_S_LSTM_linear_hidden_size1_256_dropout1_0.7_lr_0.001_20240108_001558.pth'
    analyst_match_obj = re.search(r'([A-Z]+\d*)', model_name)
    analyst_match = analyst_match_obj.group(1) if analyst_match_obj else None
    print(analyst_match)
    match = re.search(r'linear_hidden_size1_(\d+)_dropout1_([\d.]+)_lr_', model_name)

    if 'GRU' in model_name:
        model_class = S_GRU
    elif 'LSTM' in model_name:
        model_class = S_LSTM
    else:
        model_class = None

    # Skip files that cannot be interpreted instead of silently reusing the
    # hyper-parameters / model left over from the previous iteration.
    if match is None or model_class is None or analyst_match not in columns_dict:
        print(f"Skipping {model_name}: file name does not describe a known model")
        continue

    hidden_size = int(match.group(1))
    dropout_rate = float(match.group(2))
    input_size = len(columns_dict[analyst_match])

    # Load the state dict once; map_location lets checkpoints saved on a GPU
    # load on a CPU-only runtime as well.
    loaded_model = torch.load(model_path, map_location=device)
    print(f"Model {model_name} loaded successfully!")

    print(f"Model Name: {model_name}")
    print(f"Input Size: {input_size}")
    print(f"Hidden Size: {hidden_size}")
    print(f"Dropout Rate: {dropout_rate}")

    analyst = analyst_match
    dataset = select_dataset(org_linear_regression_df, org_binary_classification_df, model_task, analyst)
    # Raw values (features followed by the target in the last column)
    data = dataset.values
    # Standardize every column.
    # NOTE(review): the scaler is fitted on ALL rows, validation rows included;
    # left unchanged because the saved models were presumably trained this way.
    scaler = StandardScaler()
    data = scaler.fit_transform(data)
    # Training / validation split (first 80% of the rows vs. the rest)
    split_index = int(0.8 * len(data))
    train, test = data[:split_index], data[split_index:]

    X_train, y_train = create_dataset(train, lookback = lookback)
    X_val, y_val = create_dataset(test, lookback = lookback)
    X_ALL, y_ALL = create_dataset(data, lookback = lookback)
    feature_amt = X_ALL.shape[2]

    print('X_ALL.shape: ',X_ALL.shape)
    print('y_ALL.shape: ',y_ALL.shape)

    # Rebuild the architecture named in the file and restore its weights
    model = model_class(input_size=feature_amt, hidden_size1=hidden_size, dropout1=dropout_rate).to(device)
    criterion = nn.MSELoss()
    model.load_state_dict(loaded_model)

    # Ensure the model is in evaluation mode (disables dropout)
    model.eval()

    mse, mae, rmse, r2 = statistics_index(model, X_val, y_val)

    predict_plot(model, X_train, X_val, data, model_name)

    # Predictions over the whole series; .cpu() is required before .numpy()
    # when the model runs on the GPU.
    with torch.no_grad():
        model_predictions = model(X_ALL).cpu().numpy()
    print("model_predictions.shape: ", model_predictions.shape)
    meta_features.append(model_predictions)

    model_dict[f'model_{analyst_match}'] = copy.deepcopy(model)

    # Now you can access the model using the key
    desired_model = model_dict[f'model_{analyst_match}']
    print("_"*50)

if not meta_features:
    raise RuntimeError(f"No usable model checkpoints found in {path}")

# True (standardized) target from the last processed subset; it is the last
# column of every subset, so it lines up with every model's predictions.
y_ALL_np = y_ALL.cpu().numpy()

# Make sure the targets form a single column
y_ALL_np_reshaped = y_ALL_np.reshape(-1, 1)
print("y_ALL_np_reshaped.shape:", y_ALL_np_reshaped.shape)

meta_features.append(y_ALL_np_reshaped)

# One column per model (in loading order) followed by the true target
meta_features_np = np.hstack(meta_features)
print("meta_features.shape: ", meta_features_np.shape)

meta_features = pd.DataFrame(meta_features_np)


print(model_dict)

Model SA_S_GRU_linear_hidden_size1_128_dropout1_0.4_lr_0.005_20240106_004316.pth loaded successfully!
SA
Model Name: SA_S_GRU_linear_hidden_size1_128_dropout1_0.4_lr_0.005_20240106_004316.pth
Input Size: 42
Hidden Size: 128
Dropout Rate: 0.4
X_ALL.shape:  torch.Size([2824, 5, 42])
y_ALL.shape:  torch.Size([2824, 1])

MSE: 0.015551947988569736
MAE: 0.09339756518602371
RMSE: 0.12470745295286179
R^2 Score: 0.8764439204665575


model_predictions.shape:  (2824, 1)
__________________________________________________
Model TA_S_GRU_linear_hidden_size1_256_dropout1_0.4_lr_0.001_20240106_201450.pth loaded successfully!
TA
Model Name: TA_S_GRU_linear_hidden_size1_256_dropout1_0.4_lr_0.001_20240106_201450.pth
Input Size: 149
Hidden Size: 256
Dropout Rate: 0.4
X_ALL.shape:  torch.Size([2824, 5, 149])
y_ALL.shape:  torch.Size([2824, 1])

MSE: 0.028564894571900368
MAE: 0.1282465010881424
RMSE: 0.16901151835918427
R^2 Score: 0.7730595339423267


model_predictions.shape:  (2824, 1)
__________________________________________________
Model FA1_S_LSTM_linear_hidden_size1_256_dropout1_0.7_lr_0.001_20240108_001558.pth loaded successfully!
FA1
Model Name: FA1_S_LSTM_linear_hidden_size1_256_dropout1_0.7_lr_0.001_20240108_001558.pth
Input Size: 45
Hidden Size: 256
Dropout Rate: 0.7
X_ALL.shape:  torch.Size([2824, 5, 45])
y_ALL.shape:  torch.Size([2824, 1])

MSE: 0.025711864233016968
MAE: 0.12894214689731598
RMSE: 0.16034919023513794
R^2 Score: 0.7957261064604761


model_predictions.shape:  (2824, 1)
__________________________________________________
Model PAS_S_LSTM_linear_hidden_size1_128_dropout1_0.5_lr_0.001_20240108_080238.pth loaded successfully!
PAS
Model Name: PAS_S_LSTM_linear_hidden_size1_128_dropout1_0.5_lr_0.001_20240108_080238.pth
Input Size: 4
Hidden Size: 128
Dropout Rate: 0.5
X_ALL.shape:  torch.Size([2824, 5, 4])
y_ALL.shape:  torch.Size([2824, 1])

MSE: 0.0069555677473545074
MAE: 0.06272891163825989
RMSE: 0.08340004831552505
R^2 Score: 0.9447398695416136


model_predictions.shape:  (2824, 1)
__________________________________________________
Model PA_S_LSTM_linear_hidden_size1_256_dropout1_0.7_lr_0.001_20240108_093848.pth loaded successfully!
PA
Model Name: PA_S_LSTM_linear_hidden_size1_256_dropout1_0.7_lr_0.001_20240108_093848.pth
Input Size: 16
Hidden Size: 256
Dropout Rate: 0.7
X_ALL.shape:  torch.Size([2824, 5, 16])
y_ALL.shape:  torch.Size([2824, 1])

MSE: 0.014701406471431255
MAE: 0.09652090817689896
RMSE: 0.12124935537576675
R^2 Score: 0.8832012521716113


model_predictions.shape:  (2824, 1)
__________________________________________________
Model CA_S_GRU_linear_hidden_size1_256_dropout1_0.4_lr_0.005_20240106_115413.pth loaded successfully!
CA
Model Name: CA_S_GRU_linear_hidden_size1_256_dropout1_0.4_lr_0.005_20240106_115413.pth
Input Size: 22
Hidden Size: 256
Dropout Rate: 0.4
X_ALL.shape:  torch.Size([2824, 5, 22])
y_ALL.shape:  torch.Size([2824, 1])

MSE: 0.013443193398416042
MAE: 0.09027586877346039
RMSE: 0.11594478785991669
R^2 Score: 0.8931974043621657


model_predictions.shape:  (2824, 1)
__________________________________________________
y_ALL_np_reshaped.shape: (2824, 1)
meta_features.shape:  (2824, 7)
{'model_SA': S_GRU(
  (lstm1): GRU(42, 128, batch_first=True)
  (dropout1): Dropout(p=0.4, inplace=False)
  (linear): Linear(in_features=128, out_features=1, bias=True)
), 'model_TA': S_GRU(
  (lstm1): GRU(149, 256, batch_first=True)
  (dropout1): Dropout(p=0.4, inplace=False)
  (linear): Linear(in_features=256, out_features=1, bias=True)
), 'model_FA1': S_LSTM(
  (lstm1): LSTM(45, 256, batch_first=True)
  (dropout1): Dropout(p=0.7, inplace=False)
  (linear): Linear(in_features=256, out_features=1, bias=True)
), 'model_PAS': S_LSTM(
  (lstm1): LSTM(4, 128, batch_first=True)
  (dropout1): Dropout(p=0.5, inplace=False)
  (linear): Linear(in_features=128, out_features=1, bias=True)
), 'model_PA': S_LSTM(
  (lstm1): LSTM(16, 256, batch_first=True)
  (dropout1): Dropout(p=0.7, inplace=False)
  (linear): Linear(in_features=256, out_features

In [None]:
import numpy as np
# Bare expression so the notebook renders the meta-feature frame for inspection.
meta_features


Unnamed: 0,0,1,2,3,4,5,6
0,-1.412277,-1.604499,-1.351468,-1.386796,-1.299492,-1.315312,-1.450810
1,-1.572042,-1.598969,-1.365120,-1.389052,-1.297648,-1.311175,-1.448364
2,-1.597703,-1.593607,-1.341371,-1.375773,-1.295966,-1.301813,-1.444560
3,-1.610811,-1.580476,-1.371872,-1.374818,-1.315762,-1.285856,-1.447549
4,-1.554799,-1.580505,-1.364730,-1.370793,-1.296244,-1.302799,-1.447278
...,...,...,...,...,...,...,...
2819,1.281397,1.394165,1.273878,1.339526,1.467257,1.447528,1.341714
2820,1.417875,1.482776,1.334230,1.456270,1.400313,1.394860,1.290081
2821,1.380637,1.342464,1.231290,1.342572,1.398120,1.433992,1.359921
2822,1.308386,1.302697,1.210489,1.332599,1.332658,1.326144,1.348236


In [None]:
# Re-number the rows from 0 and label each column with the model that produced it;
# the last column holds the target ('Close').
meta_features = meta_features.reset_index(drop=True).set_axis(
    ['SA', 'TA', 'FA1', 'PAS', 'PA', 'CA', 'Close'], axis=1
)

# Undo the standardization: value * scale + mean, applied identically to every column.
# NOTE(review): this uses the scaler's first fitted feature (index 0) for all columns --
# presumably that feature is 'Close'; confirm against the cell where `scaler` is fitted.
meta_features_reverse_df = meta_features.mul(scaler.scale_[0]).add(scaler.mean_[0])
meta_features_reverse_df


Unnamed: 0,SA,TA,FA1,PAS,PA,CA,Close
0,1318.664062,1141.739746,1374.633667,1342.117188,1422.474121,1407.912720,1283.197388
1,1171.613647,1146.828979,1362.068237,1340.040771,1424.170776,1411.720337,1285.448608
2,1147.994385,1151.764648,1383.927246,1352.262817,1425.719238,1420.337646,1288.950317
3,1135.929321,1163.851074,1355.853271,1353.142090,1407.498047,1435.024048,1286.198975
4,1187.484375,1163.824341,1362.427734,1356.846924,1425.463379,1419.429932,1286.448975
...,...,...,...,...,...,...,...
2819,3797.972656,3901.766602,3791.052002,3851.476074,3969.042236,3950.882812,3853.489746
2820,3923.590332,3983.325928,3846.601074,3958.929688,3907.425293,3902.406006,3805.965820
2821,3889.315430,3854.180420,3751.853516,3854.279785,3905.406982,3938.424316,3870.248047
2822,3822.814209,3817.577637,3732.707520,3845.100586,3845.154297,3839.158936,3859.492676


In [None]:
# Directional-accuracy evaluation of each base model on the de-standardized values.
# Work on a copy so meta_features_reverse_df is left untouched when this cell is re-run.
meta_features_reverse_evaluate_df = meta_features_reverse_df.copy()

# Actual close of the previous row; NaN on the first row, which has no predecessor.
previous_close = meta_features_reverse_evaluate_df['Close'].shift(1)

# Actual move of the close relative to the previous row (first row defaults to 0).
# The result is assigned back rather than calling fillna(inplace=True) on a column
# selection: that chained in-place form warns on recent pandas and does not modify
# the frame under copy-on-write.
meta_features_reverse_evaluate_df['up_down'] = (
    meta_features_reverse_evaluate_df['Close'] - previous_close
).fillna(0)

# Score model prediction columns only. 'Close' is the ground truth and 'up_down' is a
# price difference, so "<name>_correct_prediction" columns for them carry no information.
prediction_columns = [
    column
    for column in meta_features_reverse_df.columns
    if column not in ('Close', 'up_down')
]

actual_up = meta_features_reverse_evaluate_df['up_down'] >= 0
actual_down = meta_features_reverse_evaluate_df['up_down'] < 0

for column in prediction_columns:
    # Move implied by the model: its predicted value minus the previous actual close.
    predicted_move = meta_features_reverse_evaluate_df[column] - previous_close
    # 1 when predicted and actual moves share a sign (zero counts as "up"), else 0.
    # On the first row predicted_move is NaN, both comparisons are False, so it scores 0.
    meta_features_reverse_evaluate_df[f'{column}_correct_prediction'] = np.where(
        (actual_up & (predicted_move >= 0)) | (actual_down & (predicted_move < 0)),
        1,
        0,
    )

meta_features_reverse_evaluate_df



Unnamed: 0,SA,TA,FA1,PAS,PA,CA,Close,up_down,SA_correct_prediction,TA_correct_prediction,FA1_correct_prediction,PAS_correct_prediction,PA_correct_prediction,CA_correct_prediction,Close_correct_prediction,up_down_correct_prediction
0,1318.664062,1141.739746,1374.633667,1342.117188,1422.474121,1407.912720,1283.197388,0.000000,0,0,0,0,0,0,0,0
1,1171.613647,1146.828979,1362.068237,1340.040771,1424.170776,1411.720337,1285.448608,2.251221,0,0,1,1,1,1,1,0
2,1147.994385,1151.764648,1383.927246,1352.262817,1425.719238,1420.337646,1288.950317,3.501709,0,0,1,1,1,1,1,0
3,1135.929321,1163.851074,1355.853271,1353.142090,1407.498047,1435.024048,1286.198975,-2.751343,1,1,0,0,0,0,1,1
4,1187.484375,1163.824341,1362.427734,1356.846924,1425.463379,1419.429932,1286.448975,0.250000,0,0,1,1,1,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2819,3797.972656,3901.766602,3791.052002,3851.476074,3969.042236,3950.882812,3853.489746,-14.757324,1,0,1,1,0,0,1,1
2820,3923.590332,3983.325928,3846.601074,3958.929688,3907.425293,3902.406006,3805.965820,-47.523926,0,0,1,0,0,0,1,1
2821,3889.315430,3854.180420,3751.853516,3854.279785,3905.406982,3938.424316,3870.248047,64.282227,1,1,0,1,1,1,1,0
2822,3822.814209,3817.577637,3732.707520,3845.100586,3845.154297,3839.158936,3859.492676,-10.755371,1,1,1,1,1,1,1,1


In [None]:
# Keep only the final 560 rows of the evaluation frame and show their summary statistics;
# the mean of each *_correct_prediction column is that column's hit rate over the window.
meta_features_reverse_evaluate_df_last_560 = meta_features_reverse_evaluate_df.iloc[-560:]
meta_features_reverse_evaluate_df_last_560.describe()


Unnamed: 0,SA,TA,FA1,PAS,PA,CA,Close,up_down,SA_correct_prediction,TA_correct_prediction,FA1_correct_prediction,PAS_correct_prediction,PA_correct_prediction,CA_correct_prediction,Close_correct_prediction,up_down_correct_prediction
count,560.0,560.0,560.0,560.0,560.0,560.0,560.0,560.0,560.0,560.0,560.0,560.0,560.0,560.0,560.0,560.0
mean,4126.054199,4041.266602,4173.217285,4138.78125,4201.267578,4163.830566,4138.449219,1.038468,0.514286,0.473214,0.535714,0.5,0.517857,0.507143,1.0,0.483929
std,334.611786,284.438934,341.255859,326.44931,365.512482,373.986725,325.031982,48.181484,0.500243,0.499728,0.499169,0.500447,0.500128,0.500396,0.0,0.500188
min,3326.555664,3352.441406,3237.330566,3238.93457,3282.548096,3301.264404,3298.710693,-179.590088,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
25%,3847.868652,3830.122498,3897.69873,3874.862549,3909.272827,3864.419556,3870.177979,-25.597961,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
50%,4111.312012,4034.839966,4236.830078,4148.740723,4203.855957,4147.888184,4147.512451,1.625732,1.0,0.0,1.0,0.5,1.0,1.0,1.0,0.0
75%,4385.583252,4273.382202,4458.226807,4413.889893,4507.641113,4446.716675,4403.828857,30.515137,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
max,5096.293945,4604.85791,4752.125977,4800.645508,4939.429688,5081.401367,4784.958008,205.603271,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [None]:
# Disabled export: when uncommented, writes the de-standardized meta-feature frame
# (meta_features_reverse_df) to a CSV file on the mounted Google Drive, without the index.
# import pandas as pd
# meta_model_train_test_df = meta_features_reverse_df
# meta_model_train_test_df.to_csv("/content/drive/MyDrive/CUNY/Capstone/Data/training_data/meta_model_train_test_df.csv", index=False)

In [None]:
# !pip freeze

absl-py==1.4.0
aiohttp==3.9.1
aiosignal==1.3.1
alabaster==0.7.13
albumentations==1.3.1
altair==4.2.2
anyio==3.7.1
appdirs==1.4.4
argon2-cffi==23.1.0
argon2-cffi-bindings==21.2.0
array-record==0.5.0
arviz==0.15.1
astropy==5.3.4
astunparse==1.6.3
async-timeout==4.0.3
atpublic==4.0
attrs==23.2.0
audioread==3.0.1
autograd==1.6.2
Babel==2.14.0
backcall==0.2.0
beautifulsoup4==4.11.2
bidict==0.22.1
bigframes==0.18.0
bleach==6.1.0
blinker==1.4
blis==0.7.11
blosc2==2.0.0
bokeh==3.3.2
bqplot==0.12.42
branca==0.7.0
build==1.0.3
CacheControl==0.13.1
cachetools==5.3.2
catalogue==2.0.10
certifi==2023.11.17
cffi==1.16.0
chardet==5.2.0
charset-normalizer==3.3.2
chex==0.1.7
click==8.1.7
click-plugins==1.1.1
cligj==0.7.2
cloudpickle==2.2.1
cmake==3.27.9
cmdstanpy==1.2.0
colorcet==3.0.1
colorlover==0.3.0
colour==0.1.5
community==1.0.0b1
confection==0.1.4
cons==0.4.6
contextlib2==21.6.0
contourpy==1.2.0
cryptography==41.0.7
cufflinks==0.17.3
cupy-cuda12x==12.2.0
cvxopt==1.3.2
cvxpy==1.3.2
cycler==0.12.1
c