In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Set random seed for reproducibility
seed = 42
torch.manual_seed(seed)
np.random.seed(seed)

In [3]:
#Read the csv file
df = pd.read_csv('Final_nflx_data_2018-2022.csv')
df['date'] = pd.to_datetime(df['date'])
df

Unnamed: 0,date,Open,High,Low,Close,Adj Close,Volume,P_mean,P_sum,twt_count
0,2018-01-02,196.100006,201.649994,195.419998,201.070007,201.070007,10966900,0.020833,10,480
1,2018-01-03,202.050003,206.210007,201.500000,205.050003,205.050003,8591400,0.071217,24,337
2,2018-01-04,206.199997,207.050003,204.000000,205.630005,205.630005,6029600,-0.018519,-4,216
3,2018-01-05,207.250000,210.020004,205.589996,209.990005,209.990005,7033200,-0.019737,-6,304
4,2018-01-08,210.020004,212.500000,208.440002,212.050003,212.050003,5580200,-0.007663,-2,261
...,...,...,...,...,...,...,...,...,...,...
1132,2022-07-01,176.490005,180.100006,174.270004,179.949997,179.949997,5194700,-0.062315,-21,337
1133,2022-07-05,176.279999,185.919998,172.679993,185.880005,185.880005,7334300,-0.058824,-25,425
1134,2022-07-06,185.199997,186.220001,180.820007,184.059998,184.059998,5753400,-0.014870,-8,538
1135,2022-07-07,184.270004,190.210007,183.500000,189.270004,189.270004,6334500,-0.055427,-24,433


In [4]:
df=df.drop(list(range(14)),axis=0,inplace=False)
df

Unnamed: 0,date,Open,High,Low,Close,Adj Close,Volume,P_mean,P_sum,twt_count
14,2018-01-23,255.050003,257.709991,248.020004,250.289993,250.289993,27705300,-0.061196,-88,1438
15,2018-01-24,250.880005,261.709991,249.309998,261.299988,261.299988,17352400,-0.084525,-65,769
16,2018-01-25,263.000000,272.299988,260.230011,269.700012,269.700012,15336400,-0.122744,-102,831
17,2018-01-26,271.489990,274.600006,268.760010,274.600006,274.600006,11021800,-0.118012,-57,483
18,2018-01-29,274.200012,286.809998,273.920013,284.589996,284.589996,17529700,-0.100690,-73,725
...,...,...,...,...,...,...,...,...,...,...
1132,2022-07-01,176.490005,180.100006,174.270004,179.949997,179.949997,5194700,-0.062315,-21,337
1133,2022-07-05,176.279999,185.919998,172.679993,185.880005,185.880005,7334300,-0.058824,-25,425
1134,2022-07-06,185.199997,186.220001,180.820007,184.059998,184.059998,5753400,-0.014870,-8,538
1135,2022-07-07,184.270004,190.210007,183.500000,189.270004,189.270004,6334500,-0.055427,-24,433


In [5]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

def load_and_preprocess_data(file_path):
    df = pd.read_csv(file_path)
    df['date'] = pd.to_datetime(df['date'])
    df = df.drop(list(range(14)), axis=0, inplace=False)
    cols = ['Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close', 'P_mean']
    df_for_training = df[cols].astype(float)
    df_for_training.index = df['date']
    return df, df_for_training

file_path = '/content/Final_nflx_data_2018-2022.csv'  # Replace with actual file path
df, df_for_training = load_and_preprocess_data(file_path)

n_past = 5
n_future = 1
scaler = MinMaxScaler()
df_scaled = scaler.fit_transform(df_for_training)
scaler_for_inference = MinMaxScaler()
scaler_for_inference.fit_transform(df_for_training.loc[:, ['Open', 'Adj Close']])

trainX, trainY = [], []
for i in range(n_past, len(df_scaled) - n_future + 1):
    trainX.append(df_scaled[i - n_past:i, 0:df_for_training.shape[1]])
    trainY.append(df_scaled[i + n_future - 1:i + n_future, [0, -2]])

trainX, trainY = np.array(trainX), np.array(trainY)


In [9]:
import torch
import torch.nn as nn

# ✅ Define the class BEFORE loading the model
class CNNLSTMModel(nn.Module):
    """
    PyTorch CNN-LSTM Model
    """
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(CNNLSTMModel, self).__init__()

        # 1D Convolutional Layers
        self.conv1 = nn.Conv1d(input_size, 128, kernel_size=2, stride=1)
        self.pool1 = nn.MaxPool1d(kernel_size=2, stride=2)

        self.conv2 = nn.Conv1d(128, 64, kernel_size=2, stride=1)
        self.pool2 = nn.MaxPool1d(kernel_size=1, stride=2)

        # Bidirectional LSTM Layers
        self.lstm = nn.LSTM(64, hidden_size, num_layers,
                            batch_first=True, bidirectional=True, dropout=0.2)

        # Dense Layers
        self.fc1 = nn.Linear(hidden_size * 2, 32)
        self.fc2 = nn.Linear(32, output_size)

        self.relu = nn.ReLU()

    def forward(self, x):
        # Convolutional Layers
        x = x.permute(0, 2, 1)  # Adjust for 1D convolution
        x = self.pool1(self.relu(self.conv1(x)))
        x = self.pool2(self.relu(self.conv2(x)))

        # Prepare for LSTM
        x = x.permute(0, 2, 1)

        # LSTM Layer
        lstm_out, _ = self.lstm(x)

        # Take the last time step
        x = lstm_out[:, -1, :]

        # Dense Layers
        x = self.relu(self.fc1(x))
        x = self.fc2(x)

        return x

# ✅ Now you can safely load the model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_twitter = torch.load("/content/stock_model_with_twitter.pt", map_location=device,weights_only=False)
model_twitter.to(device)
model_twitter.eval()  # Set to evaluation mode


CNNLSTMModel(
  (conv1): Conv1d(7, 128, kernel_size=(2,), stride=(1,))
  (pool1): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv1d(128, 64, kernel_size=(2,), stride=(1,))
  (pool2): MaxPool1d(kernel_size=1, stride=2, padding=0, dilation=1, ceil_mode=False)
  (lstm): LSTM(64, 256, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True)
  (fc1): Linear(in_features=512, out_features=32, bias=True)
  (fc2): Linear(in_features=32, out_features=2, bias=True)
  (relu): ReLU()
)

In [11]:
features = ['Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close', 'P_mean']


In [14]:
import torch
import numpy as np
import pandas as pd

# Ensure model is in evaluation mode
model_twitter.eval()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_twitter.to(device)

# Prepare input data
x_forecast_twitter = df_for_training.iloc[-n_past:, :].values  # Extract last n_past data points
x_forecast_twitter = scaler.transform(x_forecast_twitter)  # Normalize
x_forecast_twitter = torch.FloatTensor(x_forecast_twitter).reshape(1, n_past, len(features)).to(device)  # Reshape & move to device

# Make prediction
with torch.no_grad():
    prediction_twitter = model_twitter(x_forecast_twitter)

prediction_twitter = prediction_twitter.cpu().numpy()
prediction_twitter = scaler_for_inference.inverse_transform(prediction_twitter)

# Generate future dates
n_future = 1  # Number of days to predict
last_date = df_for_training.index[-1]  # Last available date
predict_period_dates = [last_date + pd.Timedelta(days=i) for i in range(1, n_future + 1)]

# Convert timestamps to date
forecast_dates = [date.date() for date in predict_period_dates]

# Print predictions
if len(forecast_dates) > 0:
    print(f'Date = {forecast_dates[0]}, Prediction Open = {prediction_twitter[0][0]}')
    print(f'Date = {forecast_dates[0]}, Prediction Adjusted Close = {prediction_twitter[0][1]}')


Date = 2022-07-09, Prediction Open = 271.7721862792969
Date = 2022-07-09, Prediction Adjusted Close = 278.4022216796875


