In [1]:
import pandas as pd
import numpy as np
import mlflow
import mlflow.tensorflow
import mlflow.keras
import datetime as dt

import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.layers import Dense, Dropout, LSTM
from sklearn.metrics import mean_squared_error,mean_absolute_error
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from tensorflow.keras.models import Sequential

import pickle

In [2]:
df = pd.read_csv("Daily Stock Data.csv")
df.head()

Unnamed: 0,CompanyID,Date,Open,High,Low,Close,Volume,Change,Previous Close,Previous Close Filled,True Range,OBV,OBV_MovingAvg_14,OBV_Oscillator
0,1,10/28/2019 0:00,107.99,108.46,106.45,106.6,7000000,-0.0055,,106.6,2.01,-7000000,-7000000,0
1,1,10/29/2019 0:00,106.84,107.02,104.69,105.0,5420000,-0.015,106.6,106.6,2.33,-5420000,-5420000,0
2,1,10/30/2019 0:00,105.29,106.6,103.96,106.5,4200000,0.0143,106.6,106.6,2.64,4200000,4200000,0
3,1,10/31/2019 0:00,106.47,106.5,103.26,104.1,7200000,-0.0225,106.6,106.6,3.34,-7200000,-7200000,0
4,1,11/1/2019 0:00,104.7,105.3,103.93,104.98,5490000,0.0085,106.6,106.6,2.67,5490000,5490000,0


In [23]:
df.shape

(12562, 14)

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12562 entries, 0 to 12561
Data columns (total 14 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   CompanyID              12562 non-null  int64  
 1   Date                   12562 non-null  object 
 2   Open                   12562 non-null  float64
 3   High                   12562 non-null  float64
 4   Low                    12562 non-null  float64
 5   Close                  12562 non-null  float64
 6   Volume                 12562 non-null  int64  
 7   Change                 12562 non-null  float64
 8   Previous Close         12552 non-null  float64
 9   Previous Close Filled  12562 non-null  float64
 10  True Range             12562 non-null  float64
 11  OBV                    12562 non-null  int64  
 12  OBV_MovingAvg_14       12562 non-null  int64  
 13  OBV_Oscillator         12562 non-null  int64  
dtypes: float64(8), int64(5), object(1)
memory usage: 1.3+ 

In [3]:
def specific_data_company(id_company):
  company_data = df[df["CompanyID"] == id_company]
  return company_data

In [4]:
AAPL = specific_data_company(1)
AAPL.shape

(1258, 14)

In [5]:
AAPL['Date'] = pd.to_datetime(AAPL['Date'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  AAPL['Date'] = pd.to_datetime(AAPL['Date'])


In [6]:
AAPL.head()

Unnamed: 0,CompanyID,Date,Open,High,Low,Close,Volume,Change,Previous Close,Previous Close Filled,True Range,OBV,OBV_MovingAvg_14,OBV_Oscillator
0,1,2019-10-28,107.99,108.46,106.45,106.6,7000000,-0.0055,,106.6,2.01,-7000000,-7000000,0
1,1,2019-10-29,106.84,107.02,104.69,105.0,5420000,-0.015,106.6,106.6,2.33,-5420000,-5420000,0
2,1,2019-10-30,105.29,106.6,103.96,106.5,4200000,0.0143,106.6,106.6,2.64,4200000,4200000,0
3,1,2019-10-31,106.47,106.5,103.26,104.1,7200000,-0.0225,106.6,106.6,3.34,-7200000,-7200000,0
4,1,2019-11-01,104.7,105.3,103.93,104.98,5490000,0.0085,106.6,106.6,2.67,5490000,5490000,0


In [7]:
AAPL.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1258 entries, 0 to 1257
Data columns (total 14 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   CompanyID              1258 non-null   int64         
 1   Date                   1258 non-null   datetime64[ns]
 2   Open                   1258 non-null   float64       
 3   High                   1258 non-null   float64       
 4   Low                    1258 non-null   float64       
 5   Close                  1258 non-null   float64       
 6   Volume                 1258 non-null   int64         
 7   Change                 1258 non-null   float64       
 8   Previous Close         1257 non-null   float64       
 9   Previous Close Filled  1258 non-null   float64       
 10  True Range             1258 non-null   float64       
 11  OBV                    1258 non-null   int64         
 12  OBV_MovingAvg_14       1258 non-null   int64         
 13  OBV_Osci

In [6]:
# Get the complete date range for each CompanyID
def complete_date_range(group):
    full_date_range = pd.date_range(start=group["Date"].min(), end=group["Date"].max())
    return pd.DataFrame({"Date": full_date_range, "CompanyID": group["CompanyID"].iloc[0]})

# Apply function to get the full dataset with all dates
AAPL_full = pd.concat([complete_date_range(group) for _, group in AAPL.groupby("CompanyID")])

# Merge the full date range with the original data
AAPL_full = AAPL_full.merge(AAPL, on=["CompanyID", "Date"], how="left")

# Fill missing values
# Forward fill numerical data
AAPL_full.sort_values(by=["CompanyID", "Date"], inplace=True)

AAPL_full["Close"] = AAPL_full["Close"].ffill()
AAPL_full["True Range"] = AAPL_full["True Range"].ffill()
AAPL_full["OBV_MovingAvg_14"] = AAPL_full["OBV_MovingAvg_14"].ffill()


In [7]:
AAPL_full = AAPL_full[["Date", "Close", "True Range", "OBV_MovingAvg_14"]]
AAPL_full.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1825 entries, 0 to 1824
Data columns (total 4 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   Date              1825 non-null   datetime64[ns]
 1   Close             1825 non-null   float64       
 2   True Range        1825 non-null   float64       
 3   OBV_MovingAvg_14  1825 non-null   float64       
dtypes: datetime64[ns](1), float64(3)
memory usage: 57.2 KB


In [8]:
AAPL_full = AAPL_full.sort_values("Date")
AAPL_full.tail(2)

Unnamed: 0,Date,Close,True Range,OBV_MovingAvg_14
1823,2024-10-24,81.39,227.61,7400000.0
1824,2024-10-25,81.7,226.83,7200000.0


In [8]:
scaler = MinMaxScaler()
features = ["Close", "True Range", "OBV_MovingAvg_14"]
AAPL_Scaler = scaler.fit_transform(AAPL_full[features])
AAPL_Scaled = pd.DataFrame(AAPL_Scaler, columns=features)
AAPL_Scaled.head(1)

Unnamed: 0,Close,True Range,OBV_MovingAvg_14
0,0.21775,0.003422,0.680208


In [9]:
print(AAPL_Scaled.isna().sum())

Close               0
True Range          0
OBV_MovingAvg_14    0
dtype: int64


In [10]:
def create_sequences(data, time_steps=5):
    X, y = [], []
    for i in range(len(data) - time_steps):
        X.append(data[i:i + time_steps])  # All features
        y.append(data[i + time_steps, 0])     # Target is 'Close'
    return np.array(X), np.array(y)

time_steps = 5
X, y = create_sequences(AAPL_Scaled.values, time_steps)

print(X[0])
print(y[0])

[[0.21775006 0.00342213 0.68020839]
 [0.21155187 0.00466654 0.68852286]
 [0.21736267 0.00587206 0.73914645]
 [0.20806539 0.00859421 0.67915592]
 [0.21147439 0.00598872 0.74593485]]
0.2114743937398311


In [None]:
# Split data into training and testing sets
def split_data(X, y):
  train_size = int(len(X) * 0.8)
  X_train, X_test = X[:train_size], X[train_size:]
  y_train, y_test = y[:train_size], y[train_size:]
  print(y_test.shape)
  print(X_train[0])
  return X_train, y_train, X_test, y_test

X_train, y_train, X_test, y_test = split_data(X, y)

mlflow.log_param("train_size", len(X_train))
mlflow.log_param("test_size", len(X_test))

print(X_test.shape)

(364,)
[[0.21775006 0.00342213 0.68020839]
 [0.21155187 0.00466654 0.68852286]
 [0.21736267 0.00587206 0.73914645]
 [0.20806539 0.00859421 0.67915592]
 [0.21147439 0.00598872 0.74593485]]
(364, 5, 3)


In [16]:
mlflow.end_run()


In [13]:
# Define the Transformer model
def build_transformer_model(input_shape):
    inputs = tf.keras.Input(shape=input_shape)
    x = layers.MultiHeadAttention(num_heads=8, key_dim=128)(inputs, inputs)
    x = layers.LayerNormalization()(x)
    x = layers.Dense(64, activation="relu")(x)
    x = layers.Flatten()(x)
    outputs = layers.Dense(1)(x)  # Predict stock price
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    return model

model_transform = build_transformer_model(X_train.shape[1:])
model_transform.compile(optimizer="adam", loss="mean_squared_error")

model_transform.summary()

In [12]:
def train_and_log_model(model, X_train, y_train, X_test, y_test, model_name, epochs=50, batch_size=16):
    # بدء جلسة MLflow
    with mlflow.start_run(run_name=model_name):
        # تسجيل اسم النموذج
        mlflow.log_param("model_name", model_name)
        mlflow.log_param("epochs", epochs)
        mlflow.log_param("batch_size", batch_size)
        
        # تدريب النموذج
        history = model.fit(
            X_train, y_train,
            validation_data=(X_test, y_test),
            epochs=epochs,
            batch_size=batch_size,
            verbose=1
        )
        
        # تسجيل المقاييس
        for epoch, (train_loss, val_loss) in enumerate(zip(history.history["loss"], history.history["val_loss"])):
            mlflow.log_metric("train_loss", train_loss, step=epoch)
            mlflow.log_metric("val_loss", val_loss, step=epoch)
        
        # تسجيل النموذج المدرب
        if isinstance(model, tf.keras.Model):
            mlflow.tensorflow.log_model(model, artifact_path="model")
        else:
            mlflow.keras.log_model(model, artifact_path="model")
        
        print(f"Model {model_name} logged to MLflow.")



In [17]:
# تدريب النموذج Transformer وتسجيله باستخدام MLflow
train_and_log_model(model_transform, X_train, y_train, X_test, y_test, model_name="Transformer", epochs=50, batch_size=16)

Epoch 1/50
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m86s[0m 129ms/step - loss: 0.0219 - val_loss: 5.9150e-04
Epoch 2/50
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 109ms/step - loss: 0.0010 - val_loss: 4.5582e-04
Epoch 3/50
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 94ms/step - loss: 9.1873e-04 - val_loss: 3.2073e-04
Epoch 4/50
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 100ms/step - loss: 0.0011 - val_loss: 1.2484e-04
Epoch 5/50
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 98ms/step - loss: 9.9645e-04 - val_loss: 4.2305e-04
Epoch 6/50
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 103ms/step - loss: 8.1768e-04 - val_loss: 6.9451e-05
Epoch 7/50
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 110ms/step - loss: 7.7793e-04 - val_loss: 1.2703e-04
Epoch 8/50
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 91ms/step - loss: 9.7262e-04 - val_los



Model Transformer logged to MLflow.


In [16]:
# استرجاع المُحسِّن المستخدم في النموذج
optimizer_transformer = model_transform.optimizer

# عرض جميع البرامترات المُستخدمة
optimizer_transformer_config = optimizer_transformer.get_config()

# طباعة القيم
print("Optimizer Parameters:")
for key, value in optimizer_transformer_config.items():
    print(f"{key}: {value}")

Optimizer Parameters:
name: adam
learning_rate: 0.0010000000474974513
weight_decay: None
clipnorm: None
global_clipnorm: None
clipvalue: None
use_ema: False
ema_momentum: 0.99
ema_overwrite_frequency: None
loss_scale_factor: None
gradient_accumulation_steps: None
beta_1: 0.9
beta_2: 0.999
epsilon: 1e-07
amsgrad: False


In [13]:
# Predict scaled values
def evaluation(X_test, model):
  y_pred_scaled = model.predict(X_test)

  # Pad the predictions and true values to match the scaler's input shape
  y_pred_full = np.hstack((y_pred_scaled, np.zeros((len(y_pred_scaled), 2))))  # Add zeros for the other two features
  y_test_full = np.hstack((y_test.reshape(-1, 1), np.zeros((len(y_test), 2))))

  # Inverse transform
  y_pred_rescaled = scaler.inverse_transform(y_pred_full)[:, 0]  # Extract the "Close" column
  y_test_rescaled = scaler.inverse_transform(y_test_full)[:, 0]  # Extract the "Close" column

  mse = mean_squared_error(y_test_rescaled, y_pred_rescaled)
  mae = mean_absolute_error(y_test_rescaled, y_pred_rescaled)

  print(f"Mean Absolute Error (MAE): {mae:.4f}")
  print(f"Mean Squared Error (MSE): {mse:.4f}")
  # Create a DataFrame for comparison
  result_df = pd.DataFrame({
      "Close": y_test_rescaled,
      "Predicted_Close_transform": y_pred_rescaled
  })
  return result_df, mse, mae

In [19]:
result_df, mse, mae = evaluation(X_test, model_transform)

[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 354ms/step
Mean Absolute Error (MAE): 5.8946
Mean Squared Error (MSE): 37.4672


In [22]:
metrics = {
    "MSE of transformer": mse,
    "MAE of transformer": mae,
}
mlflow.log_metrics(metrics)


In [14]:
# test on data future
def predict_future(model, periods):
    scaler = MinMaxScaler()
    df = pd.read_csv("E:/Task6/Stock_data_test.csv")
    # البيانات المدخلة
    sequnce = df.loc[14:18, ["close", "true_range", "OBV_Moving14"]]
    # sequnce = np.array(sequnce).reshape(-1, 1)
    sequnce = scaler.fit_transform(sequnce)
    print(sequnce)
    predictions = []
    sequence = sequnce.copy()
    time_steps, num_features = sequence.shape  # Infer time steps and feature count

    for _ in range(periods):
        # Reshape sequence for prediction
        scaled_pred = model.predict(sequence.reshape(1, time_steps, num_features))

        # Pad scaled_pred to match feature dimensions
        padded_pred = np.hstack((scaled_pred, np.zeros((1, num_features - 1))))

        # Inverse transform the prediction
        pred = scaler.inverse_transform(padded_pred)
        predictions.append(
            pred[0][0]
        )  # Append the unscaled predicted value (first column)

        # Update the sequence by appending the prediction and removing the oldest value
        sequence = np.vstack((sequence[1:], padded_pred[0]))
    predictions_df = pd.DataFrame(
        {"Predicted Prices": predictions}
    )
 
    return predictions_df

In [24]:
pred = predict_future(model_transform, 59)

[[0.         0.98773006 0.        ]
 [0.0787234  0.57055215 0.196788  ]
 [0.68510638 1.         0.3723829 ]
 [1.         0.         0.76347328]
 [0.86808511 0.90797546 1.        ]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step   
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 16s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 15s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 14s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 7s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0

In [25]:
pred_new_data, test_new_data = [], []
for i in range(len(pred.values)):
    pred_new_data.append(pred.values[i , 0])
pred_new_data = np.array(pred_new_data)

df = pd.read_csv("E:\Task6\Stock_data_test.csv")
df = df[["close"]]

for i in range(len(df.values)-14):
    test_new_data.append(df.values[i+14 , 0])
test_new_data = np.array(test_new_data)

print(test_new_data.shape)
print(pred_new_data.shape)
test_new_data
mlflow.log_param("test_new_size", len(test_new_data))

  df = pd.read_csv("E:\Task6\Stock_data_test.csv")


(59,)
(59,)


59

In [26]:
  mse = mean_squared_error(test_new_data, pred_new_data)
  mae = mean_absolute_error(test_new_data, pred_new_data)

  print(f"Mean Absolute Error (MAE): {mae:.4f}")
  print(f"Mean Squared Error (MSE): {mse:.4f}")

  metrics = {
    "MSE of transformer for new data": mse,
    "MAE of transformer for new data": mae,
}
mlflow.log_metrics(metrics)


Mean Absolute Error (MAE): 9.8719
Mean Squared Error (MSE): 155.2344


In [29]:
mlflow.end_run()


In [42]:
with open("apple_model_transformer.pkl", "wb") as f:
    pickle.dump(model_transform, f)  # save full model

In [15]:
# Build LSTM model
model_lstm = Sequential([
    LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], 3)),
    Dropout(0.2),
    LSTM(50, return_sequences=False),
    Dropout(0.2),
    Dense(1)
])
model_lstm.compile(optimizer="adam", loss="mean_squared_error")


  super().__init__(**kwargs)


In [16]:
# تدريب النموذج LSTM وتسجيله باستخدام MLflow
train_and_log_model(model_lstm, X_train, y_train, X_test, y_test, model_name="LSTM", epochs=50, batch_size=16)

Epoch 1/50
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 178ms/step - loss: 0.0620 - val_loss: 2.8129e-04
Epoch 2/50
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 119ms/step - loss: 0.0037 - val_loss: 2.6184e-04
Epoch 3/50
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 147ms/step - loss: 0.0034 - val_loss: 7.6196e-05
Epoch 4/50
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 131ms/step - loss: 0.0029 - val_loss: 6.0897e-05
Epoch 5/50
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 122ms/step - loss: 0.0027 - val_loss: 3.6411e-04
Epoch 6/50
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 139ms/step - loss: 0.0022 - val_loss: 4.8186e-04
Epoch 7/50
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 124ms/step - loss: 0.0026 - val_loss: 1.3369e-04
Epoch 8/50
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 123ms/step - loss: 0.0022 - val_loss: 5.7215e-05




Model LSTM logged to MLflow.


In [27]:
optimizer_lstm = model_lstm.optimizer

# عرض جميع البرامترات المُستخدمة
optimizer_lstm_config = optimizer_lstm.get_config()

# طباعة القيم
print("Optimizer Parameters:")
for key, value in optimizer_lstm_config.items():
    print(f"{key}: {value}")

Optimizer Parameters:
name: adam
learning_rate: 0.0010000000474974513
weight_decay: None
clipnorm: None
global_clipnorm: None
clipvalue: None
use_ema: False
ema_momentum: 0.99
ema_overwrite_frequency: None
loss_scale_factor: None
gradient_accumulation_steps: None
beta_1: 0.9
beta_2: 0.999
epsilon: 1e-07
amsgrad: False


In [17]:
result_df, mse, mae = evaluation(X_test, model_lstm)

[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 395ms/step
Mean Absolute Error (MAE): 1.7130
Mean Squared Error (MSE): 4.4714


In [18]:
metrics = {
    "MSE of lstm": mse,
    "MAE of lstm": mae,
}
mlflow.log_metrics(metrics)


In [20]:
pred_lstm = predict_future(model_lstm, 59)

[[0.         0.98773006 0.        ]
 [0.0787234  0.57055215 0.196788  ]
 [0.68510638 1.         0.3723829 ]
 [1.         0.         0.76347328]
 [0.86808511 0.90797546 1.        ]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 946ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step   
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 353ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 377ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 357ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 341ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 347ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 395ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 349ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 340ms/step
[1m1/1[0m [32m━━

In [21]:
pred_new_data, test_new_data = [], []
for i in range(len(pred_lstm.values)):
    pred_new_data.append(pred_lstm.values[i , 0])
pred_new_data = np.array(pred_new_data)

df = pd.read_csv("E:\Task6\Stock_data_test.csv")
df = df[["close"]]

for i in range(len(df.values)-14):
    test_new_data.append(df.values[i+14 , 0])
test_new_data = np.array(test_new_data)

print(test_new_data.shape)
print(pred_new_data.shape)
test_new_data

(59,)
(59,)


  df = pd.read_csv("E:\Task6\Stock_data_test.csv")


array([231.78, 232.15, 235.  , 236.48, 235.86, 230.76, 230.57, 231.41,
       233.4 , 233.67, 230.1 , 225.91, 222.91, 222.01, 223.45, 222.72,
       227.48, 226.96, 224.23, 224.23, 225.12, 228.22, 225.  , 228.02,
       228.28, 229.  , 228.52, 229.87, 232.87, 235.06, 234.93, 237.33,
       239.59, 242.65, 243.01, 243.04, 242.84, 246.75, 247.77, 246.49,
       247.96, 248.13, 251.04, 253.48, 248.05, 249.79, 254.49, 255.27,
       258.2 , 259.02, 255.59, 252.2 , 250.42, 243.85, 243.36, 245.  ,
       242.21, 242.7 , 236.85])

In [34]:
  mse = mean_squared_error(test_new_data, pred_new_data)
  mae = mean_absolute_error(test_new_data, pred_new_data)

  print(f"Mean Absolute Error (MAE): {mae:.4f}")
  print(f"Mean Squared Error (MSE): {mse:.4f}")

Mean Absolute Error (MAE): 9.8601
Mean Squared Error (MSE): 144.7322


In [22]:
metrics = {
    "MSE of lstm for new data": mse,
    "MAE of lstm for new data": mae,
}
mlflow.log_metrics(metrics)

In [56]:
with open("apple_model_lstm.pkl", "wb") as f:
    pickle.dump(model_lstm, f)  # save full model