In [25]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, GRU, Dense

# Load dataset
data = pd.read_csv("Datasetscomp1/TSLA.csv")
df = pd.DataFrame(data)
df = df.dropna()

# Feature Scaling
scaler = MinMaxScaler()
df["Close"] = scaler.fit_transform(df["Close"].values.reshape(-1, 1))

# Create sequences for LSTM/GRU
def create_sequences(data, seq_length):
    sequences = []
    labels = []
    for i in range(len(data) - seq_length):
        sequences.append(data[i:i + seq_length])
        labels.append(data[i + seq_length])
    return np.array(sequences), np.array(labels)

seq_length = 50  # Number of past days to use for prediction
close_prices = df["Close"].values
X, y = create_sequences(close_prices, seq_length)

# Split data into training and testing sets
split = int(len(X) * 0.8)
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

# Reshape input for LSTM/GRU (samples, time steps, features)
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

# Build LSTM/GRU Model
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(seq_length, 1)),
    GRU(50, return_sequences=False),
    Dense(25),
    Dense(1)
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model.fit(X_train, y_train, epochs=20, batch_size=16, validation_data=(X_test, y_test))

# Make predictions on the test set
y_pred = model.predict(X_test)
y_pred = scaler.inverse_transform(y_pred)
y_test = scaler.inverse_transform(y_test.reshape(-1, 1))

# Calculate evaluation metrics
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print("Model Evaluation")
print(f"MAE: {mae:.2f}")
print(f"MSE: {mse:.2f}")
print(f"RMSE: {rmse:.2f}")
print(f"R2 Score: {r2:.2f}")

"""
# Generate future dates
start_date = datetime.strptime("2023-01-01", "%Y-%m-%d")
predict_dates = [start_date + timedelta(days=i) for i in range(10)]

# Forecast future prices
future_prices = []
current_sequence = X_test[-1]  # Start with the last known sequence

for _ in range(10):
    predicted_price = model.predict(current_sequence.reshape(1, seq_length, 1))[0, 0]
    future_prices.append(predicted_price)
    current_sequence = np.roll(current_sequence, -1)
    current_sequence[-1] = predicted_price  # Update sequence with prediction

# Convert prices back to original scale
future_prices = scaler.inverse_transform(np.array(future_prices).reshape(-1, 1)).flatten()

# Print predictions
for date, price in zip(predict_dates, future_prices):
    print(f"Predicted stock price for {date.strftime('%Y-%m-%d')}: ${price:.2f}")
"""

Epoch 1/20


  super().__init__(**kwargs)


[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 13ms/step - loss: 1.5442e-04 - val_loss: 0.0032
Epoch 2/20
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - loss: 5.3987e-06 - val_loss: 0.0025
Epoch 3/20
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - loss: 5.9825e-06 - val_loss: 0.0027
Epoch 4/20
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - loss: 4.7690e-06 - val_loss: 0.0020
Epoch 5/20
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - loss: 6.1556e-06 - val_loss: 0.0018
Epoch 6/20
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - loss: 4.8919e-06 - val_loss: 0.0012
Epoch 7/20
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - loss: 5.3359e-06 - val_loss: 0.0023
Epoch 8/20
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - loss: 4.2392e-06 - val_loss: 0.0014
Epoch 9/20


'\n# Forecast future prices\nfuture_prices = []\ncurrent_sequence = X_test[-1]  # Start with the last known sequence\n\nfor _ in range(10):\n    predicted_price = model.predict(current_sequence.reshape(1, seq_length, 1))[0, 0]\n    future_prices.append(predicted_price)\n    current_sequence = np.roll(current_sequence, -1)\n    current_sequence[-1] = predicted_price  # Update sequence with prediction\n\n# Convert prices back to original scale\nfuture_prices = scaler.inverse_transform(np.array(future_prices).reshape(-1, 1)).flatten()\n\n# Print predictions\nfor date, price in zip(predict_dates, future_prices):\n    print(f"Predicted stock price for {date.strftime(\'%Y-%m-%d\')}: ${price:.2f}")\n'

In [22]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
import xgboost as xgb
import lightgbm as lgb

# Load dataset
data = pd.read_csv("Datasetscomp1/TSLA.csv")
df = pd.DataFrame(data)
df = df.dropna()

# Feature Scaling
scaler = MinMaxScaler()
df["Close"] = scaler.fit_transform(df["Close"].values.reshape(-1, 1))

# Create sequences for training
def create_sequences(data, seq_length):
    sequences = []
    labels = []
    for i in range(len(data) - seq_length):
        sequences.append(data[i:i + seq_length])
        labels.append(data[i + seq_length])
    return np.array(sequences), np.array(labels)

seq_length = 50  # Number of past days to use for prediction
close_prices = df["Close"].values
X, y = create_sequences(close_prices, seq_length)

# Reshape data for XGBoost/LightGBM
X = X.reshape(X.shape[0], -1)  # Flatten sequences

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=False)

# Train XGBoost Model
xgb_model = xgb.XGBRegressor(n_estimators=100, random_state=42)
xgb_model.fit(X_train, y_train)

# Train LightGBM Model
lgb_model = lgb.LGBMRegressor(n_estimators=100, random_state=42)
lgb_model.fit(X_train, y_train)

# Make predictions
xgb_pred = xgb_model.predict(X_test)
lgb_pred = lgb_model.predict(X_test)

xgb_pred = scaler.inverse_transform(xgb_pred.reshape(-1, 1))
lgb_pred = scaler.inverse_transform(lgb_pred.reshape(-1, 1))
y_test = scaler.inverse_transform(y_test.reshape(-1, 1))

# Calculate evaluation metrics for XGBoost
xgb_mae = mean_absolute_error(y_test, xgb_pred)
xgb_mse = mean_squared_error(y_test, xgb_pred)
xgb_rmse = np.sqrt(xgb_mse)
xgb_r2 = r2_score(y_test, xgb_pred)

# Calculate evaluation metrics for LightGBM
lgb_mae = mean_absolute_error(y_test, lgb_pred)
lgb_mse = mean_squared_error(y_test, lgb_pred)
lgb_rmse = np.sqrt(lgb_mse)
lgb_r2 = r2_score(y_test, lgb_pred)

print("XGBoost Model Evaluation")
print(f"MAE: {xgb_mae:.2f}")
print(f"MSE: {xgb_mse:.2f}")
print(f"RMSE: {xgb_rmse:.2f}")
print(f"R2 Score: {xgb_r2:.2f}")

print("\nLightGBM Model Evaluation")
print(f"MAE: {lgb_mae:.2f}")
print(f"MSE: {lgb_mse:.2f}")
print(f"RMSE: {lgb_rmse:.2f}")
print(f"R2 Score: {lgb_r2:.2f}")

# Generate future dates
start_date = datetime.strptime("2023-01-01", "%Y-%m-%d")
predict_dates = [start_date + timedelta(days=i) for i in range(10)]

# Forecast future prices using XGBoost
future_prices_xgb = []
current_sequence = X_test[-1]
for _ in range(10):
    predicted_price = xgb_model.predict(current_sequence.reshape(1, -1))[0]
    future_prices_xgb.append(predicted_price)
    current_sequence = np.roll(current_sequence, -1)
    current_sequence[-1] = predicted_price
future_prices_xgb = scaler.inverse_transform(np.array(future_prices_xgb).reshape(-1, 1)).flatten()

# Forecast future prices using LightGBM
future_prices_lgb = []
current_sequence = X_test[-1]
for _ in range(10):
    predicted_price = lgb_model.predict(current_sequence.reshape(1, -1))[0]
    future_prices_lgb.append(predicted_price)
    current_sequence = np.roll(current_sequence, -1)
    current_sequence[-1] = predicted_price
future_prices_lgb = scaler.inverse_transform(np.array(future_prices_lgb).reshape(-1, 1)).flatten()

# Print predictions
print("\nFuture Predictions (XGBoost)")
for date, price in zip(predict_dates, future_prices_xgb):
    print(f"Predicted stock price for {date.strftime('%Y-%m-%d')}: ${price:.2f}")

print("\nFuture Predictions (LightGBM)")
for date, price in zip(predict_dates, future_prices_lgb):
    print(f"Predicted stock price for {date.strftime('%Y-%m-%d')}: ${price:.2f}")


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000668 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12750
[LightGBM] [Info] Number of data points in the train set: 2324, number of used features: 50
[LightGBM] [Info] Start training from score 0.027581
XGBoost Model Evaluation
MAE: 485.33
MSE: 333579.57
RMSE: 577.56
R2 Score: -2.40

LightGBM Model Evaluation
MAE: 483.53
MSE: 331755.95
RMSE: 575.98
R2 Score: -2.38

Future Predictions (XGBoost)
Predicted stock price for 2023-01-01: $71.38
Predicted stock price for 2023-01-02: $68.09
Predicted stock price for 2023-01-03: $66.44
Predicted stock price for 2023-01-04: $62.84
Predicted stock price for 2023-01-05: $60.10
Predicted stock price for 2023-01-06: $57.39
Predicted stock price for 2023-01-07: $55.04
Predicted stock price for 2023-01-08: $53.10
Predicted stock price for 2023-01-09: $52.58
Predicted stock price for 2023-01-10: $52.58

Future Predic

