In [1]:
import pandas as pd
df = pd.read_csv('/content/2022_Global_Markets_Data.csv')

In [2]:
import pandas as pd
import plotly.express as px

# Convert 'Date' column to datetime objects
df['Date'] = pd.to_datetime(df['Date'])

# Create the time series plot using Plotly Express
fig = px.line(df, x='Date', y='Close', color='Ticker', title='Time Series Plot of Closing Prices')
fig.show()

STOCK DJI

In [3]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense

# Specify the stock ticker you want to predict
ticker = '^DJI'

# Filter the DataFrame for the specific stock
df_filtered = df[df['Ticker'] == ticker]

# Check if the 'Close' column exists and is not empty
if 'Close' in df_filtered.columns and not df_filtered['Close'].empty:
    # Feature scaling
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(df_filtered['Close'].values.reshape(-1, 1))

    # Split data into training and testing sets (adjust test_size as needed)
    training_size = int(len(scaled_data) * 0.8)
    train_data = scaled_data[0:training_size, :]
    test_data = scaled_data[training_size:, :]

    # Function to create datasets for LSTM
    def create_dataset(dataset, look_back=10):
        X, Y = [], []
        for i in range(len(dataset) - look_back - 1):
            X.append(dataset[i:(i + look_back), 0])
            Y.append(dataset[i + look_back, 0])
        return np.array(X), np.array(Y)

    # Prepare train and test datasets
    look_back = 10
    X_train, Y_train = create_dataset(train_data, look_back)
    X_test, Y_test = create_dataset(test_data, look_back)

    if X_train.size > 0 and X_test.size > 0:
        # Reshape input for LSTM
        X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
        X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

        # Build LSTM model
        model = Sequential([
            LSTM(units=50, return_sequences=True, input_shape=(look_back, 1)),
            LSTM(units=50),
            Dense(1)
        ])

        model.compile(loss='mean_squared_error', optimizer='adam')
        model.fit(X_train, Y_train, epochs=10, batch_size=32)

        # Make predictions
        train_predict = model.predict(X_train)
        test_predict = model.predict(X_test)

        # Invert scaling
        train_predict = scaler.inverse_transform(train_predict)
        test_predict = scaler.inverse_transform(test_predict)

else:
    print(f"'Close' column is missing or empty for ticker '{ticker}'.")


Epoch 1/10



Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 17ms/step - loss: 0.1839
Epoch 2/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - loss: 0.0348
Epoch 3/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - loss: 0.0342
Epoch 4/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - loss: 0.0139
Epoch 5/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: 0.0165
Epoch 6/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - loss: 0.0139
Epoch 7/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - loss: 0.0137
Epoch 8/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - loss: 0.0123
Epoch 9/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - loss: 0.0108
Epoch 10/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - loss: 0.0119
[1m6/6[0m [32m━━━━━━━━━━━━━

In [4]:
import numpy as np
import pandas as pd
import random

# Assuming 'scaled_data', 'look_back', 'model', 'scaler', and 'df_filtered' are already defined

future_input = scaled_data[-look_back:]
future_predictions = []
future_predictions_noisy = []

for _ in range(6 * 20):  # Assuming 20 trading days per month
    x_input = future_input.reshape((1, look_back, 1))

    # Get the actual prediction
    y_pred = model.predict(x_input)
    future_predictions.append(y_pred[0, 0])  # Store the actual prediction

    # Inject random noise (adjust noise level as needed)
    noise = random.uniform(-0.02, 0.02)  # Small noise within ±2%
    y_pred_noisy = y_pred[0, 0] + noise
    future_predictions_noisy.append(y_pred_noisy)  # Store the noisy prediction

    # Update future_input for the next prediction
    future_input = np.append(future_input[1:], y_pred_noisy)

# Convert predictions back to original scale
future_predictions = np.array(future_predictions).reshape(-1, 1)
future_predictions_noisy = np.array(future_predictions_noisy).reshape(-1, 1)

future_predictions_inv = scaler.inverse_transform(future_predictions)
future_predictions_noisy_inv = scaler.inverse_transform(future_predictions_noisy)

# Create a DataFrame for forecasted values
last_date = df_filtered['Date'].iloc[-1]
future_dates = pd.date_range(start=last_date + pd.DateOffset(days=1), periods=len(future_predictions_inv))

forecast_df = pd.DataFrame({
    'Date': future_dates,
    'Close_Actual': future_predictions_inv.flatten(),
    'Close_Noisy': future_predictions_noisy_inv.flatten(),
    'Ticker': ticker
})

print(f"Future predictions for {ticker} (Actual): {future_predictions_inv.flatten()}")
print(f"Future predictions for {ticker} (Noisy): {future_predictions_noisy_inv.flatten()}")



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 90ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 150ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 143ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 

In [5]:
# Filter actual test data for months in the forecasted period (Jan–May)
test_data_1 = pd.read_csv("/content/2023_Global_Markets_Data.csv")
filtered_data = test_data_1[test_data_1['Ticker'] == ticker]
filtered_data = filtered_data[['Ticker', 'Date', 'Close']]
filtered_data['Date'] = pd.to_datetime(filtered_data['Date'])
filtered_data['Month'] = filtered_data['Date'].dt.month
filtered_data = filtered_data[filtered_data['Month'] <= 5].drop('Month', axis=1)

In [8]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go

# Assuming you have the following variables defined:
# future_predictions_inv: Actual forecasted values without noise
# future_predictions_noisy_inv: Forecasted values with noise
# last_date: The last date from your actual data
# filtered_data: Your actual data DataFrame containing 'Date' and 'Close' columns

# Step 1: Create a DataFrame for the forecasted values
future_dates = pd.date_range(start=last_date + pd.DateOffset(days=1), periods=len(future_predictions_inv))
forecast_df = pd.DataFrame({
    'Date': future_dates,
    'Close_forecast': future_predictions_inv.flatten(),
    'Close_noisy_forecast': future_predictions_noisy_inv.flatten(),
    'Ticker': ticker  # Optional, if you want to keep track of the ticker
})

# Step 2: Merge actual and forecasted data
merged_df = pd.merge(forecast_df, filtered_data, on='Date', how='inner', suffixes=('_forecast', '_actual'))

# Step 3: Calculate error metrics
mae = np.mean(np.abs(merged_df['Close_forecast'] - merged_df['Close']))
rmse = np.sqrt(np.mean((merged_df['Close_forecast'] - merged_df['Close'])**2))
mape = np.mean(np.abs((merged_df['Close_forecast'] - merged_df['Close']) / merged_df['Close'])) * 100

print(f"Mean Absolute Error (MAE): {mae}")
print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")

# Step 4: Create interactive visualization
fig = go.Figure()

# Actual Data (Filtered)
fig.add_trace(go.Scatter(
    x=merged_df['Date'],
    y=merged_df['Close'],
    mode='lines',
    name='Actual (Filtered)',
    line=dict(color='blue')
))

# Forecasted Data (without noise)
fig.add_trace(go.Scatter(
    x=merged_df['Date'],
    y=merged_df['Close_forecast'],
    mode='lines',
    name='Forecast (No Noise)',
    line=dict(dash='dash', color='orange')
))

# Forecasted Data (with noise)
fig.add_trace(go.Scatter(
    x=merged_df['Date'],
    y=merged_df['Close_noisy_forecast'],
    mode='lines',
    name='Forecast (With Noise)',
    line=dict(dash='dot', color='red')
))

# Update layout
fig.update_layout(
    title='Filtered Actual vs Forecasted Close Prices',
    xaxis_title='Date',
    yaxis_title='Close Price',
    hovermode="x"
)

fig.show()

Mean Absolute Error (MAE): 1075.9476755401236
Root Mean Squared Error (RMSE): 1232.3592079720654
Mean Absolute Percentage Error (MAPE): 3.19%


STOCK NYA

In [9]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense

# Specify the stock ticker you want to predict
ticker = '^NYA'

# Filter the DataFrame for the specific stock
df_filtered = df[df['Ticker'] == ticker]

# Check if the 'Close' column exists and is not empty
if 'Close' in df_filtered.columns and not df_filtered['Close'].empty:
    # Feature scaling
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(df_filtered['Close'].values.reshape(-1, 1))

    # Split data into training and testing sets (adjust test_size as needed)
    training_size = int(len(scaled_data) * 0.8)
    train_data = scaled_data[0:training_size, :]
    test_data = scaled_data[training_size:, :]

    # Function to create datasets for LSTM
    def create_dataset(dataset, look_back=10):
        X, Y = [], []
        for i in range(len(dataset) - look_back - 1):
            X.append(dataset[i:(i + look_back), 0])
            Y.append(dataset[i + look_back, 0])
        return np.array(X), np.array(Y)

    # Prepare train and test datasets
    look_back = 10
    X_train, Y_train = create_dataset(train_data, look_back)
    X_test, Y_test = create_dataset(test_data, look_back)

    if X_train.size > 0 and X_test.size > 0:
        # Reshape input for LSTM
        X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
        X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

        # Build LSTM model
        model = Sequential([
            LSTM(units=50, return_sequences=True, input_shape=(look_back, 1)),
            LSTM(units=50),
            Dense(1)
        ])

        model.compile(loss='mean_squared_error', optimizer='adam')
        model.fit(X_train, Y_train, epochs=10, batch_size=32)

        # Make predictions
        train_predict = model.predict(X_train)
        test_predict = model.predict(X_test)

        # Invert scaling
        train_predict = scaler.inverse_transform(train_predict)
        test_predict = scaler.inverse_transform(test_predict)

else:
    print(f"'Close' column is missing or empty for ticker '{ticker}'.")


Epoch 1/10



Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - loss: 0.3319
Epoch 2/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - loss: 0.0764
Epoch 3/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - loss: 0.0288
Epoch 4/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - loss: 0.0246
Epoch 5/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - loss: 0.0190
Epoch 6/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - loss: 0.0187
Epoch 7/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - loss: 0.0153
Epoch 8/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 0.0148
Epoch 9/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - loss: 0.0118
Epoch 10/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - loss: 0.0128
[1m6/6[0m [32m━━━━━━━━━━━━━

In [10]:
import numpy as np
import pandas as pd
import random

# Assuming 'scaled_data', 'look_back', 'model', 'scaler', and 'df_filtered' are already defined

future_input = scaled_data[-look_back:]
future_predictions = []
future_predictions_noisy = []

for _ in range(6 * 20):  # Assuming 20 trading days per month
    x_input = future_input.reshape((1, look_back, 1))

    # Get the actual prediction
    y_pred = model.predict(x_input)
    future_predictions.append(y_pred[0, 0])  # Store the actual prediction

    # Inject random noise (adjust noise level as needed)
    noise = random.uniform(-0.02, 0.02)  # Small noise within ±2%
    y_pred_noisy = y_pred[0, 0] + noise
    future_predictions_noisy.append(y_pred_noisy)  # Store the noisy prediction

    # Update future_input for the next prediction
    future_input = np.append(future_input[1:], y_pred_noisy)

# Convert predictions back to original scale
future_predictions = np.array(future_predictions).reshape(-1, 1)
future_predictions_noisy = np.array(future_predictions_noisy).reshape(-1, 1)

future_predictions_inv = scaler.inverse_transform(future_predictions)
future_predictions_noisy_inv = scaler.inverse_transform(future_predictions_noisy)

# Create a DataFrame for forecasted values
last_date = df_filtered['Date'].iloc[-1]
future_dates = pd.date_range(start=last_date + pd.DateOffset(days=1), periods=len(future_predictions_inv))

forecast_df = pd.DataFrame({
    'Date': future_dates,
    'Close_Actual': future_predictions_inv.flatten(),
    'Close_Noisy': future_predictions_noisy_inv.flatten(),
    'Ticker': ticker
})

print(f"Future predictions for {ticker} (Actual): {future_predictions_inv.flatten()}")
print(f"Future predictions for {ticker} (Noisy): {future_predictions_noisy_inv.flatten()}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59

In [11]:
# Filter actual test data for months in the forecasted period (Jan–May)
test_data_1 = pd.read_csv("/content/2023_Global_Markets_Data.csv")
filtered_data = test_data_1[test_data_1['Ticker'] == ticker]
filtered_data = filtered_data[['Ticker', 'Date', 'Close']]
filtered_data['Date'] = pd.to_datetime(filtered_data['Date'])
filtered_data['Month'] = filtered_data['Date'].dt.month
filtered_data = filtered_data[filtered_data['Month'] <= 5].drop('Month', axis=1)

In [12]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go

# Assuming you have the following variables defined:
# future_predictions_inv: Actual forecasted values without noise
# future_predictions_noisy_inv: Forecasted values with noise
# last_date: The last date from your actual data
# filtered_data: Your actual data DataFrame containing 'Date' and 'Close' columns

# Step 1: Create a DataFrame for the forecasted values
future_dates = pd.date_range(start=last_date + pd.DateOffset(days=1), periods=len(future_predictions_inv))
forecast_df = pd.DataFrame({
    'Date': future_dates,
    'Close_forecast': future_predictions_inv.flatten(),
    'Close_noisy_forecast': future_predictions_noisy_inv.flatten(),
    'Ticker': ticker  # Optional, if you want to keep track of the ticker
})

# Step 2: Merge actual and forecasted data
merged_df = pd.merge(forecast_df, filtered_data, on='Date', how='inner', suffixes=('_forecast', '_actual'))

# Step 3: Calculate error metrics
mae = np.mean(np.abs(merged_df['Close_forecast'] - merged_df['Close']))
rmse = np.sqrt(np.mean((merged_df['Close_forecast'] - merged_df['Close'])**2))
mape = np.mean(np.abs((merged_df['Close_forecast'] - merged_df['Close']) / merged_df['Close'])) * 100

print(f"Mean Absolute Error (MAE): {mae}")
print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")

# Step 4: Create interactive visualization
fig = go.Figure()

# Actual Data (Filtered)
fig.add_trace(go.Scatter(
    x=merged_df['Date'],
    y=merged_df['Close'],
    mode='lines',
    name='Actual (Filtered)',
    line=dict(color='blue')
))

# Forecasted Data (without noise)
fig.add_trace(go.Scatter(
    x=merged_df['Date'],
    y=merged_df['Close_forecast'],
    mode='lines',
    name='Forecast (No Noise)',
    line=dict(dash='dash', color='orange')
))

# Forecasted Data (with noise)
fig.add_trace(go.Scatter(
    x=merged_df['Date'],
    y=merged_df['Close_noisy_forecast'],
    mode='lines',
    name='Forecast (With Noise)',
    line=dict(dash='dot', color='red')
))

# Update layout
fig.update_layout(
    title='Filtered Actual vs Forecasted Close Prices',
    xaxis_title='Date',
    yaxis_title='Close Price',
    hovermode="x"
)

fig.show()

Mean Absolute Error (MAE): 552.1358145254629
Root Mean Squared Error (RMSE): 626.9189496073593
Mean Absolute Percentage Error (MAPE): 3.52%
