# OHLC-Based Sentiment Analysis


In [None]:
import pandas as pd
# Load the daily OHLC export for the stock.
data = pd.read_csv('/content/hdbk_ohlc.csv')

# DataFrame.astype returns a new frame, so the result has to be assigned back;
# calling it without assignment leaves the original columns untouched.
# The date column 'f' is written day-first (e.g. 18-10-2024), so it is parsed
# with an explicit format instead of relying on ambiguous inference.
data['f'] = pd.to_datetime(data['f'], format='%d-%m-%Y')
data = data.astype({'Open': 'float64', 'High': 'float64', 'Low': 'float64'})

# The file lists the most recent day first. Sort oldest-first so that any
# sliding window built from consecutive rows looks at the past, not the future.
data = data.sort_values('f').reset_index(drop=True)

# NOTE(review): 'Close' is derived from 'Open' and 'Change %' rather than read
# from the file; the 'Price' column may already hold the closing price - confirm.
data['Close'] = data['Open']*(1+data['Change %']/100)
data.head()

Unnamed: 0,f,Price,Open,High,Low,Vol.,Change %,Close
0,18-10-2024,1681.85,1663.1,1691.0,1654.1,12.40M,0.01,1663.26631
1,17-10-2024,1673.15,1694.95,1697.65,1665.2,7.35M,-0.02,1694.61101
2,16-10-2024,1699.8,1680.1,1707.95,1680.1,10.24M,0.01,1680.26801
3,15-10-2024,1684.1,1694.0,1698.0,1675.5,9.83M,0.0,1694.0
4,14-10-2024,1688.1,1656.05,1692.05,1654.0,9.26M,0.02,1656.38121


In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import tensorflow as tf

# Turn on memory growth for every GPU TensorFlow can see
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as err:
        # TensorFlow refused the setting; report it and carry on
        print(err)
    else:
        print("GPU memory growth enabled.")

# 'data' is the DataFrame prepared in the previous cell.
# Scale the four price columns into the [0, 1] range, column by column.
# NOTE(review): the scaler is fitted on every row before the train/test split,
# so test-set minima/maxima influence the training features - confirm intended.
feature_columns = ['Open', 'High', 'Low', 'Close']
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(data[feature_columns].to_numpy())

# Create a sliding window with a window size of 60 days
def create_dataset(data, window_size=60):
    """Build sliding-window samples from a 2-D array of scaled rows.

    For every row index ``end`` from ``window_size`` up to the last row, the
    preceding ``window_size`` rows form one input window and column 3 of row
    ``end`` (the Close column in the caller's feature order) is its target.

    Returns a tuple ``(X, y)`` of NumPy arrays; both are empty when ``data``
    has no more than ``window_size`` rows.
    """
    target_rows = range(window_size, len(data))
    windows = [data[end - window_size:end] for end in target_rows]
    targets = [data[end, 3] for end in target_rows]
    return np.array(windows), np.array(targets)

window_size = 60
X, y = create_dataset(scaled_data, window_size)

# Split the data into training and test sets.
# shuffle=False keeps the windows in row order, so the test set is the final
# 20% of the windows. The date lookup later in this cell slices the date column
# positionally (everything after the first len(X_train) windows); that only
# lines up with the test rows when the split is not shuffled. An ordered split
# also stops heavily overlapping neighbouring windows from landing on both
# sides of the split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Reshape data for LSTM input: (samples, time steps, features)
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 4))  # 4 features: Open, High, Low, Close
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 4))

# Build the LSTM model: two stacked LSTM layers with dropout, then a small dense head
model = Sequential()
model.add(LSTM(units=128, return_sequences=True, input_shape=(window_size, 4)))
model.add(Dropout(0.2))
model.add(LSTM(units=64, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(units=25))
model.add(Dense(units=1))

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
# NOTE(review): the test set doubles as the validation set here, so the
# reported test loss is not an independent estimate - confirm this is acceptable.
history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test), verbose=1)

# Model evaluation (the loss is MSE on scaled values, so its square root is the scaled RMSE)
train_loss = model.evaluate(X_train, y_train, verbose=0)
test_loss = model.evaluate(X_test, y_test, verbose=0)
print(f'Train RMSE: {np.sqrt(train_loss)}')
print(f'Test RMSE: {np.sqrt(test_loss)}')

# Make predictions
predictions = model.predict(X_test)

# Inverse transform the predictions to the original scale.
# The scaler was fitted on four columns, so the single Close column is padded
# with three zero columns and only the last (Close) column is kept afterwards.
predictions = scaler.inverse_transform(np.concatenate((np.zeros((predictions.shape[0], 3)), predictions), axis=1))[:, -1]

# Inverse transform the y_test to the original scale (same padding trick)
actual_values = scaler.inverse_transform(np.concatenate((np.zeros((y_test.shape[0], 3)), y_test.reshape(-1, 1)), axis=1))[:, -1]

# Calculate the percentage change from actual to predicted
def calculate_percentage_change(predicted, actual):
    """Return the signed percentage deviation of ``predicted`` from ``actual``.

    Works element-wise on NumPy arrays as well as on plain numbers. A zero in
    ``actual`` is not guarded against.
    """
    return (predicted - actual) / actual * 100


def calculate_score(predictions, actual):
    """Map the prediction-vs-actual percentage change to a 0-100 score.

    50 is neutral. Changes within +/-1% move the score by one point per
    percent; changes beyond +/-1% move it by ten points per percent. The
    result is clamped to the range [0, 100].

    A change of exactly 0 scores the neutral 50 (it previously fell through
    every branch and scored 0, i.e. maximally bearish). NaN changes stay NaN.

    Returns a NumPy array with one score per prediction.
    """
    percentage_changes = np.asarray(
        calculate_percentage_change(predictions, actual), dtype=float
    )

    # Strong moves (beyond +/-1%) are amplified tenfold, mild ones count as-is
    is_strong = np.abs(percentage_changes) > 1
    offsets = np.where(is_strong, percentage_changes * 10, percentage_changes)

    return np.clip(50 + offsets, 0, 100)

# Score every test prediction against its actual value
scores = calculate_score(predictions, actual_values)

# Print one line per test sample: prediction, actual, score and percentage change
for predicted_price, actual_price, score in zip(predictions, actual_values, scores):
    change = calculate_percentage_change(predicted_price, actual_price)
    print(f'Predicted: {predicted_price:.2f}, Actual: {actual_price:.2f}, Score: {score:.2f}, Change: {change:.2f}')

# Look up the dates for the test samples in the 'f' column.
# The first window_size rows have no window, and the next len(X_train) rows are
# treated as training targets.
# NOTE(review): this positional slice matches the test samples only if the
# train/test split kept the rows in their original order - confirm.
test_offset = len(X_train)
dates = data['f'].values[window_size:][test_offset:]

# Pair each date with its score and write the table to disk
results_df = pd.DataFrame({'Date': dates, 'Score': scores})
results_df.to_csv('predictions_with_scores.csv', index=False)

print("Predictions and scores saved to 'predictions_with_scores.csv'.")


GPU memory growth enabled.


  super().__init__(**kwargs)


Epoch 1/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 196ms/step - loss: 0.1683 - val_loss: 0.0193
Epoch 2/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: 0.0243 - val_loss: 0.0223
Epoch 3/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - loss: 0.0255 - val_loss: 0.0104
Epoch 4/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.0212 - val_loss: 0.0111
Epoch 5/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - loss: 0.0144 - val_loss: 0.0091
Epoch 6/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - loss: 0.0165 - val_loss: 0.0074
Epoch 7/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - loss: 0.0116 - val_loss: 0.0074
Epoch 8/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - loss: 0.0108 - val_loss: 0.0059
Epoch 9/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

# Nifty Bank Analysis


In [None]:
import pandas as pd
# Load the daily Nifty Bank index export.
data = pd.read_csv('/content/Nifty Bank Historical Data(2).csv')

# DataFrame.astype returns a new frame, so the result has to be assigned back;
# calling it without assignment leaves the original columns untouched.
# 'Date' is written day-first (e.g. 18-10-2024), so it is parsed with an
# explicit format instead of relying on ambiguous inference.
data['Date'] = pd.to_datetime(data['Date'], format='%d-%m-%Y')
data = data.astype({'Open': 'float64', 'High': 'float64', 'Low': 'float64'})

# The file lists the most recent day first. Sort oldest-first so that any
# sliding window built from consecutive rows looks at the past, not the future.
data = data.sort_values('Date').reset_index(drop=True)

# NOTE(review): 'Close' is derived from 'Open' and 'Change %' rather than read
# from the file; the 'Price' column may already hold the closing value - confirm.
data['Close'] = data['Open']*(1+data['Change %']/100)
data.head()

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %,Close
0,18-10-2024,52094.2,51261.0,52199.45,51000.9,139.08M,0.02,51271.2522
1,17-10-2024,51288.8,51849.3,51930.2,51150.25,107.82M,-0.01,51844.11507
2,16-10-2024,51801.05,51711.2,52031.6,51711.2,94.42M,0.0,51711.2
3,15-10-2024,51906.0,51975.95,52022.05,51698.75,101.41M,0.0,51975.95
4,14-10-2024,51816.9,51263.25,51893.0,51220.85,126.59M,0.01,51268.376325


In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import tensorflow as tf

# Turn on memory growth for every GPU TensorFlow can see
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as err:
        # TensorFlow refused the setting; report it and carry on
        print(err)
    else:
        print("GPU memory growth enabled.")

# 'data' is the DataFrame prepared in the previous cell.
# Scale the four price columns into the [0, 1] range, column by column.
# NOTE(review): the scaler is fitted on every row before the train/test split,
# so test-set minima/maxima influence the training features - confirm intended.
feature_columns = ['Open', 'High', 'Low', 'Close']
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(data[feature_columns].to_numpy())

# Create a sliding window with a window size of 60 days
def create_dataset(data, window_size=60):
    """Turn a 2-D array of scaled rows into (window, target) training pairs.

    Each sample is a block of ``window_size`` consecutive rows; its target is
    column 3 (Close in the caller's feature order) of the row that follows
    the block. Returns ``(X, y)`` as NumPy arrays, both empty when ``data``
    has no more than ``window_size`` rows.
    """
    sample_count = max(len(data) - window_size, 0)
    starts = range(sample_count)
    X = np.array([data[start:start + window_size] for start in starts])
    y = np.array([data[start + window_size, 3] for start in starts])
    return X, y

window_size = 60
X, y = create_dataset(scaled_data, window_size)

# Split the data into training and test sets.
# shuffle=False keeps the windows in row order, so the test set is the final
# 20% of the windows. The date lookup later in this cell slices the date column
# positionally (everything after the first len(X_train) windows); that only
# lines up with the test rows when the split is not shuffled. An ordered split
# also stops heavily overlapping neighbouring windows from landing on both
# sides of the split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Reshape data for LSTM input: (samples, time steps, features)
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 4))  # 4 features: Open, High, Low, Close
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 4))

# Build the LSTM model: two stacked LSTM layers with dropout, then a small dense head
model = Sequential()
model.add(LSTM(units=128, return_sequences=True, input_shape=(window_size, 4)))
model.add(Dropout(0.2))
model.add(LSTM(units=64, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(units=25))
model.add(Dense(units=1))

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
# NOTE(review): the test set doubles as the validation set here, so the
# reported test loss is not an independent estimate - confirm this is acceptable.
history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test), verbose=1)

# Model evaluation (the loss is MSE on scaled values, so its square root is the scaled RMSE)
train_loss = model.evaluate(X_train, y_train, verbose=0)
test_loss = model.evaluate(X_test, y_test, verbose=0)
print(f'Train RMSE: {np.sqrt(train_loss)}')
print(f'Test RMSE: {np.sqrt(test_loss)}')

# Make predictions
predictions = model.predict(X_test)

# Inverse transform the predictions to the original scale.
# The scaler was fitted on four columns, so the single Close column is padded
# with three zero columns and only the last (Close) column is kept afterwards.
predictions = scaler.inverse_transform(np.concatenate((np.zeros((predictions.shape[0], 3)), predictions), axis=1))[:, -1]

# Inverse transform the y_test to the original scale (same padding trick)
actual_values = scaler.inverse_transform(np.concatenate((np.zeros((y_test.shape[0], 3)), y_test.reshape(-1, 1)), axis=1))[:, -1]

# Calculate the percentage change from actual to predicted
def calculate_percentage_change(predicted, actual):
    """Return the signed percentage deviation of ``predicted`` from ``actual``.

    Works element-wise on NumPy arrays as well as on plain numbers. A zero in
    ``actual`` is not guarded against.
    """
    return (predicted - actual) / actual * 100


def calculate_score(predictions, actual):
    """Map the prediction-vs-actual percentage change to a 0-100 score.

    50 is neutral. Changes within +/-1% move the score by one point per
    percent; changes beyond +/-1% move it by ten points per percent. The
    result is clamped to the range [0, 100].

    A change of exactly 0 scores the neutral 50 (it previously fell through
    every branch and scored 0, i.e. maximally bearish). NaN changes stay NaN.

    Returns a NumPy array with one score per prediction.
    """
    percentage_changes = np.asarray(
        calculate_percentage_change(predictions, actual), dtype=float
    )

    # Strong moves (beyond +/-1%) are amplified tenfold, mild ones count as-is
    is_strong = np.abs(percentage_changes) > 1
    offsets = np.where(is_strong, percentage_changes * 10, percentage_changes)

    return np.clip(50 + offsets, 0, 100)

# Calculate the scores based on predictions and actual values
scores = calculate_score(predictions, actual_values)

# Output the predictions and their corresponding scores
for predicted_price, actual_price, score in zip(predictions, actual_values, scores):
    change = calculate_percentage_change(predicted_price, actual_price)
    print(f'Predicted: {predicted_price:.2f}, Actual: {actual_price:.2f}, Score: {score:.2f}, Change: {change:.2f}')

# Look up the dates for the test samples in the 'Date' column.
# The first window_size rows have no window, and the next len(X_train) rows are
# treated as training targets.
# NOTE(review): this positional slice matches the test samples only if the
# train/test split kept the rows in their original order - confirm.
dates = data['Date'].values[window_size:][len(X_train):]

# Create a DataFrame to hold the results
results_df = pd.DataFrame({
    'Date': dates,
    'Score': scores
})

# Save the DataFrame to a CSV file. The path is kept in one variable so the
# confirmation message always names the file that was actually written
# (it previously announced 'predictions_with_scores.csv').
output_path = 'sector_predictions_with_scores.csv'
results_df.to_csv(output_path, index=False)

print(f"Predictions and scores saved to '{output_path}'.")


GPU memory growth enabled.
Epoch 1/100


  super().__init__(**kwargs)


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 99ms/step - loss: 0.1130 - val_loss: 0.0106
Epoch 2/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - loss: 0.0135 - val_loss: 0.0129
Epoch 3/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - loss: 0.0140 - val_loss: 0.0116
Epoch 4/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - loss: 0.0117 - val_loss: 0.0079
Epoch 5/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - loss: 0.0110 - val_loss: 0.0073
Epoch 6/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - loss: 0.0089 - val_loss: 0.0068
Epoch 7/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - loss: 0.0086 - val_loss: 0.0063
Epoch 8/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - loss: 0.0077 - val_loss: 0.0058
Epoch 9/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0



[1m1/2[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m0s[0m 247ms/step



[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 258ms/step
Predicted: 49558.89, Actual: 49808.15, Score: 49.50, Change: -0.50
Predicted: 44120.07, Actual: 43915.10, Score: 50.47, Change: 0.47
Predicted: 46468.34, Actual: 46421.90, Score: 50.10, Change: 0.10
Predicted: 44419.14, Actual: 44185.77, Score: 50.53, Change: 0.53
Predicted: 49122.47, Actual: 48655.13, Score: 50.96, Change: 0.96
Predicted: 47093.19, Actual: 46735.40, Score: 50.77, Change: 0.77
Predicted: 48039.17, Actual: 48197.30, Score: 49.67, Change: -0.33
Predicted: 43359.89, Actual: 43356.85, Score: 50.01, Change: 0.01
Predicted: 52057.58, Actual: 52523.55, Score: 49.11, Change: -0.89
Predicted: 44661.57, Actual: 44031.95, Score: 64.30, Change: 1.43
Predicted: 47804.63, Actual: 47810.18, Score: 49.99, Change: -0.01
Predicted: 43060.51, Actual: 43818.32, Score: 32.71, Change: -1.73
Predicted: 44718.39, Actual: 44707.35, Score: 50.02, Change: 0.02
Predicted: 45821.62, Actual: 45488.20, Score: 50.73, Change: 0.7

# Balance Sheet Analysis

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Read the financial statement export
data = pd.read_csv('/content/hdfc_financials.csv')

# Parse 'Mon-YY' period labels into timestamps; labels that do not match become NaT
report_dates = pd.to_datetime(data['Report Date'], format='%b-%y', errors='coerce')
data['Report Date'] = report_dates

# Index the frame by report date for easier plotting
data = data.set_index('Report Date')

In [None]:
import pandas as pd
import numpy as np

# Read the financial statement export
data = pd.read_csv('/content/hdfc_financials.csv')

# Parse 'Mon-YY' period labels into timestamps; labels that do not match become NaT
data['Report Date'] = pd.to_datetime(data['Report Date'], format='%b-%y', errors='coerce')
# Index the frame by report date for easier plotting
data = data.set_index('Report Date')

# NOTE(review): an earlier revision dropped the trailing export columns
# 'Unnamed: 30' through 'Unnamed: 47' at this point. If such columns are
# present and empty, the dropna below would discard every row - confirm the
# current file no longer contains them.

# Keep only rows in which every column has a value
data.dropna(inplace=True)

# Short aliases for the source columns that are used more than once
net_profit = data[' Net profit ']
sales = data[' Sales ']
equity_capital = data[' Equity Share Capital ']
operating_cash = data[' Cash from Operating Activity ']
other_liabilities = data[' Other Liabilities ']
price = data[' PRICE: ']

# Derived ratios and metrics
data['Net Profit Margin'] = net_profit / sales
data['Return on Equity (ROE)'] = net_profit / equity_capital
data['Debt to Equity Ratio'] = data[' Borrowings '] / equity_capital
data['Cash Flow from Operations to Current Liabilities'] = operating_cash / (operating_cash + other_liabilities)
data['Current Ratio'] = data[' Total .1'] / other_liabilities
data['Operating Cash Flow Ratio'] = operating_cash / other_liabilities
data['Free Cash Flow'] = operating_cash - data[' Cash & Bank ']
data['Asset Turnover Ratio'] = sales / data[' Net Block ']
data['EBITDA'] = sales - data[' Other Expenses '] - data[' Employee Cost '] - data[' Depreciation ']
# Period-over-period price return; the first row has no predecessor and is NaN
data['Total Shareholder Return (TSR)'] = (price / price.shift(1)) - 1

# Function for robust normalization
def robust_normalization(series):
    """Rescale ``series`` to a 0-100 score centred on 50.

    Each value is converted to a z-score (distance from the mean in standard
    deviations), mapped so that one standard deviation equals 10 points
    around 50, and finally clipped to the range [0, 100].

    When the standard deviation is zero or undefined (constant, single-value
    or empty input) there is no spread to measure, so every non-missing value
    gets the neutral score 50 instead of NaN. Missing values stay missing.
    """
    mean = series.mean()
    std = series.std()

    # "not std > 0" is true for both a zero and a NaN standard deviation
    if not std > 0:
        # (series - series) is 0 where a value exists and NaN where it is
        # missing, which keeps the input's type, index and gaps intact
        return (series - series) + 50.0

    normalized = ((series - mean) / std) * 10 + 50  # Scale to have mean around 50
    return np.clip(normalized, 0, 100)  # To ensure values stay within bounds

# Sentiment columns: each raw signal is built first, then rescaled to 0-100

# Profitability and growth: average of margin, ROE and the period-over-period
# growth of sales and net profit (the first period's growth is treated as 0)
profitability_growth = (
    data['Net Profit Margin']
    + data['Return on Equity (ROE)']
    + data[' Sales '].pct_change().fillna(0)
    + data[' Net profit '].pct_change().fillna(0)
) / 4
data['Combined Profitability and Growth Sentiment'] = robust_normalization(profitability_growth)

# Liquidity: average of the two liquidity ratios
liquidity = (data['Current Ratio'] + data['Operating Cash Flow Ratio']) / 2
data['Liquidity Sentiment'] = robust_normalization(liquidity)

# Leverage: debt-to-equity relative to its maximum, inverted so lower debt scores higher
debt_to_equity = data['Debt to Equity Ratio']
leverage = 1 - (debt_to_equity / debt_to_equity.max())
data['Leverage Sentiment'] = robust_normalization(leverage)

# Cash flow and dividends: free cash flow averaged with the dividend payout ratio
payout_ratio = (data[' Dividend Amount '] / data[' Net profit ']).fillna(0)
cash_flow_dividend = (data['Free Cash Flow'] + payout_ratio) / 2
data['Combined Cash Flow and Dividend Sentiment'] = robust_normalization(cash_flow_dividend)

# Valuation: one minus price over per-share net profit
profit_per_share = data[' Net profit '] / data[' No. of Equity Shares ']
valuation = 1 - (data[' PRICE: '] / profit_per_share)
data['Valuation Sentiment'] = robust_normalization(valuation)

# Investment and operational efficiency: asset turnover averaged with EBITDA over sales
efficiency = (data['Asset Turnover Ratio'] + data['EBITDA'] / data[' Sales ']) / 2
data['Combined Investment and Operational Efficiency Sentiment'] = robust_normalization(efficiency)

# Market performance: rescaled period-over-period price return
data['Market Performance Sentiment'] = robust_normalization(data['Total Shareholder Return (TSR)'])

# Weight of each sentiment column in the cumulative score
# NOTE(review): these weights add up to 1.1 rather than 1.0. The final
# rescaling below is based on z-scores, so the overall scale cancels out, but
# confirm the relative weights are the intended ones.
weights = {
    'Combined Profitability and Growth Sentiment': 0.5,
    'Liquidity Sentiment': 0.2,
    'Leverage Sentiment': 0.1,
    'Combined Cash Flow and Dividend Sentiment': 0.05,
    'Valuation Sentiment': 0.1,
    'Combined Investment and Operational Efficiency Sentiment': 0.05,
    'Market Performance Sentiment': 0.1
}

# Accumulate the weighted sentiment columns in the order they are listed above
weighted_columns = iter(weights.items())
first_column, first_weight = next(weighted_columns)
weighted_total = data[first_column] * first_weight
for column, weight in weighted_columns:
    weighted_total = weighted_total + data[column] * weight

# Rescale the weighted total to the 0-100 range with robust_normalization
data['Cumulative Score'] = robust_normalization(weighted_total)

# Show every sentiment column together with the cumulative score
report_columns = [
    'Combined Profitability and Growth Sentiment',
    'Liquidity Sentiment',
    'Leverage Sentiment',
    'Combined Cash Flow and Dividend Sentiment',
    'Valuation Sentiment',
    'Combined Investment and Operational Efficiency Sentiment',
    'Market Performance Sentiment',
    'Cumulative Score',
]
print(data[report_columns])


# Build the export table: report date, its calendar year and the cumulative score
results = (
    data[['Cumulative Score']]
    .reset_index()
    .assign(Year=lambda frame: frame['Report Date'].dt.year)
    .loc[:, ['Report Date', 'Year', 'Cumulative Score']]
)

# Write the table to a CSV file without the row index
results.to_csv('results_balance_sheet.csv', index=False)




             Combined Profitability and Growth Sentiment  Liquidity Sentiment  \
Report Date                                                                     
2015-03-01                                     37.620086            44.643654   
2016-03-01                                     39.531949            48.391340   
2017-03-01                                     41.436287            41.161373   
2018-03-01                                     43.972894            56.445165   
2019-03-01                                     46.302415            52.216302   
2020-03-01                                     50.036871            54.264035   
2021-03-01                                     53.447046            57.573487   
2022-03-01                                     58.167699            57.085940   
2023-03-01                                     64.216914            60.677372   
2024-03-01                                     65.267838            27.541332   

             Leverage Senti

# Volume-Based Liquidity Prediction

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt

# Step 1: Load the CSV and handle BOM (byte-order mark)
train_data = pd.read_csv('/content/hdbk_ohlc.csv', encoding='utf-8-sig')  # 'utf-8-sig' removes BOM

# Load the Excel file inside a context manager so the workbook handle is
# closed as soon as the sheet has been read
with pd.ExcelFile('/content/CONSOLIDATED.xlsx') as excel_file:
    print("Available sheets:", excel_file.sheet_names)  # Check available sheets

    # Load the desired sheet (replace 'Sheet1' with the actual sheet name if different)
    prediction_dates = pd.read_excel(excel_file, sheet_name='Sheet1')

# Step 2: Clean column names to avoid spacing or case issues
train_data.columns = train_data.columns.str.strip().str.upper()
prediction_dates.columns = prediction_dates.columns.str.strip().str.upper()

# Step 3: Verify 'DATE' column exists in prediction_dates and convert to datetime
if 'DATE' not in prediction_dates.columns:
    raise KeyError("'DATE' column not found in prediction_dates. Please check the dataset.")
prediction_dates['DATE'] = pd.to_datetime(prediction_dates['DATE'], errors='coerce')
print("First few dates:", prediction_dates['DATE'].head())

# Step 4: Ensure the train data has the correct column ('F') and convert it to datetime
if 'F' not in train_data.columns:  # After cleaning, the BOM should be removed
    raise KeyError("'F' column not found in train_data after cleaning.")
# The CSV stores dates day-first (e.g. 18-10-2024). An explicit format avoids
# day/month mix-ups for ambiguous values such as 05-10-2024.
train_data['F'] = pd.to_datetime(train_data['F'], format='%d-%m-%Y', errors='coerce')

# Step 5: Merge the datasets based on the DATE columns.
# Rows whose date could not be parsed are removed first: merge treats missing
# keys on both sides as equal and would pair those rows with each other.
train_data = train_data.dropna(subset=['F'])
prediction_dates = prediction_dates.dropna(subset=['DATE'])
merged_data = pd.merge(train_data, prediction_dates, left_on='F', right_on='DATE', how='inner')
# Order the rows oldest-first so each 60-row sequence precedes the value it predicts
merged_data = merged_data.sort_values('F').reset_index(drop=True)
print("Merged data sample:")
print(merged_data.head())

# Step 6: Prepare data for LSTM model (using 'OPEN' price as the target for forecasting)
if 'OPEN' not in merged_data.columns:
    raise KeyError("'OPEN' column not found. Please ensure your data contains this column.")

# Select features and target variable, then scale them into the [0, 1] range
data = merged_data[['OPEN']]
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(data)

# Step 7: Create sequences for LSTM (use past 60 days to predict the next day)
def create_sequences(data, time_step=60):
    """Build (sequence, next value) pairs from column 0 of a 2-D array.

    Each sample consists of ``time_step`` consecutive values; its target is
    the value that immediately follows them. Every position that has a
    following value is used, including the final one (the previous loop bound
    of ``len(data) - time_step - 1`` left out the last valid sample).

    Returns ``(X, y)`` with shapes ``(n_samples, time_step)`` and
    ``(n_samples,)``. When ``data`` has no more than ``time_step`` rows the
    arrays are empty but keep those ranks, so a later reshape to three
    dimensions still works.
    """
    data = np.asarray(data)
    sample_count = len(data) - time_step
    if sample_count <= 0:
        return np.empty((0, time_step)), np.empty((0,))

    X = np.array([data[start:start + time_step, 0] for start in range(sample_count)])
    y = np.array(data[time_step:, 0])
    return X, y

# Build the sequences and add a trailing feature axis: (samples, time steps, 1)
X, y = create_sequences(scaled_data)
sample_count, sequence_length = X.shape[0], X.shape[1]
X = X.reshape((sample_count, sequence_length, 1))

# Step 8: Define and compile the LSTM model (two LSTM layers with dropout, one output)
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(sequence_length, 1)),
    Dropout(0.2),
    LSTM(50, return_sequences=False),
    Dropout(0.2),
    Dense(1),
])

model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')

# Step 9: Train the LSTM model, holding out the last 10% of samples for validation
history = model.fit(X, y, epochs=10, batch_size=32, validation_split=0.1, verbose=1)

# Step 10: Predict on the same sequences and convert back to price units
scaled_predictions = model.predict(X)
predictions = scaler.inverse_transform(scaled_predictions)

# Step 11: Plot actual versus predicted prices.
# The first prediction belongs to row 60, so the predicted curve starts there.
prediction_positions = np.arange(60, 60 + len(predictions))
plt.figure(figsize=(10, 6))
plt.plot(data.values, label='Actual Open Price')
plt.plot(prediction_positions, predictions, label='Predicted Open Price')
plt.xlabel('Days')
plt.ylabel('Open Price')
plt.legend()
plt.show()
