In [1]:
# Shell 1: Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# Custom metric to calculate RMSE
def rmse(y_true, y_pred):
    return tf.sqrt(tf.reduce_mean(tf.square(y_pred - y_true)))

print("Libraries imported and custom RMSE metric defined.")


Libraries imported and custom RMSE metric defined.


In [11]:
# Shell 2: Fetch stock price data and sentiment data
stock_symbols = ['RELIANCE.NS', 'SBIN.NS', 'HDFCBANK.NS', 'TCS.NS']
api_key = 'NPWUSQC1723OZ4YW'
stock_data = {}
sentiment_data = {}

# Step 1: Fetch stock price data using Yahoo Finance
for symbol in stock_symbols:
    stock_data[symbol] = yf.download(symbol, start='2019-01-01', end='2024-01-01')
    stock_data[symbol].to_csv(f'{symbol}_prices.csv')  # Optional saving as CSV

# Step 2: Fetch sentiment data using Alpha Vantage API
for symbol in stock_symbols:
    url_sentiment = f'https://www.alphavantage.co/query?function=NEWS_SENTIMENT&tickers={symbol}&apikey={api_key}'
    response_sentiment = requests.get(url_sentiment)
    
    if response_sentiment.status_code == 200:
        sentiment_data[symbol] = response_sentiment.json()
        print(f"Fetched sentiment data for {symbol}")
    else:
        print(f"Failed to fetch sentiment data for {symbol}: {response_sentiment.status_code}")
    
    time.sleep(15)  # To handle API rate limits

# Optionally save sentiment data
for symbol, data in sentiment_data.items():
    df_sentiment = pd.DataFrame(data.get('feed', []))  # Assuming sentiment data is under 'feed'
    df_sentiment.to_csv(f'{symbol}_sentiment.csv', index=False)

print("Stock prices and sentiment data fetched and saved.")


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Fetched sentiment data for RELIANCE.NS
Fetched sentiment data for SBIN.NS
Fetched sentiment data for HDFCBANK.NS
Fetched sentiment data for TCS.NS
Stock prices and sentiment data fetched and saved.


In [17]:
stock_data

{'RELIANCE.NS':                    Open         High          Low        Close    Adj Close  \
 Date                                                                          
 2019-01-01  1028.852905  1030.727295  1015.000732  1024.966919  1000.962952   
 2019-01-02  1019.023804  1030.453003  1006.680298  1011.617737   987.926453   
 2019-01-03  1012.623474  1019.115234   996.714111   999.137085   975.738098   
 2019-01-04  1003.388733  1009.834778   988.485107  1004.531616   981.006287   
 2019-01-07  1012.166321  1022.635437  1006.680298  1010.109070   986.453125   
 ...                 ...          ...          ...          ...          ...   
 2023-12-22  2559.600098  2580.899902  2547.649902  2565.050049  2556.373779   
 2023-12-26  2568.000000  2591.949951  2562.699951  2578.050049  2569.329834   
 2023-12-27  2582.000000  2599.899902  2573.100098  2586.850098  2578.100098   
 2023-12-28  2589.800049  2612.000000  2586.850098  2605.550049  2596.736816   
 2023-12-29  2611.100098 

In [12]:
# Shell 3: Load bond data and preprocess
bond_data = pd.read_csv("/Users/raghavgarg/Downloads/bond.csv")
bond_data['Date'] = pd.to_datetime(bond_data['Date'], format='%d-%m-%Y')
bond_data['Change %'] = bond_data['Change %'].str.rstrip('%').astype('float') / 100.0

# Convert other columns to float as necessary
bond_data.iloc[:, 1:5] = bond_data.iloc[:, 1:5].astype(float)

print("Bond data loaded and preprocessed.")


Bond data loaded and preprocessed.


In [9]:
# Shell 4: Load and process stock data
#stock_data = pd.read_csv("stock_data.csv")  # Replace with actual stock data file
stock_data['Date'] = pd.to_datetime(stock_data['Date'])

# Example: Moving averages
stock_data['MA_20'] = stock_data['Close'].rolling(window=20).mean()
stock_data['MA_50'] = stock_data['Close'].rolling(window=50).mean()

# Preprocess sentiment data (assuming you have preprocessed sentiment features)
sentiment_data = pd.read_csv("sentiment_data.csv")  # Replace with actual sentiment data file
sentiment_data['Date'] = pd.to_datetime(sentiment_data['Date'])

# Merge all data (align on 'Date')
merged_data = pd.merge(stock_data, sentiment_data, on='Date', how='inner')
merged_data = pd.merge(merged_data, bond_data, on='Date', how='inner')
print("Stock, sentiment, and bond data merged.")


KeyError: 'Date'

In [18]:
# Shell 3: Prepare stock and sentiment data for merging and modeling
merged_data = {}

for symbol in stock_symbols:
    stock_df = stock_data[symbol]
    stock_df['Date'] = stock_df.index  # Extract date from the index
    stock_df = stock_df[['Date', 'Close']].copy()

    # Handle missing or malformed sentiment data
    if symbol in sentiment_data and 'feed' in sentiment_data[symbol]:
        sentiment_df = pd.DataFrame(sentiment_data[symbol].get('feed', []))

        # Ensure the sentiment data contains a valid time field for 'Date'
        if 'time_published' in sentiment_df.columns:
            sentiment_df['Date'] = pd.to_datetime(sentiment_df['time_published'], errors='coerce')
            sentiment_df.dropna(subset=['Date'], inplace=True)  # Remove rows with invalid dates
            sentiment_df = sentiment_df.groupby('Date').mean().reset_index()  # Average sentiment for each day
        else:
            print(f"No 'time_published' column found in sentiment data for {symbol}. Skipping sentiment merge.")
            sentiment_df = pd.DataFrame()  # Empty DataFrame if 'time_published' is missing
    else:
        print(f"Sentiment data missing or malformed for {symbol}. Skipping sentiment merge.")
        sentiment_df = pd.DataFrame()  # Empty DataFrame if no valid sentiment data

    # Merge stock and sentiment data on 'Date' if sentiment data is not empty
    if not sentiment_df.empty:
        merged_df = pd.merge(stock_df, sentiment_df, on='Date', how='inner')
    else:
        merged_df = stock_df  # If no sentiment data, use only stock data

    merged_data[symbol] = merged_df  # Store the merged dataset in a dictionary
    
    # Save merged data (optional)
    merged_df.to_csv(f'{symbol}_merged_data.csv', index=False)

print("Stock and sentiment data prepared and merged for modeling.")


Sentiment data missing or malformed for RELIANCE.NS. Skipping sentiment merge.
Sentiment data missing or malformed for SBIN.NS. Skipping sentiment merge.
Sentiment data missing or malformed for HDFCBANK.NS. Skipping sentiment merge.
Sentiment data missing or malformed for TCS.NS. Skipping sentiment merge.
Stock and sentiment data prepared and merged for modeling.


In [19]:
# Shell 1: Combine stock data into a single DataFrame with the "stock_name" column
combined_stock_data = pd.DataFrame()  # Empty DataFrame to store all stock data

for symbol in stock_symbols:
    # Add a 'stock_name' column to each stock's DataFrame
    stock_df = stock_data[symbol].copy()  # Copy to avoid modifying the original data
    stock_df['stock_name'] = symbol  # Add the stock symbol as a new column
    
    # Reset the index to make 'Date' a column
    stock_df = stock_df.reset_index()

    # Append this stock's data to the combined DataFrame
    combined_stock_data = pd.concat([combined_stock_data, stock_df])

# Save the combined stock data to a CSV file
combined_stock_data.to_csv('combined_stock_data.csv', index=False)

print("Combined stock data saved to 'combined_stock_data.csv'.")


ValueError: cannot insert Date, already exists

In [7]:
# Shell 1: Import necessary libraries
import pandas as pd
import requests
import numpy as np
import matplotlib.pyplot as plt
import yfinance as yf
import time
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# Custom metric to calculate RMSE
def rmse(y_true, y_pred):
    return tf.sqrt(tf.reduce_mean(tf.square(y_pred - y_true)))

print("Libraries imported and custom RMSE metric defined.")

# Shell 2: Fetch stock price data and sentiment data
stock_symbols = ['RELIANCE.NS', 'SBIN.NS', 'HDFCBANK.NS', 'TCS.NS']
api_key = 'NPWUSQC1723OZ4YW'
stock_data = {}
sentiment_data = {}

# Step 1: Fetch stock price data using Yahoo Finance
for symbol in stock_symbols:
    stock_data[symbol] = yf.download(symbol, start='2019-01-01', end='2024-01-01')
    stock_data[symbol].to_csv(f'{symbol}_prices.csv')  # Optional saving as CSV

# Step 2: Fetch sentiment data using Alpha Vantage API
for symbol in stock_symbols:
    url_sentiment = f'https://www.alphavantage.co/query?function=NEWS_SENTIMENT&tickers={symbol}&apikey={api_key}'
    response_sentiment = requests.get(url_sentiment)
    
    if response_sentiment.status_code == 200:
        sentiment_data[symbol] = response_sentiment.json()
        print(f"Fetched sentiment data for {symbol}")
    else:
        print(f"Failed to fetch sentiment data for {symbol}: {response_sentiment.status_code}")
    
    time.sleep(15)  # To handle API rate limits

# Optionally save sentiment data
for symbol, data in sentiment_data.items():
    df_sentiment = pd.DataFrame(data.get('feed', []))  # Assuming sentiment data is under 'feed'
    df_sentiment.to_csv(f'{symbol}_sentiment.csv', index=False)

print("Stock prices and sentiment data fetched and saved.")

# Shell 3: Load bond data and preprocess
bond_data = pd.read_csv("/Users/raghavgarg/Downloads/bond.csv")
bond_data['Date'] = pd.to_datetime(bond_data['Date'], format='%d-%m-%Y')
bond_data['Change %'] = bond_data['Change %'].str.rstrip('%').astype('float') / 100.0

# Convert other columns to float as necessary
bond_data.iloc[:, 1:5] = bond_data.iloc[:, 1:5].astype(float)

print("Bond data loaded and preprocessed.")

# Shell 4: Load and process stock data


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Libraries imported and custom RMSE metric defined.





Fetched sentiment data for RELIANCE.NS
Fetched sentiment data for SBIN.NS
Fetched sentiment data for HDFCBANK.NS
Fetched sentiment data for TCS.NS
Stock prices and sentiment data fetched and saved.
Bond data loaded and preprocessed.


In [13]:
df_sentiment

In [8]:
bond_data

Unnamed: 0,Date,Price,Open,High,Low,Change %
0,2024-01-01,7.196,7.207,7.207,7.192,0.0028
1,2023-12-29,7.176,7.225,7.225,7.172,-0.0043
2,2023-12-28,7.207,7.200,7.213,7.191,0.0003
3,2023-12-27,7.205,7.191,7.208,7.182,0.0031
4,2023-12-26,7.183,7.213,7.213,7.177,-0.0007
...,...,...,...,...,...,...
1210,2019-01-07,7.508,7.467,7.519,7.439,0.0081
1211,2019-01-04,7.448,7.407,7.475,7.406,0.0028
1212,2019-01-03,7.427,7.397,7.433,7.381,0.0099
1213,2019-01-02,7.354,7.409,7.415,7.350,-0.0086


In [9]:
stock_data

{'RELIANCE.NS':                    Open         High          Low        Close    Adj Close  \
 Date                                                                          
 2019-01-01  1028.852905  1030.727295  1015.000732  1024.966919  1000.963013   
 2019-01-02  1019.023804  1030.453003  1006.680298  1011.617737   987.926453   
 2019-01-03  1012.623474  1019.115234   996.714111   999.137085   975.738098   
 2019-01-04  1003.388733  1009.834778   988.485107  1004.531616   981.006287   
 2019-01-07  1012.166321  1022.635437  1006.680298  1010.109070   986.453064   
 ...                 ...          ...          ...          ...          ...   
 2023-12-22  2559.600098  2580.899902  2547.649902  2565.050049  2556.373779   
 2023-12-26  2568.000000  2591.949951  2562.699951  2578.050049  2569.329834   
 2023-12-27  2582.000000  2599.899902  2573.100098  2586.850098  2578.100098   
 2023-12-28  2589.800049  2612.000000  2586.850098  2605.550049  2596.736816   
 2023-12-29  2611.100098 

In [17]:
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

nltk.download('vader_lexicon')

sia = SentimentIntensityAnalyzer()

# Shell 4: Load and process stock data
for symbol, data in stock_data.items():
    data.index = pd.to_datetime(data.index)
    
    # Example: Moving averages
    data['MA_20'] = data['Close'].rolling(window=20).mean()
    data['MA_50'] = data['Close'].rolling(window=50).mean()
    
    # Extract the sentiment features
    sentiment_features = []
    for article in data['Volume']:
        sentiment = sia.polarity_scores(str(article))
        sentiment_features.append(sentiment)
    
    # Create a DataFrame with the sentiment features
    df_sentiment = pd.DataFrame(sentiment_features)
    
    # Merge the sentiment features with the stock data
    merged_data = pd.merge(data, df_sentiment, left_index=True, right_index=True)
    
    # Merge the merged data with the bond data
    merged_data = pd.merge(merged_data, bond_data, left_index=True, right_index=True)
    
    print(f"Stock, sentiment, and bond data merged for {symbol}.")
    
    # Print the merged data
    print(merged_data.head())
    
# Shell 5: Prepare data for LSTM model
# Define features and target
features = ['Open_x', 'High_x', 'Low_x', 'Volume', 'MA_20', 'MA_50', 'compound', 'pos', 'neg', 'neu']
target = 'Close'

# Scale data using MinMaxScaler
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(merged_data[features])

# Split data into training and testing sets
train_size = int(0.8 * len(scaled_data))
train_data, test_data = scaled_data[0:train_size], scaled_data[train_size:len(scaled_data)]

# Split data into input (X) and output (y)
X_train, y_train = train_data[:, 0:10], train_data[:, 10]
X_test, y_test = test_data[:, 0:10], test_data[:, 10]

# Reshape data for LSTM model
X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))
# Shell 6: Build and train LSTM model
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(LSTM(50))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam', metrics=[rmse])

early_stopping = EarlyStopping(monitor='val_rmse', patience=5, min_delta=0.001)

history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test), callbacks=[early_stopping])

# Shell 7: Evaluate model performance
mse = model.evaluate(X_test, y_test)
print(f'MSE: {mse[0]}')
print(f'RMSE: {mse[1]}')

# Shell 8: Plot model performance
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.plot(history.history['rmse'], label='Training RMSE')
plt.plot(history.history['val_rmse'], label='Validation RMSE')
plt.legend()
plt.show()

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/raghavgarg/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


Stock, sentiment, and bond data merged for RELIANCE.NS.
Empty DataFrame
Columns: [Open_x, High_x, Low_x, Close, Adj Close, Volume, MA_20, MA_50, neg, neu, pos, compound, Date, Price, Open_y, High_y, Low_y, Change %]
Index: []
Stock, sentiment, and bond data merged for SBIN.NS.
Empty DataFrame
Columns: [Open_x, High_x, Low_x, Close, Adj Close, Volume, MA_20, MA_50, neg, neu, pos, compound, Date, Price, Open_y, High_y, Low_y, Change %]
Index: []
Stock, sentiment, and bond data merged for HDFCBANK.NS.
Empty DataFrame
Columns: [Open_x, High_x, Low_x, Close, Adj Close, Volume, MA_20, MA_50, neg, neu, pos, compound, Date, Price, Open_y, High_y, Low_y, Change %]
Index: []
Stock, sentiment, and bond data merged for TCS.NS.
Empty DataFrame
Columns: [Open_x, High_x, Low_x, Close, Adj Close, Volume, MA_20, MA_50, neg, neu, pos, compound, Date, Price, Open_y, High_y, Low_y, Change %]
Index: []


ValueError: Found array with 0 sample(s) (shape=(0, 10)) while a minimum of 1 is required by MinMaxScaler.