In [58]:
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
import gradio as gr
from datetime import timedelta
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# Module-level VADER analyzer shared by analyze_tweets(), which reads the
# 'compound' entry of its polarity_scores() result.
# NOTE(review): presumably needs the NLTK 'vader_lexicon' resource to be
# available locally — confirm on a fresh environment.
sid = SentimentIntensityAnalyzer()

In [59]:

#functions 
def analyze_tweets(tweets):
    """Report the mean VADER compound score of the entered tweets.

    Args:
        tweets: Text holding one or more tweets separated by ``'|||'``.
            ``None`` is handled like an empty string.

    Returns:
        str: A sentence with the average compound score, or
        ``"No  tweets entered."`` when there is no non-blank tweet.
    """
    # Guard against a missing value so the function answers with the
    # "nothing entered" message instead of failing on None.split().
    if tweets is None:
        tweets = ""

    # '|||' is the delimiter; segments that are blank after stripping are skipped.
    cleaned_tweets = [segment.strip() for segment in tweets.split('|||')]
    compound_scores = [
        sid.polarity_scores(text)['compound'] for text in cleaned_tweets if text
    ]

    if not compound_scores:
        return "No  tweets entered."

    average_sentiment = sum(compound_scores) / len(compound_scores)
    return f"The average compound sentiment score for the entered tweets is: {average_sentiment}"


def create_sequences(data, seq_length):
    """Build flat model inputs and their targets from a 2-D array.

    For every row index ``i`` from ``seq_length`` to the end, the input is the
    previous ``seq_length`` values of column 0 followed by columns 1, 2 and 3
    of row ``i``; the target is column 0 of row ``i``.

    Args:
        data: 2-D NumPy array with at least four columns.
        seq_length: Number of preceding column-0 values in each input.

    Returns:
        tuple: ``(inputs, targets)`` as NumPy arrays.
    """
    feature_columns = [1, 2, 3]
    row_indices = range(seq_length, len(data))

    inputs = [
        np.concatenate(
            (data[row - seq_length:row, 0], data[row, feature_columns]),
            axis=0,
        )
        for row in row_indices
    ]
    targets = [data[row, 0] for row in row_indices]

    return np.array(inputs), np.array(targets)

def fetch_recent_stock_data(stock_symbol, period='3mo'):
    """Download recent price history for one ticker via ``yf.download``.

    Args:
        stock_symbol: Ticker symbol passed to ``yf.download``.
        period: Look-back window string passed to ``yf.download``
            (default ``'3mo'``).

    Returns:
        pandas.DataFrame: The downloaded frame with single-level columns.

    Raises:
        ValueError: If the download comes back empty.
    """
    stock_data = yf.download(stock_symbol, period=period)
    if stock_data.empty:
        raise ValueError(f"No data found for {stock_symbol}.")

    # Recent yfinance releases return (field, ticker) MultiIndex columns even
    # for a single symbol. Flatten to the field names so stock_data['Close']
    # is a Series instead of a one-column DataFrame.
    if isinstance(stock_data.columns, pd.MultiIndex):
        stock_data = stock_data.copy()
        stock_data.columns = stock_data.columns.get_level_values(0)

    return stock_data

#close prediction and calculate dates
def predict_next_day_close_yfinance(stock_symbol, sentiment_score, number_of_tweets):
    """Predict the close for the business day after the latest downloaded row.

    Relies on the module-level ``scaler`` and ``model`` that ``train_model``
    assigns, so ``train_model`` must have run first.

    Args:
        stock_symbol: Ticker symbol to download recent prices for.
        sentiment_score: Sentiment feature fed to the model.
        number_of_tweets: Tweet-count feature fed to the model.

    Returns:
        tuple: ``(predicted_price, next_prediction_date)`` where the price is
        in original (unscaled) units and the date is the next business day
        after the last row of the downloaded data.

    Raises:
        ValueError: If no data, or fewer than 60 rows, are available.
    """
    window = 60  # must equal the sequence length used in train_model

    # A 3-month window holds only about 60 trading sessions and can fall short
    # of `window`; request six months and keep just the last `window` closes.
    stock_data = fetch_recent_stock_data(stock_symbol, period='6mo')
    if len(stock_data) < window:
        raise ValueError(f"Not enough data available for {stock_symbol}.")

    # reshape(-1) flattens both a Series and a one-column DataFrame, so this
    # works whether or not the download has MultiIndex columns.
    close_history = np.asarray(stock_data['Close'], dtype=float).reshape(-1)[-window:]
    # The most recent row's open is used as the "current" open feature.
    open_price = float(np.asarray(stock_data['Open'], dtype=float).reshape(-1)[-1])

    # The scaler was fitted on four columns (Close, sentiment, tweets, Open),
    # so pad with zeros to scale one group of columns at a time and keep only
    # the columns that matter.
    close_block = np.column_stack((close_history, np.zeros((window, 3))))
    past_closes_scaled = scaler.transform(close_block)[:, 0]
    current_features_scaled = scaler.transform(
        [[0, sentiment_score, number_of_tweets, open_price]]
    )[0, 1:]

    # Same layout as create_sequences: `window` closes followed by 3 features,
    # presented as a single timestep.
    input_sequence = np.concatenate((past_closes_scaled, current_features_scaled), axis=0)
    input_sequence = np.reshape(input_sequence, (1, 1, len(input_sequence)))

    predicted_price_scaled = model.predict(input_sequence)
    # Undo the scaling on the Close column only; the other columns are dummies.
    predicted_price = scaler.inverse_transform(
        [[predicted_price_scaled[0][0], 0, 0, 0]]
    )[0][0]

    # Next trading day: a business-day offset skips Saturdays and Sundays
    # (exchange holidays are not accounted for).
    latest_date = stock_data.index[-1]
    next_prediction_date = latest_date + pd.offsets.BDay(1)

    return predicted_price, next_prediction_date

#train model on selected stock
def train_model(stock_symbol):
    """Fit a scaler and an LSTM on one stock's rows of ``merged_data``.

    On success the fitted objects are stored in the module-level ``scaler``
    and ``model`` used by ``predict_next_day_close_yfinance``.

    Args:
        stock_symbol: Value matched against the ``'Stock Name'`` column.

    Returns:
        str: ``"Model trained on <stock_symbol>."``

    Raises:
        ValueError: If the stock has too few complete rows to build at least
            one training sequence.
    """
    global scaler, model  # shared with the prediction function

    stock_specific_data = merged_data[
        merged_data['Stock Name'] == stock_symbol
    ].sort_values(by='Date')

    # Column order matters: create_sequences treats column 0 as the target
    # series and columns 1-3 as the same-row features.
    training_columns = ['Close', 'Average_Sentiment', 'Number_of_Tweets', 'Open']
    training_data = stock_specific_data[training_columns].dropna()

    sequence_length = 60
    if len(training_data) <= sequence_length:
        # Without this check the reshape below fails with an opaque IndexError.
        raise ValueError(
            f"Not enough data to train on {stock_symbol}: need more than "
            f"{sequence_length} rows, found {len(training_data)}."
        )

    #scaling
    fitted_scaler = MinMaxScaler()
    scaled_training_data = fitted_scaler.fit_transform(training_data)

    #sequences, presented to the LSTM as a single timestep of flat features
    sequences, targets = create_sequences(scaled_training_data, sequence_length)
    sequences = np.reshape(sequences, (sequences.shape[0], 1, sequences.shape[1]))

    # Chronological hold-out: consecutive windows overlap heavily, so a
    # shuffled split would leak validation days into the training set.
    x_train, x_val, y_train, y_val = train_test_split(
        sequences, targets, test_size=0.2, shuffle=False
    )

    #LSTM
    lstm_model = Sequential()
    lstm_model.add(LSTM(units=50, return_sequences=False,
                        input_shape=(x_train.shape[1], x_train.shape[2])))
    lstm_model.add(Dropout(0.2))
    lstm_model.add(Dense(units=25))
    lstm_model.add(Dense(units=1))

    lstm_model.compile(optimizer='adam', loss='mean_squared_error')
    lstm_model.fit(x_train, y_train, batch_size=1, epochs=10,
                   validation_data=(x_val, y_val))

    # Publish both objects together, only after a successful fit, so the
    # global scaler and model always belong to the same training run.
    scaler = fitted_scaler
    model = lstm_model

    return f"Model trained on {stock_symbol}."


def predict_stock_price(stock_symbol, sentiment_score, number_of_tweets):
    """Train on the chosen stock, then predict its next close.

    Args:
        stock_symbol: Ticker to train on and predict for.
        sentiment_score: Sentiment feature forwarded to the predictor.
        number_of_tweets: Tweet-count feature forwarded to the predictor.

    Returns:
        str: The training status message followed by a line stating the
        predicted closing price and the date it applies to.
    """
    # Training runs first because the predictor relies on what it sets up.
    status = train_model(stock_symbol)
    price, target_day = predict_next_day_close_yfinance(
        stock_symbol,
        sentiment_score,
        number_of_tweets,
    )
    return f"{status}\nThe predicted closing price for {stock_symbol} on {target_day.date()} is: {price}"

In [None]:
merged_data['Stock Name'].unique()

In [60]:
#file 
file_path = 'merged_sentiment_stock_data.csv'
merged_data = pd.read_csv(file_path)

In [62]:
# Tickers offered in the stock dropdown.
stock_choices = [
    'AAPL', 'AMD', 'AMZN', 'DIS', 'GOOG', 'META', 'MSFT', 'NFLX',
    'NIO', 'PG', 'TSLA', 'TSM', 'XPEV', 'CRM', 'ZS', 'ENPH', 'PYPL',
    'BA', 'COST', 'KO', 'BX', 'F', 'INTC', 'NOC', 'VZ',
]

# Stock tab: a symbol picker plus the two feature sliders, in the order
# predict_stock_price expects its arguments.
stock_inputs = [
    gr.Dropdown(label="Stock Symbol", choices=stock_choices, value="AAPL"),
    gr.Slider(minimum=-1.0, maximum=1.0, value=0.4, label="Sentiment Score"),
    gr.Slider(minimum=0, maximum=350, value=50, label="Number of Tweets"),
]
stock_price_tab = gr.Interface(
    fn=predict_stock_price,
    inputs=stock_inputs,
    outputs="text",
    title="Stock Price Prediction",
    description="Train the model on the selected stock symbol and predict the next day's closing price.",
)

# Sentiment tab: a free-text box whose contents go to analyze_tweets.
tweets_box = gr.Textbox(
    lines=5,
    placeholder="Enter multiple tweets separated by '|||'",
    label="Tweets",
)
sentiment_tab = gr.Interface(
    fn=analyze_tweets,
    inputs=tweets_box,
    outputs="text",
    title="Tweet Sentiment Analysis",
    description="Enter multiple tweets separated by '|||', and get the average compound sentiment score.",
)

# Combine both interfaces into one tabbed app and start it.
app = gr.TabbedInterface(
    [stock_price_tab, sentiment_tab],
    tab_names=["Stock Price Prediction", "Tweet Sentiment Analysis"],
)

app.launch()

Running on local URL:  http://127.0.0.1:7878

To create a public link, set `share=True` in `launch()`.


