In [3]:
# Install required packages
# pip install tensorflow pandas scikit-learn

import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Embedding, Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# 1. Load your data
# Assuming you have the data in a CSV or dataframe
# If reading from CSV: df = pd.read_csv('your_data.csv')
# Here let's manually simulate it based on your table

df = pd.read_csv('/content/stock_news_sentiment_dataset (3).csv')  # <-- Change if needed


In [None]:
# 2. Preprocessing
# Combine headline + description
df['full_text'] = df['Headline'].astype(str) + " " + df['Description'].astype(str)

# Encode sentiment
df['Sentiment'] = df['Sentiment'].map({'Good': 1, 'Bad': 0})

texts = df['full_text'].values
labels = df['Sentiment'].values
stock_prices = df['Stock Price'].values

# 3. Tokenization
tokenizer = Tokenizer(num_words=5000, oov_token="<OOV>")
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)

# 4. Padding
max_length = 100
padded_sequences = pad_sequences(sequences, maxlen=max_length, padding='post')

# 5. Define Risk Level
def assign_risk(sentiment, stock_price):
    if sentiment == 0 and stock_price < 100:
        return 'High Risk'
    elif sentiment == 1 and stock_price < 100:
        return 'Medium Risk'
    elif sentiment == 0 and stock_price >= 100:
        return 'Medium Risk'
    elif sentiment == 1 and stock_price >= 100:
        return 'Low Risk'

df['Risk_Level'] = df.apply(lambda row: assign_risk(row['Sentiment'], row['Stock Price']), axis=1)

# 6. Split Train / Test
X_train, X_test, y_train, y_test = train_test_split(
    padded_sequences, labels, test_size=0.2, random_state=42, stratify=labels
)

# 7. Build LSTM Model
model = Sequential()
model.add(Embedding(input_dim=5000, output_dim=64, input_length=max_length))
model.add(LSTM(64, return_sequences=False))
model.add(Dropout(0.3))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# 8. Train Model
model.fit(
    X_train, y_train,
    validation_split=0.2,
    batch_size=4,
    epochs=10,
    verbose=1
)

# 9. Predict on test set
y_pred = (model.predict(X_test) > 0.5).astype(int)
print(f"✅ Test Accuracy: {accuracy_score(y_test, y_pred) * 100:.2f}%")

# 10. FINAL Function for Testing New Input

def predict_sentiment_and_risk(headline, description, stock_price):
    text = headline + " " + description
    seq = tokenizer.texts_to_sequences([text])
    padded = pad_sequences(seq, maxlen=max_length, padding='post')

    sentiment_pred = (model.predict(padded) > 0.5).astype(int)[0][0]

    # Map prediction
    sentiment_label = "Good" if sentiment_pred == 1 else "Bad"

    # Assign Risk
    risk = assign_risk(sentiment_pred, stock_price)

    return sentiment_label, risk

# Example Testing
headline = "3M Reports Record Revenue Growth in Healthcare Division"
description = "The company announced its healthcare segment outperformed analyst expectations with a 10% YoY revenue growth."
stock_price = 128.45

sentiment, risk = predict_sentiment_and_risk(headline, description, stock_price)

print(f"\n📢 Predicted Sentiment: {sentiment}")
print(f"⚡ Assigned Risk Level: {risk}")

Epoch 1/10




[1m480/480[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 40ms/step - accuracy: 0.8379 - loss: 0.4729 - val_accuracy: 0.8729 - val_loss: 0.3780
Epoch 2/10
[1m480/480[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 45ms/step - accuracy: 0.8592 - loss: 0.4104 - val_accuracy: 0.8729 - val_loss: 0.3819
Epoch 3/10
[1m480/480[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 41ms/step - accuracy: 0.8474 - loss: 0.4128 - val_accuracy: 0.8729 - val_loss: 0.3846
Epoch 4/10
[1m480/480[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 40ms/step - accuracy: 0.8456 - loss: 0.4085 - val_accuracy: 0.8729 - val_loss: 0.3910
Epoch 5/10
[1m480/480[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 40ms/step - accuracy: 0.8478 - loss: 0.4129 - val_accuracy: 0.8729 - val_loss: 0.4145
Epoch 6/10
[1m480/480[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 44ms/step - accuracy: 0.8507 - loss: 0.4376 - val_accuracy: 0.8729 - val_loss: 0.3817
Epoch 7/10
[1m480/480[0m 

In [None]:
# 1. Predict on Test Set
y_pred_probs = model.predict(X_test)
y_pred = (y_pred_probs > 0.5).astype(int).flatten()

# 2. Build test DataFrame (based on original df)
# First, recreate original padded sequences' indexes
test_indexes = y_test.index if hasattr(y_test, 'index') else np.arange(len(y_test))

# Select corresponding rows from original DataFrame
df_test = df.iloc[test_indexes].copy()  # make a copy to not mess up original

# 3. Add Predictions
df_test['True_Sentiment_Label'] = df_test['Sentiment'].map({1: 'Good', 0: 'Bad'})
df_test['Predicted_Sentiment'] = y_pred
df_test['Predicted_Sentiment_Label'] = df_test['Predicted_Sentiment'].map({1: 'Good', 0: 'Bad'})

# 4. Assign Risk based on Predicted Sentiment
df_test['Predicted_Risk_Level'] = df_test.apply(lambda row: assign_risk(row['Predicted_Sentiment'], row['Stock Price']), axis=1)

# 5. Save full table
df_test.to_csv('full_test_results_with_predictions.csv', index=False)

print("✅ Full test results saved to 'full_test_results_with_predictions.csv' successfully!")


In [97]:
!pip install yfinance beautifulsoup4 requests lxml newspaper3k




In [100]:
!pip install lxml_html_clean


Collecting lxml_html_clean
  Downloading lxml_html_clean-0.4.1-py3-none-any.whl.metadata (2.4 kB)
Downloading lxml_html_clean-0.4.1-py3-none-any.whl (14 kB)
Installing collected packages: lxml_html_clean
Successfully installed lxml_html_clean-0.4.1


In [101]:
!pip install newspaper3k --no-cache-dir --force-reinstall


Collecting newspaper3k
  Downloading newspaper3k-0.2.8-py3-none-any.whl.metadata (11 kB)
Collecting beautifulsoup4>=4.4.1 (from newspaper3k)
  Downloading beautifulsoup4-4.13.3-py3-none-any.whl.metadata (3.8 kB)
Collecting Pillow>=3.3.0 (from newspaper3k)
  Downloading pillow-11.1.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (9.1 kB)
Collecting PyYAML>=3.11 (from newspaper3k)
  Downloading PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.1 kB)
Collecting cssselect>=0.9.2 (from newspaper3k)
  Downloading cssselect-1.3.0-py3-none-any.whl.metadata (2.6 kB)
Collecting lxml>=3.6.0 (from newspaper3k)
  Downloading lxml-5.3.2-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (3.6 kB)
Collecting nltk>=3.2.1 (from newspaper3k)
  Downloading nltk-3.9.1-py3-none-any.whl.metadata (2.9 kB)
Collecting requests>=2.10.0 (from newspaper3k)
  Downloading requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting feedparser>=5.2.1 (from newspaper3k)
  Downloading fe

In [7]:

import yfinance as yf
import requests
from bs4 import BeautifulSoup
from newspaper import Article  # <-- New library for extracting full article text

# Function to fetch stock price
def fetch_stock_price(ticker):
    try:
        stock = yf.Ticker(ticker)
        todays_data = stock.history(period='1d')
        return todays_data['Close'].iloc[0]
    except Exception as e:
        print(f"❗ Error fetching stock price: {e}")
        return None

# Function to fetch latest headline + link
def fetch_latest_headline_and_link(ticker):
    try:
        url = f"https://finviz.com/quote.ashx?t={ticker}"
        headers = {'User-Agent': 'Mozilla/5.0'}
        response = requests.get(url, headers=headers)
        soup = BeautifulSoup(response.text, "html.parser")
        news_table = soup.find(id="news-table")
        first_news = news_table.find('tr')

        if first_news and first_news.a:
            headline = first_news.a.text
            link = first_news.a['href']
            return headline, link
        else:
            return "No news found", None
    except Exception as e:
        print(f"❗ Error fetching news: {e}")
        return "No news found", None

# Function to extract article description from link
def fetch_article_description(link):
    if not link:
        return "No description available."
    try:
        article = Article(link)
        article.download()
        article.parse()
        return article.text
    except Exception as e:
        print(f"❗ Error fetching article description: {e}")
        return "No description available."

# Main function
def predict_from_stock_input_full():
    print("📋 Please enter your stock symbol:")

    # Take stock symbol input
    company = input("Enter Stock Symbol (e.g., MMM): ").strip().upper()

    # Fetch stock price
    stock_price = fetch_stock_price(company)
    if stock_price is None:
        print("❗ Could not fetch stock price. Exiting.")
        return

    # Fetch latest headline + link
    headline, link = fetch_latest_headline_and_link(company)

    if link:
        description = fetch_article_description(link)
    else:
        description = headline  # fallback if no link

    print(f"\n🔎 Fetched Data for {company}:")
    print(f"Stock Price: {stock_price}")
    print(f"Latest News Headline: {headline}")
    print(f"News Description: {description[:200]}...")  # Print first 200 chars

    # Combine headline + description
    full_text = headline + " " + description

    # Tokenize and pad
    seq = tokenizer.texts_to_sequences([full_text])
    padded = pad_sequences(seq, maxlen=max_length, padding='post')

    # Predict sentiment
    sentiment_pred = (model.predict(padded) > 0.5).astype(int)[0][0]
    sentiment_label = "Good" if sentiment_pred == 1 else "Bad"

    # Predict risk
    risk_level = assign_risk(sentiment_pred, stock_price)

    print("\n✅ Prediction Complete:")
    print(f"Company: {company}")
    print(f"Predicted Sentiment: {sentiment_label}")
    print(f"Assigned Risk Level: {risk_level}")

# Call the final full function
predict_from_stock_input_full()


📋 Please enter your stock symbol:
Enter Stock Symbol (e.g., MMM): AAPL
❗ Error fetching article description: Article `download()` failed with 403 Client Error: Forbidden for url: https://seekingalpha.com/article/4773203-cii-looking-like-tempting-opportunity?utm_source=finviz.com&utm_medium=referral&feed_item_type=article on URL https://seekingalpha.com/article/4773203-cii-looking-like-tempting-opportunity?utm_source=finviz.com&utm_medium=referral&feed_item_type=article

🔎 Fetched Data for AAPL:
Stock Price: 188.3800048828125
Latest News Headline: CII: Looking Like A Tempting Opportunity
News Description: No description available....
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step

✅ Prediction Complete:
Company: AAPL
Predicted Sentiment: Good
Assigned Risk Level: Low Risk
