# List of libraries to use

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import yfinance as yf
import gradio as gr

import pyprind, os, sys, re, requests, nltk, math, pickle, gzip

from bs4 import BeautifulSoup
from dateutil.parser import parse
from collections import defaultdict

from itertools import combinations
from tqdm import tqdm
from datetime import datetime, timedelta
from dateutil.parser import parse
from matplotlib.widgets import Cursor

from nltk.tokenize import word_tokenize, sent_tokenize

from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch.nn.functional import softmax
from tokenizers import Tokenizer, models, trainers, pre_tokenizers

from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer, TfidfTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.metrics import classification_report, accuracy_score, f1_score
from sklearn.preprocessing import LabelEncoder


ModuleNotFoundError: No module named 'gradio'

-------------

# Scraping Text Data

In [10]:
def scrape_website(url):
    # Send a GET request to the URL
    response = requests.get(url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'})
    # Check if the request was successful (status code 200)
    if response.status_code == 200:
        # Parse the HTML content of the page
        soup = BeautifulSoup(response.content, 'html.parser')

        # Extract the data you need using BeautifulSoup methods
        # For example, let's extract all the text from paragraph (p) tags
        paragraphs = soup.find_all('p')
        extracted_data = [paragraph.get_text() for paragraph in paragraphs]
        return extracted_data

    else:
        print(f"Error: Unable to fetch the page. Status code: {response.status_code}")
        return None


Error: Unable to fetch the page. Status code: 401


--------------

# Scraping using HTML class

In [9]:
def GET_TEXT(url, target_class=None):
    
    try:
        response = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'})
        response.raise_for_status()  # Raise an error for unsuccessful requests
    except requests.exceptions.RequestException as e:
        return "0"

    soup = BeautifulSoup(response.content, 'html.parser')

    # Find elements with the specified class
    elements = soup.find_all(class_=target_class)

    if not elements:
        return "0"

    # Extract and return text content
    Result = [element.get_text(strip=True) for element in elements]
    
    if Result:
        return Result
    else:
        return []
GET_TEXT("https://www.reuters.com/world/europe/death-toll-german-christmas-market-car-ramming-rises-four-bild-reports-2024-12-21/", "")

'0'

ModuleNotFoundError: No module named 'newspaper'

---------------

In [4]:
def is_url(input_string):
    try:
        url_pattern = re.compile(
            r'^(https?://)?'                      # Optional scheme (http or https)
            r'((([a-zA-Z0-9-]+\.)+[a-zA-Z]{2,})|'  # Domain name
            r'localhost|'                          # OR localhost
            r'(\d{1,3}\.){3}\d{1,3})'              # OR IPv4 address
            r'(:\d+)?'                             # Optional port
            r'(/[-a-zA-Z0-9@:%._+~#=]*)*'          # Optional path
            r'(\?[;&a-zA-Z0-9%._+~#=-]*)?'         # Optional query
            r'(#[-a-zA-Z0-9_]*)?$'                 # Optional fragment
        )
        return bool(url_pattern.match(input_string))
    except Exception as e:
        pass

------------

# Scraping date

In [5]:
def is_valid_date(date_str):
    try:
        # Try parsing the string into a datetime object
        parse(date_str)
        return True
    except ValueError:
        # If parsing fails, the string is not a valid date
        return False
    
def extract_elements_by_class(url, target_class):
    
    try:
        response = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'})
        response.raise_for_status()  # Raise an error for unsuccessful requests
    except requests.exceptions.RequestException as e:
        return "0"

    soup = BeautifulSoup(response.content, 'html.parser')

    # Find elements with the specified class
    elements = soup.find_all(class_=target_class)

    if not elements:
        return "0"

    # Extract and return text content
    Result = [element.get_text(strip=True)[:17] for element in elements]
    
    if is_valid_date(Result[0]):
        return Result[0]
    else:
        return "0"

-----

In [6]:
def get_publication_date(url):
    try:
        # Send HTTP request to the URL
        response = requests.get(url)
        response.raise_for_status()

        # Parse the HTML content
        soup = BeautifulSoup(response.content, 'html.parser')

        # Common metadata tags for publication dates
        date_meta_tags = [
            {"name": "pubdate"}, {"name": "publish-date"}, {"name": "creation-date"}, 
            {"name": "date"},{"property": "article:published_time"},
            {"property": "og:article:published_time"} ]

        # Search for publication date in meta tags
        for tag in date_meta_tags:
            date_tag = soup.find("meta", tag)
            if date_tag and date_tag.get("content"):
                return date_tag['content'][:10]

        # Look for visible date patterns in the page content
        possible_date_tags = soup.find_all(["time", "span", "p"])
        for tag in possible_date_tags:
            if tag.has_attr("datetime"):  # For <time> tags
                return tag['datetime'][:10]
            elif "published" in tag.get("class", []) or "date" in tag.get("class", []):
                return tag.text.strip()[:10]

        # Fallback: Search for recognizable date formats in text (not always accurate)
        text_content = soup.get_text()
        date_patterns = [  r'\b\d{4}-\d{2}-\d{2}\b',          # Format: YYYY-MM-DD
                           r'\b\d{2}/\d{2}/\d{4}\b',          # Format: MM/DD/YYYY
                           r'\b\d{1,2} \w{3,9} \d{4}\b']     # Format: DD Month YYYY

        for pattern in date_patterns:
            match = re.search(pattern, text_content)
            if match:
                return match.group()[:10]

        return "0"
    except requests.RequestException as e:
        return "0"

--------

In [7]:
def extract_date_as_date_type(text):
    # Regex pattern for Month DD, YYYY
    date_pattern = r'\b(\w+)\s(\d{1,2}),\s(\d{4})\b'
    match = re.search(date_pattern, text)
    
    if match:
        # Extract month, day, year
        month_name, day, year = match.groups()
        # Convert to datetime.date
        date_obj = datetime.strptime(f"{month_name} {day} {year}", "%B %d %Y").date()
        return date_obj
    return "0"

------------

In [8]:
def extract_dates_from_text(text):
    # Pattern to capture potential date-like segments in the text
    date_pattern = r'\b(?:\d{1,2}[/-]\d{1,2}[/-]\d{2,4}|\d{4}[/-]\d{1,2}[/-]\d{1,2}|\w+\s\d{1,2},\s\d{4})\b'
    
    # Find all matches
    potential_dates = re.findall(date_pattern, text)
    extracted_dates = []
    
    for date_str in potential_dates:
        try:
            # Parse the date string to a datetime object
            parsed_date = parse(date_str, fuzzy=True).date()
            extracted_dates.append(parsed_date)
        except ValueError:
            # Skip strings that can't be parsed as dates
            continue
    
    return extracted_dates

--------------

In [14]:
def convert_date(date_str):
    # If the input is a list containing a datetime.date object, extract the date and format it
    if isinstance(date_str, list) and isinstance(date_str[0], date):
        return date_str[0].strftime('%Y-%m-%d')

    # Try parsing the date with the '%Y-%m-%d' format first
    try:
        date_obj = datetime.strptime(date_str, '%Y-%m-%d')
        return date_obj.strftime('%Y-%m-%d')
    except ValueError:
        pass
    
    # Try parsing the date with the other formats and convert it to '%Y-%m-%d'
    formats = ['%m/%d/%Y', '%d-%b-%y', '%d-%b-%Y', '%a, %b %d, %Y',
               '%B %d, %Y,',"%B %d, %Y", '%a, %b %d, %Y,', "%b %d, %Y", "%b %d, %Y,",
              "%d-%b-%y", "%a, %b %d, %Y, %I:%M %p", "%a, %b %d, %Y"]
    for fmt in formats:
        try:
            date_obj = datetime.strptime(date_str, fmt)
            return date_obj.strftime('%Y-%m-%d')
        except ValueError:
            continue
    
    # If none of the formats match, return None
    return None

--------

# Add New Links to scrapping it

In [10]:
New_Links = [""]

Links = pd.read_csv("News_Links.csv")
update_links = {"Links": []}

for i in New_Links:
    if i not in Links["Links"]:
        update_links["Links"].append(i)
        
New_Links = pd.DataFrame(update_links)
New_Links = pd.concat([Links, New_Links]).drop_duplicates(subset="Links").dropna()
New_Links.to_csv("News_Links.csv", index=False)

----------

# Extract Date of news

In [12]:
# Read the existing data from the CSV
Text = pd.read_csv("News_Data.csv")

# Define a function to extract dates based on various conditions
def extract_dates(Text):
    for i in tqdm(range(len(Text)), ncols=100):
        try:
            link = Text["Link"][i]
            current_date = Text["Date"][i]
            
            # Proceed if the date is not already filled
            if current_date == "0" or np.isnan(current_date):
                # Extract date for ABC News
                if "abcnews" in link:
                    Text.loc[i, "Date"] = extract_elements_by_class(link, "VZTD mLASH gpiba".strip())
                
                # Extract date for Yahoo Finance
                elif "yahoo" in link:
                    Text.loc[i, "Date"] = extract_elements_by_class(link, "byline-attr-meta-time".strip())
                
                # Extract date for CNN News
                elif "cnn" in link:
                    timestamp = GET_TEXT(link, "timestamp vossi-timestamp")
                    Text.loc[i, "Date"] = extract_date_as_date_type(timestamp[0]) if timestamp else "0"
                
                # Extract date for CoinDesk
                elif "coindesk" in link:
                    content = GET_TEXT(link, "Noto_Sans_xs_Sans-400-xs flex gap-4 text-charcoal-600 flex-col md:flex-row")[0][:12]
                    Text.loc[i, "Date"] = content
                
                # Extract date for Tesls
                elif "teslarati" in link:
                    content = GET_TEXT(link, "post-date updated")[0]
                    Text.loc[i, "Date"] = content
                elif "cnbc" in link:
                    content = get_publication_date(link)
                    Text.loc[i, "Date"] = content

                # Retain existing date if no conditions met
                else:
                    Text.loc[i, "Date"] = current_date

        except Exception as e:
            pass  # Log or handle the exception if needed

    return Text



# Update the DataFrame with the extracted dates
Text = extract_dates(Text)

# Save the updated data back to CSV
Text.to_csv("News_Data.csv", index=False)

100%|█████████████████████████████████████████████████████████| 11931/11931 [15:05<00:00, 13.18it/s]


------------

# Convert date to same formate

In [16]:
Text = pd.read_csv("News_Data.csv")
for i in tqdm(range(len(Text)), ncols=100):
    try:
        Text.loc[i, "Date"] = convert_date(Text["Date"][i])
    except Exception as e:
        pass
Text.to_csv("News_Data.csv", index=False)

100%|███████████████████████████████████████████████████████| 11931/11931 [00:01<00:00, 6420.06it/s]


--------------

# Remove duplicates patterns

In [None]:
class PatternExtractor:
    def __init__(self, file_path, min_words=10, occurrence_threshold=4):
        self.file_path = file_path
        self.min_words = min_words
        self.occurrence_threshold = occurrence_threshold
        self.data = self.load_data()

    def load_data(self):
        """Loads dataset from CSV file."""
        return pd.read_csv(self.file_path)

    def preprocess_text(self, text):
        """Cleans text by removing extra spaces, punctuation, and converting to lowercase."""
        text = re.sub('<[^>]*>', '', str(text))
        emoticons = re.findall('(?::|;|=)(?:-)?(?:\(|\)|D|P)', text)
        text = re.sub('[\W]+', ' ', text.lower()) + ' '.join(emoticons).replace('-', '')
        return text.lower()

    def extract_patterns_from_text(self, text):
        """Extracts patterns from a single article."""
        sentences = sent_tokenize(text)
        patterns = set()
        for sentence in sentences:
            words = word_tokenize(sentence)
            for i in range(len(words) - self.min_words + 1):
                patterns.add(' '.join(words[i:i + self.min_words]))
        return patterns

    def find_common_patterns(self):
        """Finds common patterns across articles."""
        pattern_counts = defaultdict(int)
        for article in tqdm(self.data["Text"], desc="Processing Articles"):
            preprocessed_text = self.preprocess_text(article)
            patterns = self.extract_patterns_from_text(preprocessed_text)
            for pattern in patterns:
                pattern_counts[pattern] += 1
        return {pattern: count for pattern, count in pattern_counts.items() if count > 1}

    def remove_patterns_from_text(self, text, patterns):
        """Removes patterns from text."""
        for pattern in patterns:
            text = re.sub(re.escape(pattern), "", text)
        return text

    def update_data(self):
        """Updates data by removing high-frequency patterns."""
        common_patterns = self.find_common_patterns()
        high_frequency_patterns = [pattern for pattern, count in common_patterns.items() if count > self.occurrence_threshold]
        self.data["Text"] = self.data["Text"].apply(lambda x: self.remove_patterns_from_text(x, high_frequency_patterns))
        self.data.to_csv("News_Data_Updated.csv", index=False)
        print("Updated News_Data_Updated.csv saved successfully!")


if __name__ == "__main__":
    nltk.download('punkt')
    extractor = PatternExtractor("News_Data.csv")
    extractor.update_data()

----------

# Using FinBert model for sentiment texts and label it

In [17]:
Text = pd.read_csv("News_Data.csv")
# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("yiyanghkust/finbert-tone")
model = AutoModelForSequenceClassification.from_pretrained("yiyanghkust/finbert-tone")

def get_sentiment(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
    outputs = model(**inputs)
    probabilities = softmax(outputs.logits, dim=1)
    sentiment_labels = ["neutral", "positive", "negative"]
    return sentiment_labels[probabilities.argmax()]

for i in tqdm(range(len(Text["Sentiment"])), ncols=100):
    try:
        np.isnan(Text["Sentiment"][i])
        Text.loc[i, "Sentiment"] = get_sentiment(Text["Text"][i])
    except Exception as e:
        pass
# Initialize LabelEncoder
encoder = LabelEncoder()
# Encode sentiment column
Text["Labeling_Sentiment"] = encoder.fit_transform(Text["Sentiment"])
Text.to_csv("News_Data.csv", index=False)

  return torch.load(checkpoint_file, map_location="cpu")
100%|█████████████████████████████████████████████████████████| 11931/11931 [09:34<00:00, 20.78it/s]


-------------

# Remove duplicates, empity, and zero values

In [18]:
# Read the data
Text = pd.read_csv("News_Data.csv")

# Initialize an empty dictionary to store unique data
Refresh = {"Link": [], "Text": [], "Sentiment": [], "Date": [], "Labeling_Sentiment": []}

# Use the 'drop_duplicates' function for unique rows based on 'Link' and 'Text'
Text_filtered = Text.drop_duplicates(subset=["Link", "Text"])

# Filter rows where the Link is valid (is_url returns True)
Text_filtered = Text_filtered[Text_filtered["Link"].apply(is_url).fillna(False)]

# Drop rows where Text["Text"] == "0"
Text_filtered = Text_filtered[Text_filtered["Text"] != "0"]
Text_filtered = Text_filtered[Text_filtered["Date"] != "0"]
Text_filtered = Text_filtered[Text_filtered["Date"] != None]


# Append the filtered rows into Refresh dictionary
Refresh["Link"] = Text_filtered["Link"].tolist()
Refresh["Text"] = Text_filtered["Text"].tolist()
Refresh["Sentiment"] = Text_filtered["Sentiment"].tolist()
Refresh["Date"] = Text_filtered["Date"].tolist()
Refresh["Labeling_Sentiment"] = Text_filtered["Labeling_Sentiment"].tolist()

# Create DataFrame from the dictionary
New_Data = pd.DataFrame(Refresh).dropna()

# Save the filtered and updated DataFrame
New_Data.to_csv("News_Data.csv", index=False)


--------------

# Training Byte-Pair Encoding: Subword-based tokenization algorithm with saveing progress in BPE file

In [31]:
class BPE():
    """Byte-Pair Encoding: Subword-based tokenization algorithm."""
    
    def __init__(self, corpus, vocab_size):
        """Initialize BPE tokenizer."""
        self.corpus = corpus
        self.vocab_size = vocab_size
        
        # pre-tokenize the corpus into words, BERT pre-tokenizer is used here
        self.tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
        self.word_freqs = defaultdict(int)
        self.splits = {}
        self.merges = {}
    
    
    def train(self, checkpoint_path=None, checkpoint_interval=10):
        """Train BPE tokenizer with checkpoint saving."""
        # compute the frequencies of each word in the corpus
        for text in self.corpus:
            words_with_offsets = self.tokenizer.backend_tokenizer.pre_tokenizer.pre_tokenize_str(text)
            new_words = [word for word, offset in words_with_offsets]
            for word in new_words:
                self.word_freqs[word] += 1

        # compute the base vocabulary of all characters in the corpus
        alphabet = []
        for word in self.word_freqs.keys():
            for letter in word:
                if letter not in alphabet:
                    alphabet.append(letter)
        alphabet.sort()

        # add the special token </w> at the beginning of the vocabulary
        vocab = ["</w>"] + alphabet.copy()

        # split each word into individual characters before training
        self.splits = {word: [c for c in word] for word in self.word_freqs.keys()}

        # merge the most frequent pair iteratively until the vocabulary size is reached
        iteration = 0
        while len(vocab) < self.vocab_size:
            # compute the frequency of each pair
            pair_freqs = self.compute_pair_freqs()

            # find the most frequent pair
            best_pair = ""
            max_freq = None
            for pair, freq in pair_freqs.items():
                if max_freq is None or max_freq < freq:
                    best_pair = pair
                    max_freq = freq

            # merge the most frequent pair
            self.splits = self.merge_pair(*best_pair)
            self.merges[best_pair] = best_pair[0] + best_pair[1]
            vocab.append(best_pair[0] + best_pair[1])

            # Save a checkpoint
            iteration += 1
            if checkpoint_path and iteration % checkpoint_interval == 0:
                self.save_checkpoint(checkpoint_path, iteration)
        return self.merges

    def save_checkpoint(self, filepath, iteration):
        """Save the current training state as a checkpoint."""
        with open(filepath, 'wb') as f:
            pickle.dump({
                'merges': self.merges,
                'splits': self.splits,
                'word_freqs': self.word_freqs,
                'iteration': iteration,
                'vocab_size': self.vocab_size,
                'corpus': self.corpus,
            }, f)
        print(f"Checkpoint saved at iteration {iteration} to {filepath}")


    def compute_pair_freqs(self):
        """Compute the frequency of each pair."""

        pair_freqs = defaultdict(int)
        for word, freq in self.word_freqs.items():
            split = self.splits[word]
            if len(split) == 1:
                continue
            for i in range(len(split) - 1):
                pair = (split[i], split[i + 1])
                pair_freqs[pair] += freq
        return pair_freqs


    def merge_pair(self, a, b):
        """Merge the given pair."""

        for word in self.word_freqs:
            split = self.splits[word]
            if len(split) == 1:
                continue
            i = 0
            while i < len(split) - 1:
                if split[i] == a and split[i + 1] == b:
                    split = split[:i] + [a + b] + split[i + 2 :]
                else:
                    i += 1
            self.splits[word] = split
        return self.splits
    
Text = pd.read_csv("News_Data.csv")
bpe = BPE(corpus= Text["Text"], vocab_size=100000)
bpe.train(checkpoint_path="BPE.pkl", checkpoint_interval=1000)

----------------------

# Logistic regression Model

In [52]:
Text = pd.read_csv("News_Data.csv")[:1000]

# Load the saved checkpoint (dictionary)
with open("BPE.pkl", "rb") as f:
    checkpoint = pickle.load(f)

# Extract necessary data from the checkpoint
merges = checkpoint['merges']
splits = checkpoint['splits']
word_freqs = checkpoint['word_freqs']
vocab_size = checkpoint['vocab_size']
corpus = checkpoint['corpus']


# Manually handle the tokenization process based on the loaded dictionary
def tokenize_with_bpe(text, merges, splits):
    # Pre-tokenize the text (splitting into words)
    pre_tokenized_text = text.split()  # This is a simple split, you can modify to use any tokenizer
    splits_text = [[l for l in word] for word in pre_tokenized_text]

    # Merge based on the merges dictionary
    for pair, merge in merges.items():
        for idx, split in enumerate(splits_text):
            i = 0
            while i < len(split) - 1:
                if split[i] == pair[0] and split[i + 1] == pair[1]:
                    split = split[:i] + [merge] + split[i + 2 :]
                else:
                    i += 1
            splits_text[idx] = split

    # Return the merged text (flattened list)
    result = sum(splits_text, [])
    return result

# Sample text data and labels
text_data = Text["Text"]

# Tokenize the text using the dictionary-based BPE
tokenized_data = [" ".join(tokenize_with_bpe(text, merges, splits)) for text in tqdm(text_data, ncols=100)]

vectorizer = CountVectorizer()
X = vectorizer.fit_transform(tokenized_data)
y = Text["Labeling_Sentiment"]

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Train logistic regression
model = LogisticRegression()
model.fit(X_train, y_train)

# Evaluate on Training data
y_pred = model.predict(X_test)
accuracy_training = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy_training:.2f}")

f1_training = f1_score(y_test, y_pred, average="weighted")  # Use 'weighted' for multi-class
print(f"F1-Score: {f1_training:.2f}")

100%|█████████████████████████████████████████████████████████| 1200/1200 [1:24:38<00:00,  4.23s/it]


Accuracy: 0.73
F1-Score: 0.72


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


------------------

# Display Graphs

In [3]:
import pandas as pd
import yfinance as yf
from datetime import datetime
import matplotlib.pyplot as plt
from matplotlib.widgets import Cursor
import gradio as gr

# Function to calculate RSI
def calculate_rsi(data, window=14):
    delta = data.diff(1)
    gain = delta.clip(lower=0)
    loss = -delta.clip(upper=0)
    
    avg_gain = gain.rolling(window=window, min_periods=1).mean()
    avg_loss = loss.rolling(window=window, min_periods=1).mean()
    
    rs = avg_gain / avg_loss
    rsi = 100 - (100 / (1 + rs))
    return rsi

# Function to analyze stock data
def analyze_stock(keyword):
    # Load stock price data
    stock_data = yf.download(keyword, start="2024-10-01", end=datetime.today())["Close"]
    
    # Load and filter news sentiment data
    news_data = pd.read_csv("News_Data.csv")
    filtered_news = news_data[news_data["Text"].str.contains(keyword, case=False, na=False)]
    
    # Aggregate sentiment by date
    sentiment = (
        filtered_news.groupby('Date')['Labeling_Sentiment']
        .mean()
        .reset_index()
        .set_index("Date")
    )
    
    # Combine sentiment with stock prices
    analysis_df = sentiment.copy()
    analysis_df["Price"] = stock_data
    analysis_df.dropna(inplace=True)
    
    # Calculate indicators
    analysis_df["Moving Average"] = analysis_df["Price"].rolling(window=5).mean()
    analysis_df["RSI"] = calculate_rsi(analysis_df["Price"], window=5)
    
    # Create plots
    fig, axes = plt.subplots(3, 1, figsize=(14, 16), gridspec_kw={"height_ratios": [3, 1, 1]})
    
    # Plot 1: Closing Price and Moving Average
    ax1 = axes[0]
    ax1.plot(analysis_df.index, analysis_df["Price"], label="Closing Price", color="blue", linewidth=2)
    ax1.plot(analysis_df.index, analysis_df["Moving Average"], label="5-Day Moving Average", color="orange", linewidth=2)
    ax1.set_title(f"{keyword} Exchange Rate: Price and Moving Average", fontsize=16, fontweight='bold')
    ax1.set_xlabel("Date", fontsize=14)
    ax1.set_ylabel("Price (USD)", fontsize=14)
    ax1.grid(True, linestyle='--', alpha=0.6)
    ax1.legend(fontsize=12)
    ax1.tick_params(axis='x', rotation=45)
    Cursor(ax1, useblit=True, color='red', linewidth=1, linestyle='--')
    
    # Plot 2: RSI
    ax2 = axes[1]
    ax2.plot(analysis_df.index, analysis_df["RSI"], label="RSI", color="purple", linewidth=2)
    ax2.axhline(70, color="red", linestyle="--", label="Overbought (70)")
    ax2.axhline(30, color="green", linestyle="--", label="Oversold (30)")
    ax2.set_title("Relative Strength Index (RSI)", fontsize=14, fontweight='bold')
    ax2.set_xlabel("Date", fontsize=12)
    ax2.set_ylabel("RSI", fontsize=12)
    ax2.grid(True, linestyle='--', alpha=0.6)
    ax2.legend(fontsize=10)
    ax2.tick_params(axis='x', rotation=45)
    
    # Plot 3: Sentiment Analysis
    ax3 = axes[2]
    ax3.plot(analysis_df.index, analysis_df["Labeling_Sentiment"], label="Sentiment", color="green", linewidth=2)
    sentiment_label = (
        "Neutral Sentiment (1)" if 0.5 <= analysis_df["Labeling_Sentiment"].iloc[-1] <= 1.5 else
        "Positive Sentiment (2)" if analysis_df["Labeling_Sentiment"].iloc[-1] > 1.5 else
        "Negative Sentiment (0)"
    )
    ax3.axhline(0, color="black", linestyle="--", label=sentiment_label)
    ax3.set_title("Sentiment Analysis", fontsize=14, fontweight='bold')
    ax3.set_xlabel("Date", fontsize=12)
    ax3.set_ylabel("Sentiment Score", fontsize=12)
    ax3.grid(True, linestyle='--', alpha=0.6)
    ax3.legend(fontsize=10)
    ax3.tick_params(axis='x', rotation=45)
    
    # Adjust layout and save the plot
    plt.tight_layout()
    output_path = "output_plot.png"
    plt.savefig(output_path)
    plt.close(fig)
    return output_path

# Gradio interface
gui = gr.Interface(
    fn=analyze_stock,
    inputs=gr.Textbox(label="Enter Stock Name"),
    outputs=gr.Image(type="filepath", label="Analysis Plot"),
    title="Stock Sentiment and Price Analysis",
    description="Analyze stock sentiment and price trends based on user input."
)

# Launch the app
gui.launch()


ModuleNotFoundError: No module named 'gradio'