In [31]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import yfinance as yf
import gradio as gr
import os, re, requests, nltk

from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

from bs4 import BeautifulSoup
from dateutil.parser import parse
from collections import defaultdict
from tqdm import tqdm
from datetime import datetime, timedelta
from nltk.tokenize import word_tokenize, sent_tokenize
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch.nn.functional import softmax
from sklearn.preprocessing import LabelEncoder

# Download necessary NLTK resources
nltk.download('punkt', quiet=True)
nltk.download('stopwords', quiet=True)

True

In [18]:
Text = pd.read_csv("News_Data.csv")
Text.shape

(8760, 5)

***********

# Extract Links

In [None]:
# Class for scraping Yahoo Finance news
class HTMLLinkExtractor:
    def __init__(self, max_links=162, timeout=10):
        """Initialize the HTMLLinkExtractor with max_links and timeout."""
        self.max_links = max_links
        self.timeout = timeout

    def fetch_html(self, url):
        """Fetch the HTML content of a URL."""
        try:
            response = requests.get(url, timeout=self.timeout)
            return BeautifulSoup(response.content, 'lxml')
        except Exception as e:
            print(f"Error fetching {url}: {e}")
            return None

    def extract_links(self, soup):
        """Extract all valid HTTP links from the BeautifulSoup object."""
        links = set()
        if soup:
            for link in soup.find_all('a', href=True):
                href = link.get('href')
                if href and href.startswith('http'):
                    links.add(href)
        return links

    def extract_html_links(self, url):
        """Extract HTML links starting from the given URL (for Yahoo Finance)."""
        links = set()
        soup = self.fetch_html(url)

        # Extract initial links
        if soup:
            links.update(self.extract_links(soup))

        # Iteratively fetch and extract links up to max_links
        for link in list(links):
            if len(links) >= self.max_links:
                break
            soup = self.fetch_html(link)
            if soup:
                links.update(self.extract_links(soup))

        # Filter links ending with '.html'
        return [link for link in links if link.endswith('.html')]

# Function for scraping CNN or CNBC links
def Scraping(url):
    Links = []
    response = requests.get(url)  # Send a request to the website
    if response.status_code == 200:  # Check if the request was successful
        soup = BeautifulSoup(response.content, 'html.parser')  # Parse the HTML content of the page
        links = soup.find_all('a', href=True)  # Find all anchor tags with 'href' attributes
        for link in links or []:  # Extract and print the links
            href = link['href']
            if href.endswith("html"):  # Filter out links to ensure they are valid URLs
                Links.append(href)
    return Links

# Load the CSV and process links
class LinkProcessor:
    def __init__(self, csv_file, extractor):
        """Initialize the LinkProcessor with the CSV file and an HTMLLinkExtractor instance."""
        self.csv_file = csv_file
        self.extractor = extractor

    def process_links(self):
        Old_Data = pd.read_csv("News_Data.csv")
        News_Links = pd.read_csv("News_Links.csv")
        update_file = {"Link": []}
        update_Links = {"Links": []}
        """Process each link in the CSV file and extract HTML links."""
        links_df = pd.read_csv(self.csv_file)
        
        for link in tqdm(np.array(links_df["Links"]), desc="Processing Links", unit="link"):
            if str(link) not in News_Links["Links"]:
                if "yahoo" in str(link):  # Scrape Yahoo Finance using HTMLLinkExtractor
                    html_links = self.extractor.extract_html_links(str(link))
                    for i in html_links:
                        update_file["Link"].append(i)
                        update_Links["Links"].append(i)
                elif "cnn" in str(link):  # Scrape CNN or CNBC using Scraping function
                    html_links = Scraping(str(link))
                    for i in html_links:
                        update_file["Link"].append(f"https://edition.cnn.com{i}")
                        update_Links["Links"].append(i)
                elif "cnbc" in str(link):
                    html_links = Scraping(str(link))
                    for i in html_links:
                        update_file["Link"].append(i)
                        update_Links["Links"].append(i)

        update_file = pd.DataFrame(update_file)
        update_Links = pd.DataFrame(update_Links)
        
        add_links = pd.concat([Old_Data, update_file]).drop_duplicates(subset="Link")
        Links = pd.concat([News_Links, update_Links]).drop_duplicates(subset="Links")
        
        add_links.to_csv("News_Data.csv", index=False)
        Links.to_csv("News_Links.csv", index=False)

# Usage
if __name__ == "__main__":
    extractor = HTMLLinkExtractor(max_links=162, timeout=10)
    processor = LinkProcessor("News_Links.csv", extractor)
    processor.process_links()


--------

# Extract Text

In [None]:
class ArticleExtractor:
    def __init__(self, url, target_class):
        self.url = url
        self.target_class = target_class
        self.headers = {'User-Agent': 'Mozilla/5.0'}

    def fetch_text(self):
        try:
            response = requests.get(self.url, headers=self.headers)
            response.raise_for_status()  # Raise an error for unsuccessful requests
        except requests.exceptions.RequestException:
            return "0"

        soup = BeautifulSoup(response.content, 'html.parser')
        elements = soup.find_all(class_=self.target_class)

        if not elements:
            return "0"

        # Extract and return text content
        result = [element.get_text(strip=True) for element in elements]

        return result if result else []

class NewsProcessor:
    def __init__(self, file_path):
        self.data = pd.read_csv(file_path)

    def process_articles(self):
        # Use tqdm to add a progress bar
        for i in tqdm(range(len(self.data)), desc="Processing Articles"):
            link = self.data["Link"][i]
            text = self.data["Text"][i]

            if isinstance(text, float):  # Check if Text is a NaN value (float)
                if "yahoo" in link:
                    extractor = ArticleExtractor(link, "article-wrap no-bb")
                elif "cnn" in link:
                    extractor = ArticleExtractor(link, "article__content")
                elif "cnbc" in link:
                    extractor = ArticleExtractor(link, "ArticleBody-articleBody")
                else:
                    continue  # Skip if no match for source
                
                # Extract text and update the dataframe
                extracted_text = extractor.fetch_text()
                self.data.loc[i, "Text"] = extracted_text

    def save_data(self, output_path):
        self.data.to_csv(output_path, index=False)

# Example usage:
file_path = "News_Data.csv"
output_path = "News_Data.csv"

news_processor = NewsProcessor(file_path)
news_processor.process_articles()
news_processor.save_data(output_path)


------

# Clean Text Data

In [None]:
# Load the data
Text = pd.read_csv("News_Data.csv")

# Define the preprocessor function
def preprocessor(text):
    # Remove HTML tags
    text = re.sub(r'<[^>]*>', '', str(text))
    
    # Find emoticons
    emoticons = re.findall(r'(?::|;|=)(?:-)?(?:\)|\(|D|P)', text)
    
    # Remove non-word characters and convert to lowercase, then add emoticons back
    text = (re.sub(r'[\W]+', ' ', text.lower()) + ' ' + ' '.join(emoticons).replace('-', ''))
    
    return text

# Apply the preprocessor to the "Text" column
Text["Text"] = Text["Text"].apply(preprocessor)

# Save the processed data back to CSV
Text.to_csv("News_Data.csv", index=False)

-------

# Filter data to contain just finance news

In [None]:
class FinanceTextClassifier:
    def __init__(self, finance_keywords_path: str = "Finance_Key_Words.csv"):
        """
        Initialize the classifier with financial keywords and regex patterns.
        
        Args:
            finance_keywords_path (str): Path to the CSV file containing financial keywords.
        """
        

        # Load financial keywords
        self.finance_keywords = self._load_finance_keywords(finance_keywords_path)
        
        # Precompile regex patterns for faster matching
        self.finance_patterns = [
            re.compile(r'\$\d+(\.\d+)?'),  # Currency amounts
            re.compile(r'\d+%'),  # Percentage
            re.compile(r'\b\d+(\.\d+)?\s*(million|billion|trillion)\b'),  # Large number scales
        ]

        # Precompute stopwords for faster filtering
        self.stopwords = set(stopwords.words('english'))

    def _load_finance_keywords(self, path: str) -> Set[str]:
        """
        Load financial keywords from a CSV file.
        
        Args:
            path (str): Path to the CSV file.
        
        Returns:
            Set[str]: A set of financial keywords.
        """
        try:
            return set(pd.read_csv(path)["Key"].str.lower().dropna().unique())
        except Exception as e:
            print(f"Error loading financial keywords: {e}")
            return set()

    def _preprocess_text(self, text: str) -> str:
        """
        Preprocess the text by converting to lowercase, removing special characters,
        and filtering out stopwords.
        
        Args:
            text (str): Input text to preprocess.
        
        Returns:
            str: Preprocessed text.
        """
        text = text.lower()
        text = re.sub(r'[^\w\s]', '', text)  # Remove special characters
        words = text.split()
        words = [word for word in words if word not in self.stopwords]  # Remove stopwords
        return ' '.join(words)

    def _count_finance_keywords(self, text: str) -> Dict[str, int]:
        """
        Count occurrences of finance-related keywords in the text.
        
        Args:
            text (str): Preprocessed text.
        
        Returns:
            Dict[str, int]: A dictionary of financial keywords and their counts.
        """
        keyword_counts = defaultdict(int)
        for word in text.split():
            if word in self.finance_keywords:
                keyword_counts[word] += 1
        return dict(keyword_counts)

    def _check_finance_patterns(self, text: str) -> List[str]:
        """
        Check for financial regex patterns in the text.
        
        Args:
            text (str): Input text.
        
        Returns:
            List[str]: A list of matched financial patterns.
        """
        matched_patterns = []
        for pattern in self.finance_patterns:
            matches = pattern.findall(text)
            if matches:
                matched_patterns.extend(matches)
        return matched_patterns

    def classify_text(self, text: str, threshold: float = 0.1) -> Dict[str, any]:
        """
        Classify if text is finance-related.
        
        Args:
            text (str): Input text to classify.
            threshold (float): Minimum keyword ratio to consider finance-related.
        
        Returns:
            Dict[str, any]: Classification results with details.
        """
        # Preprocess text
        processed_text = self._preprocess_text(text)
        
        # Count words
        total_words = len(processed_text.split())
        
        # Count finance keywords
        keyword_counts = self._count_finance_keywords(processed_text)
        keyword_total_count = sum(keyword_counts.values())
        
        # Check financial patterns
        pattern_matches = self._check_finance_patterns(text)
        
        # Calculate finance keyword ratio
        keyword_ratio = keyword_total_count / total_words if total_words > 0 else 0
        
        # Determine classification
        is_finance_related = keyword_ratio >= threshold or len(pattern_matches) > 0
        
        return {
            'is_finance_related': is_finance_related,
            'keyword_ratio': keyword_ratio,
            'keyword_counts': keyword_counts,
            'pattern_matches': pattern_matches,
            'total_words': total_words
        }


    
Text = pd.read_csv("News_Data.csv")

# Initialize classifier
classifier = FinanceTextClassifier()

# Filter finance-related texts
filtered_results = []
for index, row in Text.iterrows():
    result = classifier.classify_text(row["Text"])
    if result['is_finance_related']:
        filtered_results.append(row)

# Convert to DataFrame
filtered_df = pd.DataFrame(filtered_results)
filtered_df.to_csv("News_Data.csv", index=False)

--------

# Remove duplicates patterns

In [None]:
class PatternExtractor:
    def __init__(self, file_path, min_words=10, occurrence_threshold=4):
        self.file_path = file_path
        self.min_words = min_words
        self.occurrence_threshold = occurrence_threshold
        self.data = self.load_data()

    def load_data(self):
        """Loads dataset from CSV file."""
        return pd.read_csv(self.file_path)

    def preprocess_text(self, text):
        """Cleans text by removing extra spaces, punctuation, and converting to lowercase."""
        text = re.sub('<[^>]*>', '', str(text))
        emoticons = re.findall('(?::|;|=)(?:-)?(?:\(|\)|D|P)', text)
        text = re.sub('[\W]+', ' ', text.lower()) + ' '.join(emoticons).replace('-', '')
        return text.lower()

    def extract_patterns_from_text(self, text):
        """Extracts patterns from a single article."""
        sentences = sent_tokenize(text)
        patterns = set()
        for sentence in sentences:
            words = word_tokenize(sentence)
            for i in range(len(words) - self.min_words + 1):
                patterns.add(' '.join(words[i:i + self.min_words]))
        return patterns

    def find_common_patterns(self):
        """Finds common patterns across articles."""
        pattern_counts = defaultdict(int)
        for article in tqdm(self.data["Text"], desc="Processing Articles"):
            preprocessed_text = self.preprocess_text(article)
            patterns = self.extract_patterns_from_text(preprocessed_text)
            for pattern in patterns:
                pattern_counts[pattern] += 1
        return {pattern: count for pattern, count in pattern_counts.items() if count > 1}

    def remove_patterns_from_text(self, text, patterns):
        """Removes patterns from text."""
        for pattern in patterns:
            text = re.sub(re.escape(pattern), "", text)
        return text

    def update_data(self):
        """Updates data by removing high-frequency patterns."""
        common_patterns = self.find_common_patterns()
        high_frequency_patterns = [pattern for pattern, count in common_patterns.items() if count > self.occurrence_threshold]
        self.data["Text"] = self.data["Text"].apply(lambda x: self.remove_patterns_from_text(x, high_frequency_patterns))
        self.data.to_csv("News_Data_Updated.csv", index=False)
        print("Updated News_Data_Updated.csv saved successfully!")

 
if __name__ == "__main__":
    extractor = PatternExtractor("News_Data.csv")
    extractor.update_data()

---------------------------------

# Extract Date And Formating it

In [47]:
class NewsScraper:
    def __init__(self, csv_file):
        self.csv_file = csv_file
        self.text_df = pd.read_csv(csv_file)

    def get_text(self, url, target_class=None):
        try:
            response = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'})
            response.raise_for_status()  # Raise an error for unsuccessful requests
        except requests.exceptions.RequestException:
            return "0"

        soup = BeautifulSoup(response.content, 'html.parser')

        elements = soup.find_all(class_=target_class)
        if not elements:
            return "0"

        return [element.get_text(strip=True) for element in elements] if target_class else soup.get_text(strip=True)

    def is_valid_date(self, date_str):
        try:
            parse(date_str)
            return True
        except ValueError:
            return False

    def extract_elements_by_class(self, url, target_class):
        result = self.get_text(url, target_class)
        if result != "0" and self.is_valid_date(result[0]):
            return result[0]
        return "0"

    def get_publication_date(self, url):
        try:
            response = requests.get(url)
            response.raise_for_status()

            soup = BeautifulSoup(response.content, 'html.parser')

            date_meta_tags = [
                {"name": "pubdate"}, {"name": "publish-date"}, {"name": "creation-date"},
                {"name": "date"},{"property": "article:published_time"},
                {"property": "og:article:published_time"}
            ]

            for tag in date_meta_tags:
                date_tag = soup.find("meta", tag)
                if date_tag and date_tag.get("content"):
                    return date_tag['content'][:10]

            possible_date_tags = soup.find_all(["time", "span", "p"])
            for tag in possible_date_tags:
                if tag.has_attr("datetime"):
                    return tag['datetime'][:10]
                elif "published" in tag.get("class", []) or "date" in tag.get("class", []):
                    return tag.text.strip()[:10]

            text_content = soup.get_text()
            date_patterns = [
                r'\b\d{4}-\d{2}-\d{2}\b', r'\b\d{2}/\d{2}/\d{4}\b', r'\b\d{1,2} \w{3,9} \d{4}\b'
            ]
            for pattern in date_patterns:
                match = re.search(pattern, text_content)
                if match:
                    return match.group()[:10]

            return "0"
        except requests.RequestException:
            return "0"

    def extract_date_as_date_type(self, text):
        date_pattern = r'\b(\w+)\s(\d{1,2}),\s(\d{4})\b'
        match = re.search(date_pattern, text)

        if match:
            month_name, day, year = match.groups()
            date_obj = datetime.strptime(f"{month_name} {day} {year}", "%B %d %Y").date()
            return date_obj
        return "0"

    def convert_date(self, date_str):
        try:
            date_obj = datetime.strptime(date_str, '%Y-%m-%d')
            return date_obj.strftime('%Y-%m-%d')
        except ValueError:
            pass

        formats = ['%m/%d/%Y', '%d-%b-%y', '%d-%b-%Y', '%a, %b %d, %Y','%B %d, %Y,',"%B %d, %Y", '%a, %b %d, %Y,', 
                   "%b %d, %Y", "%b %d, %Y,","%d-%b-%y", "%a, %b %d, %Y, %I:%M %p", "%a, %b %d, %Y"]

        for fmt in formats:
            try:
                date_obj = datetime.strptime(date_str, fmt)
                return date_obj.strftime('%Y-%m-%d')
            except ValueError:
                continue

        return None

    def extract_dates(self):
        for i in tqdm(range(len(self.text_df)), ncols=100):
            try:
                link = self.text_df["Link"][i]
                current_date = self.text_df["Date"][i]

                if current_date == "0" or pd.isna(current_date):
                    if "abcnews" in link:
                        self.text_df.loc[i, "Date"] = self.extract_elements_by_class(link, "VZTD mLASH gpiba".strip())

                    elif "yahoo" in link:
                        self.text_df.loc[i, "Date"] = self.extract_elements_by_class(link, "byline-attr-meta-time".strip())

                    elif "cnn" in link:
                        timestamp = self.get_text(link, "timestamp vossi-timestamp")
                        self.text_df.loc[i, "Date"] = self.extract_date_as_date_type(timestamp[0]) if timestamp else "0"

                    elif "coindesk" in link:
                        content = self.get_text(link, "Noto_Sans_xs_Sans-400-xs flex gap-4 text-charcoal-600 flex-col md:flex-row")[0][:12]
                        self.text_df.loc[i, "Date"] = content

                    elif "teslarati" in link:
                        content = self.get_text(link, "post-date updated")[0]
                        self.text_df.loc[i, "Date"] = content

                    elif "cnbc" in link:
                        content = self.get_publication_date(link)
                        self.text_df.loc[i, "Date"] = content

                    else:
                        self.text_df.loc[i, "Date"] = current_date
            except Exception as e:
                pass  # Log or handle the exception if needed

    def process_and_save(self):
        self.extract_dates()
        for i in tqdm(range(len(self.text_df)), ncols=100):
            try:
                self.text_df.loc[i, "Date"] = self.convert_date(self.text_df["Date"][i])
            except Exception:
                pass
        self.text_df.to_csv(self.csv_file, index=False)


# Example usage:
scraper = NewsScraper("News_Data.csv")
scraper.process_and_save()


100%|████████████████████████████████████████████████████████| 8760/8760 [00:00<00:00, 73887.04it/s]
100%|█████████████████████████████████████████████████████████| 8760/8760 [00:02<00:00, 4185.51it/s]


----------

# Labeling Text Data

In [5]:
class SentimentAnalyzer:   
    def __init__(self, csv_file):
        self.csv_file = csv_file
        self.text_df = pd.read_csv(csv_file)
        self.tokenizer = AutoTokenizer.from_pretrained("yiyanghkust/finbert-tone")
        self.model = AutoModelForSequenceClassification.from_pretrained("yiyanghkust/finbert-tone")
        self.encoder = LabelEncoder()
        
    def get_sentiment(self, text):
        """Get sentiment from text using FinBERT model."""
        inputs = self.tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
        outputs = self.model(**inputs)
        probabilities = softmax(outputs.logits, dim=1)
        sentiment_labels = ["neutral", "positive", "negative"]
        return sentiment_labels[probabilities.argmax()]

    def process_sentiments(self):
        """Process and update sentiment labels and encoding in the DataFrame."""
        for i in tqdm(range(len(self.text_df["Sentiment"])), ncols=100):
            try:
                if pd.isna(self.text_df["Sentiment"][i]):  # Only update if sentiment is missing
                    sentiment = self.get_sentiment(self.text_df["Text"][i])
                    self.text_df.loc[i, "Sentiment"] = sentiment
            except Exception as e:
                pass  # Handle or log the exception if necessary

    def encode_sentiments(self):
        """Encode the sentiment column into numerical labels."""
        self.text_df["Labeling_Sentiment"] = self.encoder.fit_transform(self.text_df["Sentiment"])

    def save_to_csv(self):
        """Save the updated DataFrame back to CSV."""
        self.text_df.to_csv(self.csv_file, index=False)

    def run_analysis(self):
        """Run the full sentiment analysis process."""
        self.process_sentiments()
        self.encode_sentiments()
        self.save_to_csv()


# Example usage:
sentiment_analyzer = SentimentAnalyzer("News_Data.csv")
sentiment_analyzer.run_analysis()


100%|███████████████████████████████████████████████████████| 8760/8760 [00:00<00:00, 157060.49it/s]
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


--------

# Cleaning data & Formating it

In [None]:
class DataCleaner:
    def __init__(self, csv_file, is_url_function):
        """
        Initialize the DataCleaner class.
        
        Parameters:
        - csv_file: Path to the CSV file to process.
        - is_url_function: A function to validate URLs.
        """
        self.csv_file = csv_file
        self.is_url = is_url_function
        self.data = pd.read_csv(csv_file)

    def filter_data(self):
        """
        Filter and clean the dataset:
        - Remove duplicate rows based on 'Link' and 'Text'.
        - Keep rows with valid URLs in 'Link'.
        - Exclude rows where 'Text' or 'Date' is invalid or missing.
        """
        self.data = self.data.drop_duplicates(subset=["Link", "Text"])
        self.data = self.data[self.data["Link"].apply(self.is_url).fillna(False)]
        self.data = self.data[self.data["Text"] != "0"]
        self.data = self.data[self.data["Date"] != "0"]
        self.data = self.data.dropna(subset=["Date"])  # Ensure 'Date' column is not None

    def save_filtered_data(self, output_file=None):
        """
        Save the cleaned and filtered data to a new CSV file.
        
        Parameters:
        - output_file: Optional; path to save the cleaned data. Defaults to the original file.
        """
        output_file = output_file or self.csv_file
        self.data.to_csv(output_file, index=False)

    def create_refresh_dict(self):
        """
        Create a dictionary of the filtered data.
        
        Returns:
        A dictionary with keys 'Link', 'Text', 'Sentiment', 'Date', and 'Labeling_Sentiment'.
        """
        return {
            "Link": self.data["Link"].tolist(),
            "Text": self.data["Text"].tolist(),
            "Sentiment": self.data["Sentiment"].tolist(),
            "Date": self.data["Date"].tolist(),
            "Labeling_Sentiment": self.data["Labeling_Sentiment"].tolist(),
        }

    def run_cleaning(self, output_file=None):
        """
        Execute the complete cleaning process:
        - Filter the data.
        - Save the cleaned data.
        - Return the refresh dictionary.
        
        Parameters:
        - output_file: Optional; path to save the cleaned data.
        
        Returns:
        The refresh dictionary.
        """
        self.filter_data()
        self.save_filtered_data(output_file)
        return self.create_refresh_dict()


# Example usage:
def is_url_example(url):
    # Replace this with your `is_url` function implementation
    return True  # Simplified for example

data_cleaner = DataCleaner("News_Data.csv", is_url_function=is_url_example)
refresh_dict = data_cleaner.run_cleaning("News_Data.csv")

-------------

# Display forecasting Using Moveing Avereage & RSI & Sentiment Analysis

In [11]:
# Function to calculate RSI
def calculate_rsi(data, window=14):
    delta = data.diff(1)
    gain = delta.clip(lower=0)
    loss = -delta.clip(upper=0)
    
    avg_gain = gain.rolling(window=window, min_periods=1).mean()
    avg_loss = loss.rolling(window=window, min_periods=1).mean()
    
    rs = avg_gain / avg_loss
    rsi = 100 - (100 / (1 + rs))
    return rsi

# Function to analyze stock data
def analyze_stock(keyword):
    # Load stock price data
    stock_data = yf.download(keyword, start="2024-10-01", end=datetime.today())["Close"]
    
    # Load and filter news sentiment data
    news_data = pd.read_csv("News_Data.csv")
    filtered_news = news_data[news_data["Text"].str.contains(keyword, case=False, na=False)]
    
    # Aggregate sentiment by date
    sentiment = (
        filtered_news.groupby('Date')['Labeling_Sentiment']
        .mean()
        .reset_index()
        .set_index("Date")
    )
    
    # Combine sentiment with stock prices
    analysis_df = sentiment.copy()
    analysis_df["Price"] = stock_data
    analysis_df.dropna(inplace=True)
    
    # Calculate indicators
    analysis_df["Moving Average"] = analysis_df["Price"].rolling(window=5).mean()
    analysis_df["RSI"] = calculate_rsi(analysis_df["Price"], window=5)
    
    # Create plots
    fig, axes = plt.subplots(3, 1, figsize=(14, 16), gridspec_kw={"height_ratios": [3, 1, 1]})
    
    # Plot 1: Closing Price and Moving Average
    ax1 = axes[0]
    ax1.plot(analysis_df.index, analysis_df["Price"], label="Closing Price", color="blue", linewidth=2)
    ax1.plot(analysis_df.index, analysis_df["Moving Average"], label="5-Day Moving Average", color="orange", linewidth=2)
    ax1.set_title(f"{keyword} Exchange Rate: Price and Moving Average", fontsize=16, fontweight='bold')
    ax1.set_xlabel("Date", fontsize=14)
    ax1.set_ylabel("Price (USD)", fontsize=14)
    ax1.grid(True, linestyle='--', alpha=0.6)
    ax1.legend(fontsize=12)
    ax1.tick_params(axis='x', rotation=45)
    Cursor(ax1, useblit=True, color='red', linewidth=1, linestyle='--')
    
    # Plot 2: RSI
    ax2 = axes[1]
    ax2.plot(analysis_df.index, analysis_df["RSI"], label="RSI", color="purple", linewidth=2)
    ax2.axhline(70, color="red", linestyle="--", label="Overbought (70)")
    ax2.axhline(30, color="green", linestyle="--", label="Oversold (30)")
    ax2.set_title("Relative Strength Index (RSI)", fontsize=14, fontweight='bold')
    ax2.set_xlabel("Date", fontsize=12)
    ax2.set_ylabel("RSI", fontsize=12)
    ax2.grid(True, linestyle='--', alpha=0.6)
    ax2.legend(fontsize=10)
    ax2.tick_params(axis='x', rotation=45)
    
    # Plot 3: Sentiment Analysis
    ax3 = axes[2]
    ax3.plot(analysis_df.index, analysis_df["Labeling_Sentiment"], label="Sentiment", color="green", linewidth=2)
    sentiment_label = (
        "Neutral Sentiment (1)" if 0.5 <= analysis_df["Labeling_Sentiment"].iloc[-1] <= 1.5 else
        "Positive Sentiment (2)" if analysis_df["Labeling_Sentiment"].iloc[-1] > 1.5 else
        "Negative Sentiment (0)"
    )
    ax3.axhline(0, color="black", linestyle="--", label=sentiment_label)
    ax3.set_title("Sentiment Analysis", fontsize=14, fontweight='bold')
    ax3.set_xlabel("Date", fontsize=12)
    ax3.set_ylabel("Sentiment Score", fontsize=12)
    ax3.grid(True, linestyle='--', alpha=0.6)
    ax3.legend(fontsize=10)
    ax3.tick_params(axis='x', rotation=45)
    
    # Adjust layout and save the plot
    plt.tight_layout()
    output_path = "output_plot.png"
    plt.savefig(output_path)
    plt.close(fig)
    return output_path

# Gradio interface
gui = gr.Interface(
    fn=analyze_stock,
    inputs=gr.Textbox(label="Enter Stock Name"),
    outputs=gr.Image(type="filepath", label="Analysis Plot"),
    title="Stock Sentiment and Price Analysis",
    description="Analyze stock sentiment and price trends based on user input."
)

# Launch the app
gui.launch()


* Running on local URL:  http://127.0.0.1:7861

To create a public link, set `share=True` in `launch()`.


