In [1]:
import joblib

# NOTE(review): joblib.load unpickles the file, and unpickling can execute
# arbitrary code -- only load model files that come from a trusted source.
loaded_model = joblib.load('sentiment_analysis_model.pkl')

# Smoke test: classify one headline with the loaded model and print the label.
# `predict` is handed a list of raw strings, so the pickled object presumably
# bundles its own text vectoriser -- TODO confirm.
test_headline = ["One in five BEST buses now electric make up 1.6% of all"]
predicted_label = loaded_model.predict(test_headline)
print(predicted_label)

[1]


In [None]:
from bs4 import BeautifulSoup
import requests
import csv
import pandas as pd
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import load_model

# Download the NLTK resources needed for tokenisation, stopword removal and
# lemmatisation.
# 'punkt_tab' is fetched in addition to 'punkt': NLTK 3.8.2 and later make
# word_tokenize load the 'punkt_tab' tables and raise LookupError when they
# are missing. Older releases simply ignore the extra package.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')
nltk.download('wordnet')

# Function for preprocessing
def preprocessing(text):
    """Normalise a piece of text into a space-separated string of lemmas.

    The text is tokenised with ``word_tokenize``; tokens whose lower-cased
    form is an English stopword are dropped; the remaining tokens are
    lower-cased and lemmatised with ``WordNetLemmatizer``.

    Args:
        text: The string to normalise.

    Returns:
        The surviving lemmas joined by single spaces.
    """
    raw_tokens = word_tokenize(text)
    english_stopwords = set(stopwords.words('english'))
    lemmatize = WordNetLemmatizer().lemmatize

    normalised = []
    for raw in raw_tokens:
        lowered = raw.lower()
        # Stopwords are matched case-insensitively, so compare the lowered form.
        if lowered in english_stopwords:
            continue
        normalised.append(lemmatize(lowered))

    return ' '.join(normalised)

def get_negative_articles(topic, model=None, timeout=10):
    """Scrape Google News for ``topic`` and return the articles labelled negative.

    The search-result page is fetched and every matching link is downloaded.
    For each article page that has an ``<h1>``, the heading and the text of
    all ``<p>`` elements are collected. The raw results are written to
    ``<topic>_articles.csv`` in the working directory (characters that are not
    letters, digits, ``_``, ``-`` or spaces are replaced by ``_`` in the file
    name). The headings are then passed through ``preprocessing`` and
    classified; rows whose predicted label is ``0`` are returned.

    Args:
        topic: Search phrase entered by the user.
        model: Object with a ``predict`` method that accepts the preprocessed
            headings. Defaults to the notebook-level ``loaded_model``.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        pandas.DataFrame with columns ``Heading`` (preprocessed) and ``Text``
        containing only the rows predicted as ``0``. The frame is empty when
        nothing could be scraped.

    Raises:
        requests.exceptions.RequestException: If the search page itself cannot
            be fetched. Failures on individual article pages are printed and
            skipped.
    """
    import re  # local import: only used to build a safe CSV file name

    # Let requests build the query string so that spaces, '&', '#', ... in the
    # topic are percent-encoded instead of corrupting the URL.
    page = requests.get(
        'https://news.google.com/search',
        params={'q': topic, 'hl': 'en-IN', 'gl': 'IN', 'ceid': 'IN:en'},
        timeout=timeout,
    )
    soup = BeautifulSoup(page.text, 'html.parser')
    articles = soup.find_all('a', class_='WwrzSb')
    articles_list = []

    for article in articles:
        href = article.get('href')
        if not href:
            # Anchor without a target: nothing to follow.
            continue
        # The first character of the href is dropped before the host is
        # prepended (presumably the links are relative, './...').
        link = 'https://news.google.com' + href[1:]
        try:
            p1 = requests.get(link, timeout=timeout)
        except requests.exceptions.RequestException as e:
            # Covers connection errors, timeouts, redirect loops, etc. One bad
            # article must not abort the whole scrape.
            print(f"Connection error occurred: {e}")
            continue

        if p1.status_code in [403, 404]:
            continue

        soup1 = BeautifulSoup(p1.text, 'html.parser')
        heading_tag = soup1.find('h1')
        if heading_tag:
            heading = heading_tag.get_text(strip=True)
            text = '\n'.join(p.get_text(strip=True) for p in soup1.find_all('p'))
            articles_list.append((heading, text))

    # Save the raw scraped articles. The topic is user input, so strip
    # characters that are invalid in file names or could change the directory.
    safe_topic = re.sub(r'[^\w\- ]+', '_', topic)
    file_name = f"{safe_topic}_articles.csv"
    with open(file_name, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Heading', 'Text'])
        writer.writerows(articles_list)

    news = pd.DataFrame(articles_list, columns=['Heading', 'Text'])
    if news.empty:
        # Nothing was scraped; do not hand the model an empty input.
        return news

    news['Heading'] = news['Heading'].apply(preprocessing)

    # Drop rows with NaN in the 'Heading' column.
    news = news.dropna(subset=['Heading'])

    if model is None:
        model = loaded_model

    # Wrap the predictions in a Series aligned with `news` so the comparison
    # is element-wise whether `predict` returns a list or an array.
    predictions = pd.Series(list(model.predict(news['Heading'])), index=news.index)

    # Label 0 is treated as negative sentiment.
    return news[predictions == 0]

# Example usage: prompt for a topic, scrape and classify its news, then print
# the (preprocessed) headings of the articles that were labelled negative.
# NOTE(review): get_negative_articles reads the notebook-level `loaded_model`,
# so the cell that loads the model has to be run before this one.
topic = input("Enter the topic: ")
negative_articles = get_negative_articles(topic)
print("Negative Articles:")
print(negative_articles[['Heading']])



[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\sidhesh\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\sidhesh\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\sidhesh\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Enter the topic:  amul


In [None]:
import tkinter as tk
from tkinter import scrolledtext
from tkinter import messagebox
from PIL import Image, ImageTk
from bs4 import BeautifulSoup
import requests
import csv
import pandas as pd
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import load_model

# Download the NLTK resources needed for tokenisation, stopword removal and
# lemmatisation.
# 'punkt_tab' is fetched in addition to 'punkt': NLTK 3.8.2 and later make
# word_tokenize load the 'punkt_tab' tables and raise LookupError when they
# are missing. Older releases simply ignore the extra package.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')
nltk.download('wordnet')

# Function for preprocessing
def preprocessing(text):
    """Turn raw text into a cleaned, space-separated string of lemmas.

    Steps: tokenise with ``word_tokenize``, lower-case every token, discard
    English stopwords, lemmatise what is left with ``WordNetLemmatizer``.

    Args:
        text: The string to clean.

    Returns:
        The lemmatised, lower-cased, stopword-free tokens joined by spaces.
    """
    words = word_tokenize(text)
    stop_set = set(stopwords.words('english'))

    # Lower-casing first gives the same result as filtering on the lowered
    # form and lower-casing afterwards.
    lowered_words = (w.lower() for w in words)
    kept = [w for w in lowered_words if w not in stop_set]

    wnl = WordNetLemmatizer()
    return ' '.join(map(wnl.lemmatize, kept))

def get_negative_articles(topic, timeout=10):
    """Scrape Google News for ``topic`` and return the articles treated as negative.

    The search-result page is fetched and every matching link is downloaded.
    For each article page that has an ``<h1>``, the heading and the text of
    all ``<p>`` elements are collected. The raw results are written to
    ``<topic>_articles.csv`` in the working directory (characters that are not
    letters, digits, ``_``, ``-`` or spaces are replaced by ``_`` in the file
    name) and the headings are passed through ``preprocessing``.

    NOTE: model prediction is disabled in this version. Every article is given
    the placeholder label ``0`` (negative), so all scraped articles are
    returned.

    Args:
        topic: Search phrase entered by the user.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        pandas.DataFrame with columns ``Heading`` (preprocessed) and ``Text``.
        The frame is empty when nothing could be scraped.

    Raises:
        requests.exceptions.RequestException: If the search page itself cannot
            be fetched. Failures on individual article pages are printed and
            skipped.
    """
    import re  # local import: only used to build a safe CSV file name

    # Let requests build the query string so that spaces, '&', '#', ... in the
    # topic are percent-encoded instead of corrupting the URL.
    page = requests.get(
        'https://news.google.com/search',
        params={'q': topic, 'hl': 'en-IN', 'gl': 'IN', 'ceid': 'IN:en'},
        timeout=timeout,
    )
    soup = BeautifulSoup(page.text, 'html.parser')
    articles = soup.find_all('a', class_='WwrzSb')
    articles_list = []

    for article in articles:
        href = article.get('href')
        if not href:
            # Anchor without a target: nothing to follow.
            continue
        # The first character of the href is dropped before the host is
        # prepended (presumably the links are relative, './...').
        link = 'https://news.google.com' + href[1:]
        try:
            p1 = requests.get(link, timeout=timeout)
        except requests.exceptions.RequestException as e:
            # Covers connection errors, timeouts, redirect loops, etc. One bad
            # article must not abort the whole scrape.
            print(f"Connection error occurred: {e}")
            continue

        if p1.status_code in [403, 404]:
            continue

        soup1 = BeautifulSoup(p1.text, 'html.parser')
        heading_tag = soup1.find('h1')
        if heading_tag:
            heading = heading_tag.get_text(strip=True)
            text = '\n'.join(p.get_text(strip=True) for p in soup1.find_all('p'))
            articles_list.append((heading, text))

    # Save the raw scraped articles. The topic is user input, so strip
    # characters that are invalid in file names or could change the directory.
    safe_topic = re.sub(r'[^\w\- ]+', '_', topic)
    file_name = f"{safe_topic}_articles.csv"
    with open(file_name, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Heading', 'Text'])
        writer.writerows(articles_list)

    news = pd.DataFrame(articles_list, columns=['Heading', 'Text'])
    if news.empty:
        return news

    news['Heading'] = news['Heading'].apply(preprocessing)

    # Drop rows with NaN in the 'Heading' column.
    news = news.dropna(subset=['Heading'])

    # For demonstration, assume all articles are negative (label 0).
    # The labels must be a Series/array: comparing a plain list with 0 yields
    # the single value False, and news[False] raises KeyError.
    predictions = pd.Series(0, index=news.index)

    # Filter out articles with negative sentiment.
    return news[predictions == 0]

def search_topic():
    """Handle a click on the Search button.

    Reads the topic from ``topic_entry``, fetches the negative articles for it
    with ``get_negative_articles`` and shows them via ``display_output``.
    A blank (empty or whitespace-only) topic, a failed fetch, and an empty
    result are each reported to the user in a message box.
    """
    # Strip so that a topic made only of spaces counts as missing.
    topic = topic_entry.get().strip()
    if not topic:
        messagebox.showerror("Error", "Please enter a topic.")
        return

    try:
        negative_articles = get_negative_articles(topic)
    except (requests.exceptions.RequestException, OSError) as e:
        # An exception escaping a Tk callback is only reported on stderr, so
        # without this the user would get no feedback when the network or the
        # CSV write fails.
        messagebox.showerror("Error", f"Could not fetch articles: {e}")
        return

    if negative_articles.empty:
        messagebox.showinfo("Info", "No negative articles found for the given topic.")
    else:
        display_output(negative_articles)

def display_output(articles):
    """Replace the contents of the output area with the article headings.

    The text widget is made editable, cleared, filled with one heading per
    article (each followed by a blank line) and then set back to read-only.

    Args:
        articles: DataFrame with a ``Heading`` column.
    """
    output_text.config(state=tk.NORMAL)
    output_text.delete('1.0', tk.END)
    for heading in articles['Heading']:
        output_text.insert(tk.END, f"{heading}\n\n")
    output_text.config(state=tk.DISABLED)

# Create the main window
root = tk.Tk()
root.title("Negative News Finder")
root.geometry("800x600")

# Add background image, scaled to the window size.
# Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same filter
# under its current name and also exists in older Pillow releases.
background_image = Image.open("background_image.jpg")
background_image = background_image.resize((800, 600), Image.LANCZOS)
# Keep the PhotoImage in a module-level name: Tk does not hold its own
# reference, and the picture disappears if the object is garbage-collected.
bg_image = ImageTk.PhotoImage(background_image)
background_label = tk.Label(root, image=bg_image)
background_label.place(relwidth=1, relheight=1)

# Add title label
title_label = tk.Label(root, text="Negative News Finder", font=("Helvetica", 24), bg="#007acc", fg="white")
title_label.pack(fill=tk.X)

# Add topic entry (read by search_topic)
topic_entry = tk.Entry(root, font=("Helvetica", 14))
topic_entry.pack(pady=20, padx=20, fill=tk.X)

# Add search button; clicking it runs search_topic
search_button = tk.Button(root, text="Search", command=search_topic, font=("Helvetica", 14))
search_button.pack(pady=10)

# Add output text area; created read-only, display_output unlocks it
# temporarily while writing the results.
output_text = scrolledtext.ScrolledText(root, wrap=tk.WORD, font=("Helvetica", 12), state=tk.DISABLED)
output_text.pack(pady=20, padx=20, fill=tk.BOTH, expand=True)

# Start the Tk event loop; this call blocks until the window is closed.
root.mainloop()
