# **AI Powered News Assistant**

## **Import libraries**

In [35]:
import os
import numpy as np
import pandas as pd
import nltk
import requests
from gtts import gTTS
import tempfile
import time
from datetime import datetime
import threading
from bs4 import BeautifulSoup
import streamlit as st
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
import language_tool_python
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
from collections import Counter
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import gradio as gr
import warnings
warnings.filterwarnings("ignore")

## **News Categorizer System**

In [36]:
API_KEY = '8a9481f1cda44b65a209c39bca3851f7'
BASE_URL = 'https://newsapi.org/v2/top-headlines'

def fetch_news(category='general'):
    params = {
        'apiKey': API_KEY,
        'category': category,
        'country': 'in',
        'pageSize': 100  # Number of articles to fetch
    }

    response = requests.get(BASE_URL, params=params)
    if response.status_code == 200:
        return response.json()['articles']
    else:
        return []

def process_news(articles):
    news_data = []
    for article in articles:
        news_data.append({
            'title': article['title'],
            'url': article['url'],
            'description': article['description'],
            'publishedAt': article['publishedAt'],
            'category': article.get('category', 'general')
        })
    return pd.DataFrame(news_data)

def get_news_data():
    categories = ['business', 'entertainment', 'general', 'health', 'science', 'sports', 'technology']
    news_dfs = {}

    for category in categories:
        print(f"Fetching {category} news...")
        articles = fetch_news(category)
        news_dfs[category] = process_news(articles)

    return news_dfs

# Fetch and store news data
news_data = get_news_data()

Fetching business news...
Fetching entertainment news...
Fetching general news...
Fetching health news...
Fetching science news...
Fetching sports news...
Fetching technology news...


In [37]:
def recommend_news(category):
    df = news_data.get(category, pd.DataFrame())
    if df.empty:
      return "No news available for this category."
    else:
      recommendations = []
      for _, row in df.iterrows():
        recommendations.append({
            'title': row['title'],
            'publishedAt': row['publishedAt'],
            'url': row['url']
            })
      return recommendations


# Create a Gradio interface
interface = gr.Interface(
    fn=recommend_news,
    inputs=gr.Dropdown(choices=['business', 'entertainment', 'general', 'health', 'science', 'sports', 'technology'], label="Select News Category"),
    outputs=gr.JSON(label="News Recommendations"),
    live=True
)

interface.launch()

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://36ceb66deafa3f8398.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




## **AI-Assisted Writer**

In [38]:
model_name = "t5-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

summarization_pipeline = pipeline("summarization", model=model, tokenizer=tokenizer)

In [39]:
tool = language_tool_python.LanguageTool('en-US')

def grammar_check(text):
    matches = tool.check(text)
    corrections = []
    for match in matches:
        corrections.append({
            "mistake": match.context,
            "suggestion": match.replacements,
            "message": match.message
        })
    return corrections

In [40]:
def suggest_improvements(text):
    # Summarize the input text to get a suggestion for improvement
    suggestions = summarization_pipeline(text, max_length=50, min_length=25, do_sample=False)
    return suggestions[0]['summary_text']

In [41]:
def ai_assistant_writer(text):
    suggestions = suggest_improvements(text)
    grammar_corrections = grammar_check(text)

    return {
        "Original Text": text,
        "Suggestions": suggestions,
        "Grammar Corrections": grammar_corrections
    }

In [42]:
def gradio_interface(text):
    results = ai_assistant_writer(text)
    return results["Suggestions"], results["Grammar Corrections"]

iface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Textbox(lines=10, placeholder="Type your paragraph here..."),
    outputs=[
        gr.Textbox(label="Suggestions"),
        gr.JSON(label="Grammar Corrections")
    ],
    title="AI-Assisted Writer",
    description="This AI tool offers suggestions, grammar checks, and style improvements for your writing.",
)

iface.launch()

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://f9d41fc52cd888aeb0.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




## **Text To Speech (TTS) Model**

In [43]:
def text_to_speech(text):
    # Create a gTTS object
    tts = gTTS(text=text, lang='en')

    # Save the audio file to a temporary location
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
        temp_file_path = temp_file.name
        tts.save(temp_file_path)

    # Return the path of the saved audio file
    return temp_file_path

In [44]:
def create_gradio_interface():
    # Define Gradio interface
    interface = gr.Interface(
        fn=text_to_speech,               # Function to call
        inputs=gr.Textbox(lines=2, placeholder="Enter text here..."),  # Input component
        outputs=gr.Audio(type="filepath"),    # Output component
        title="Text to Speech Converter", # Title of the interface
        description="Enter text and get the audio file as output."  # Description
    )

    # Launch the interface
    interface.launch()

In [45]:
create_gradio_interface()

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://22ffe308575a046141.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


## **Real Time News Updater**

In [46]:
API_KEY = '8a9481f1cda44b65a209c39bca3851f7'
BASE_URL = 'https://newsapi.org/v2/top-headlines'

# Function to fetch breaking news
def fetch_breaking_news():
    params = {
        'apiKey': API_KEY,
        'country': 'in',
        'category': 'general',
        'pageSize': 100     # Number of articles to fetch
    }

    response = requests.get(BASE_URL, params=params)
    if response.status_code == 200:
        articles = response.json()['articles']
        news_list = []
        for article in articles:
            news_list.append({
                'title': article['title'],
                'description': article['description'],
                'publishedAt': article['publishedAt'],
                'url': article['url'],
            })
        return news_list
    else:
        return []

# Function to format news into a DataFrame
def format_news(news_list):
    df = pd.DataFrame(news_list)
    if df.empty:
        return "No news available at the moment."
    return df[['title','publishedAt' , 'url']].head(10).to_dict(orient='records')

# Function to get the latest news
def get_latest_news():
    time.sleep(1)  # Simulate a delay of 1 second
    news_list = fetch_breaking_news()
    return format_news(news_list)

# Function to create the Gradio interface
def create_gradio_interface():
    interface = gr.Interface(
        fn=get_latest_news,                 # Function to call
        inputs=[],                          # No inputs required
        outputs=gr.JSON(label="Top 10 Breaking News in India"),  # Output as JSON
        title="Breaking News Updater",       # Title
        description="Click 'Refresh' to get the latest breaking news in India."  # Description
    )
    interface.launch()

# Launch the Gradio interface
create_gradio_interface()

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://f86423521728c565c5.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


## **News Recommendation System**

In [47]:
API_KEY = '8a9481f1cda44b65a209c39bca3851f7'
BASE_URL = 'https://newsapi.org/v2/top-headlines'

# Function to fetch news
def fetch_news(category='general', country='in'):
    params = {
        'apiKey': API_KEY,
        'category': category,
        'country': country,
        'pageSize': 1000
    }

    response = requests.get(BASE_URL, params=params)
    if response.status_code == 200:
        articles = response.json()['articles']
        news_list = []
        for article in articles:
            news_list.append({
                'title': article['title'],
                'description': article['description'],
                'url': article['url'],
                'publishedAt': article['publishedAt'],
                'category': category
            })
        return news_list
    else:
        return []


# Example user preferences and history
user_preferences = ['business', 'entertainment', 'general', 'health', 'science', 'sports', 'technology']
user_reading_history = []

def get_personalized_recommendations(news_data):
    # Filter news based on user preferences
    filtered_news = [news for news in news_data if news['category'] in user_preferences]
    # Optionally, filter out news already read
    filtered_news = [news for news in filtered_news if news not in user_reading_history]

    # Sort by publish date
    sorted_news = sorted(filtered_news, key=lambda x: x['publishedAt'], reverse=True)
    return sorted_news[:10]

def get_trending_topics(news_data):
    # Count occurrences of each category to find trending topics
    categories = [news['category'] for news in news_data]
    category_counts = Counter(categories)
    trending_categories = [item[0] for item in category_counts.most_common(5)]

    # Get top news articles from trending categories
    trending_news = [news for news in news_data if news['category'] in trending_categories]
    trending_news = sorted(trending_news, key=lambda x: x['publishedAt'], reverse=True)
    return trending_news[:10]

def get_news_recommendations():
    all_news = []
    for category in user_preferences:
        all_news.extend(fetch_news(category=category))

    # Get personalized recommendations
    personalized_recommendations = get_personalized_recommendations(all_news)

    # Get trending topics
    trending_news = get_trending_topics(all_news)

    return {
        'Personalized Recommendations': personalized_recommendations,
        'Trending News': trending_news
    }

# Define Gradio interface
def create_gradio_interface():
    interface = gr.Interface(
        fn=get_news_recommendations,    # Function to call
        inputs=[],                      # No inputs required
        outputs=[
            gr.JSON(label="Personalized Recommendations"),  # Output as JSON
            gr.JSON(label="Trending News")                  # Output as JSON
        ],
        title="Personalized News Recommendations",  # Title
        description="Get personalized news recommendations based on your preferences and see the latest trending news."  # Description
    )
    interface.launch()

# Launch the Gradio interface
create_gradio_interface()

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://bc6f9666bc76e72b9a.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


## **Automated Content Summarizer**

In [48]:
# Initialize the summarization pipeline
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# Function to summarize the article text
def summarize_text(article):
    # Ensure the input text is not too short
    if len(article.split()) < 30:
        return "Please enter a longer article for summarization."

    # Generate the summary
    summary = summarizer(article, max_length=130, min_length=30, do_sample=False)

    # Extract and return the summary text
    return summary[0]['summary_text']

# Create the Gradio interface
interface = gr.Interface(
    fn=summarize_text,             # The function to summarize the text
    inputs=gr.Textbox(lines=10, label="Enter Article Text"),  # Input text box
    outputs=gr.Textbox(label="Summary"),  # Output text box
    title="Automated Content Summarizer",        # Title of the app
    description="Enter a lengthy article or text, and this tool will generate a concise and informative summary."  # App description
)

# Launch the Gradio interface
interface.launch()

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://3c469f54819faac20b.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




## **Fake News Detector**

In [49]:
# Load dataset
from google.colab import drive
drive.mount('/content/drive')
fake = pd.read_csv('/content/drive/MyDrive/Fake.csv')
real = pd.read_csv('/content/drive/MyDrive/True.csv')

# Create labels
fake['label'] = 0
real['label'] = 1

# Concatenate datasets
data = pd.concat([fake, real], ignore_index=True)

# Split data
X = data['text']
y = data['label']

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the model
pipeline = Pipeline([
    ('tfidf', TfidfVectorizer()),
    ('clf', RandomForestClassifier())
])
pipeline.fit(X_train, y_train)

# Fetch real-time news
API_KEY = '8a9481f1cda44b65a209c39bca3851f7'
BASE_URL = 'https://newsapi.org/v2/top-headlines'

def fetch_real_time_news():
    params = {
        'apiKey': API_KEY,
        'country': 'us',
        'pageSize': 10
    }
    response = requests.get(BASE_URL, params=params)
    if response.status_code == 200:
        articles = response.json()['articles']
        return [{'title': article['title'], 'description': article['description']} for article in articles]
    else:
        return []

# Detect fake news
def detect_fake_news(news_list):
    results = []
    for news in news_list:
        text = news['title'] + " " + (news['description'] if news['description'] else "")
        prediction = pipeline.predict([text])[0]
        results.append({
            'title': news['title'],
            'description': news['description'],
            'is_fake': 'Fake' if prediction == 1 else 'Real'
        })
    return results

# Create the Gradio interface
def summarize_and_classify():
    news_list = fetch_real_time_news()
    if not news_list:
        return "No news available at the moment."
    results = detect_fake_news(news_list)
    formatted_results = [
        f"Title: {result['title']}\nDescription: {result['description']}\nStatus: {result['is_fake']}\n\n"
        for result in results
    ]
    return "\n".join(formatted_results)

interface = gr.Interface(
    fn=summarize_and_classify,
    inputs=[],
    outputs=gr.Textbox(label="News Classification"),
    title="Fake News Detector",
    description="Get real-time news and check if it's fake or real."
)

interface.launch()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://dcf919fff5ee326a3b.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


