In [1]:
import streamlit as st
import newspaper
import pandas as pd
import nltk
from nltk.corpus import stopwords
from wordcloud import WordCloud
from textblob import TextBlob
import matplotlib.pyplot as plt

# Download NLTK data
nltk.download('stopwords')

# Define the Streamlit app
def main():
    st.title("News Article Analyzer")
    st.subheader("Enter a News Article URL to Analyze Content")

    # Input URL
    url = st.text_input("Enter a News Article URL:")

    if url:
        st.write("Fetching and analyzing the article...")
        try:
            # Fetch the article
            article = newspaper.Article(url=url, language='en')
            article.download()
            article.parse()

            # Article details
            article_data = {
                "title": article.title,
                "text": article.text,
                "authors": article.authors,
                "published_date": article.publish_date,
                "top_image": article.top_image,
            }

            # Display Article Information
            st.header("Article Details")
            st.write(f"**Title:** {article_data['title']}")
            st.write(f"**Published Date:** {article_data['published_date']}")
            st.write(f"**Authors:** {', '.join(article_data['authors'])}")
            st.image(article_data['top_image'], caption="Top Image", use_column_width=True)

            # Show full text
            with st.expander("Read Full Article Text"):
                st.write(article_data['text'])

            # NLP Analysis
            st.header("NLP Analysis")

            # Tokenization
            tokens = article_data['text'].split()
            filtered_tokens = [word for word in tokens if word.lower() not in stopwords.words('english')]

            # Word Cloud
            st.subheader("Word Cloud")
            wordcloud = WordCloud(width=800, height=400, background_color="white").generate(" ".join(filtered_tokens))
            plt.figure(figsize=(10, 5))
            plt.imshow(wordcloud, interpolation="bilinear")
            plt.axis("off")
            st.pyplot(plt)

            # Sentiment Analysis
            st.subheader("Sentiment Analysis")
            sentiment = TextBlob(article_data['text']).sentiment
            st.write(f"**Polarity:** {sentiment.polarity}")
            st.write(f"**Subjectivity:** {sentiment.subjectivity}")
            if sentiment.polarity > 0:
                st.success("The article has a positive sentiment.")
            elif sentiment.polarity < 0:
                st.error("The article has a negative sentiment.")
            else:
                st.info("The article has a neutral sentiment.")

        except Exception as e:
            st.error(f"Error processing the article: {e}")

if __name__ == "__main__":
    main()


[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/weixuanhuang/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
2024-11-29 09:16:43.884 
  command:

    streamlit run /Users/weixuanhuang/Library/Python/3.9/lib/python/site-packages/ipykernel_launcher.py [ARGUMENTS]
2024-11-29 09:16:43.886 Session state does not function when running a script without `streamlit run`
