In [4]:
# %load main.py
import json
import requests
import streamlit as st
import matplotlib.pyplot as plt
import pandas as pd
import nltk
import main_functions
from nltk import sent_tokenize
from nltk import word_tokenize
from nltk.probability import FreqDist
from nltk.corpus import stopwords
from wordcloud import WordCloud
from PIL import Image
# nltk.download("punkt")
# nltk.download("stopwords")

# Credentials: read the local JSON key file through the project helper and
# keep the value stored under "api_key" for the NYT request URLs built below.
api_key_dict = main_functions.read_from_file(
    "JSON_Files/api_key.json"
)
api_key = api_key_dict["api_key"]



# --- Page header and Part A introduction ------------------------------------
st.title("COP4813 - Web Application Programming")
st.header("Project 1")
st.subheader("Part A - The Stories API")
# Adjacent string literals are concatenated with no separator, so every
# fragment has to carry its own trailing space ("articles " + "based ...").
st.write(
    "This app uses the Top Stories API to display the most common words used in the top current articles "
    "based on a specified topic selected by the user. The data is displayed as a line chart and as a wordcloud "
    "image."
)

# --- I: topic selection -----------------------------------------------------
st.subheader("I - Topic Selection")
user_name = st.text_input("Please enter your name", "")

# Section names offered to the user; the selected value is lower-cased when
# the Top Stories request URL is built.
option = st.selectbox(
    "Select a topic of your interest",
    ("Arts", "Automobiles", "Books", "Business", "Fashion", "Food", "Health", "Home", "Insider", "Magazine",
     "Movies", "NYRegion", "Obituaries", "Opinion", "Politics", "RealEstate", "Science", "Sports", "SundayReview",
     "Technology", "Theater", "T-Magazine", "Travel", "Upshot", "US", "World"),
    index=0,
)

# Fetch the current top stories for the selected section and cache the raw
# JSON on disk; sections II and III read it back from that file.
stories_url = f"https://api.nytimes.com/svc/topstories/v2/{option.lower()}.json?api-key={api_key}"
# Streamlit re-runs this script on every widget interaction, so a request
# without a timeout could hang the whole page on a stalled connection.
stories_response = requests.get(stories_url, timeout=10).json()

main_functions.save_to_file(stories_response, "JSON_Files/response.json")

# Accumulator for the concatenated abstracts; reset on every script run.
story_abstracts = ""

# NOTE: this rebinds the imported `stopwords` corpus reader to the English
# stop-word collection. A set is used because the name is only consulted for
# membership tests, which are O(1) on a set instead of a linear list scan.
stopwords = set(stopwords.words("english"))
st.write(f"Hi {user_name}, you selected the {option} topic.")

st.subheader("II - Frequency Distribution")
fDistribution = st.checkbox("Click here to generate frequency distribution")
if fDistribution:
    # Re-read the cached Top Stories response saved earlier in the script.
    articles = main_functions.read_from_file("JSON_Files/response.json")

    # Join with a space: plain concatenation glued the last word of one
    # abstract to the first word of the next ("city.The"), and such fused
    # tokens fail isalpha() below, silently dropping both words.
    story_abstracts = " ".join(i["abstract"] for i in articles["results"])

    # Keep alphabetic tokens only, lower-cased, minus English stop words.
    clean_words = [
        w.lower()
        for w in word_tokenize(story_abstracts)
        if w.isalpha() and w.lower() not in stopwords
    ]

    # Ten most frequent words as (word, count) pairs.
    fDist = FreqDist(clean_words)
    common_words = {"List": fDist.most_common(10)}

    # Words become the index (named "times"), leaving "Occurrences" as the
    # single plotted series.
    chart_data = pd.DataFrame(
        common_words["List"], columns=["times", "Occurrences"]
    ).set_index("times")
    st.line_chart(chart_data)

st.subheader("III - Wordcloud")
wordCloud = st.checkbox("Click here to generate wordcloud")
if wordCloud:
    # Re-read the cached Top Stories response saved earlier in the script.
    articles = main_functions.read_from_file("JSON_Files/response.json")

    # Build the text here instead of relying on `story_abstracts`: that
    # variable is only filled when the frequency-distribution checkbox is
    # ticked, so the cloud used to be generated from an empty string.
    # A space separator keeps neighbouring abstracts from fusing words.
    abstracts = " ".join(i["abstract"] for i in articles["results"])

    if abstracts.strip():
        wordcloud = WordCloud().generate(abstracts)

        # Draw on an explicit Figure and hand that to Streamlit rather than
        # the pyplot module, then close it so figures do not pile up across
        # script re-runs.
        fig, ax = plt.subplots(figsize=(12, 12))
        ax.imshow(wordcloud)
        ax.axis("off")
        st.pyplot(fig)
        plt.close(fig)
        st.write("Wordcloud generated for %s topic." % option)
    else:
        # Nothing to draw; avoid calling WordCloud on empty text.
        st.warning("No abstracts are available for the %s topic." % option)

st.subheader("Part B - Most Popular Articles")
st.write("Select if you want to see the most shared, emailed, or viewed articles.")
articleType = st.selectbox("Select your preferred set of articles", ("Shared", "Emailed", "Viewed"))
timePeriod = st.selectbox("Select the period of time (last days)", ("1", "7", "30"))

popular_url = f"https://api.nytimes.com/svc/mostpopular/v2/{articleType.lower()}/{timePeriod}.json?api-key={api_key}"
# Time-box the call so a stalled connection cannot hang the page.
popular_response = requests.get(popular_url, timeout=10).json()
# Persist the response the same way Part A does. Previously this save was
# commented out and a stale response2.json was read instead, so the two
# select boxes above had no effect on the result.
main_functions.save_to_file(popular_response, "JSON_Files/response2.json")

# Use the freshly fetched payload. A payload without "results" (for example
# an API error body) is treated as "no articles" instead of raising KeyError.
articles = popular_response
pop_abstracts = " ".join(i["abstract"] for i in articles.get("results", []))

if pop_abstracts.strip():
    pop_wordcloud = WordCloud().generate(pop_abstracts)

    # Explicit Figure passed to Streamlit, closed afterwards so figures do
    # not accumulate across script re-runs.
    fig, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(pop_wordcloud)
    ax.axis("off")
    st.pyplot(fig)
    plt.close(fig)
else:
    # Nothing to draw; avoid calling WordCloud on empty text.
    st.warning("No article abstracts were returned for this selection.")

ModuleNotFoundError: No module named 'wordcloud'