#### 1. On “https://www.commonsensemedia.org/book-reviews” search for “kids-books-about-911”.

In [1]:
import requests
import json
from bs4 import BeautifulSoup
import pandas as pd
import re
import numpy as np

# Fetch the Common Sense Media search-results page and parse it into `soup`.
# `soup` is pre-bound to None so a failed request is reported clearly below
# instead of surfacing later as an unrelated NameError.
soup = None

try:
    url = "https://www.commonsensemedia.org/search/kids-books-about-911"
    # A timeout keeps the cell from hanging indefinitely on an unresponsive server.
    response = requests.get(url, timeout=10)

    if response.status_code == 200:
        html_content = response.text
        print('Page received')
        soup = BeautifulSoup(html_content, 'html.parser')
    else:
        print(f"Failed to retrieve page: {response.status_code}")

except requests.RequestException as e:
    # Report what actually went wrong rather than a bare "Error".
    print(f"Error: {e}")

if soup is None:
    # Every later cell depends on `soup`; stop here with an explicit message.
    raise RuntimeError("Search page could not be retrieved; see the message above.")

soup.prettify()

Page received


'<!DOCTYPE html>\n<html dir="ltr" lang="en" prefix="og: https://ogp.me/ns#">\n <head>\n  <meta charset="utf-8"/>\n  <meta content="Common Sense Media improves the lives of kids and families by providing independent reviews, age ratings, &amp; other information about all types of media." name="description"/>\n  <meta content="index, follow" name="robots"/>\n  <link href="https://www.commonsensemedia.org/search/kids-books-about-911" rel="canonical"/>\n  <link href="/themes/custom/common_sense_media_theme/images/share/logo-commonsense-share-012021.jpg" rel="image_src"/>\n  <link href="/themes/custom/common_sense/images/favicons/favicon-16x16.png" rel="icon" sizes="16x16"/>\n  <link href="/themes/custom/common_sense/images/favicons/favicon-32x32.png" rel="icon" sizes="32x32"/>\n  <link href="/themes/custom/common_sense/images/favicons/favicon-96x96.png" rel="icon" sizes="96x96"/>\n  <link href="/themes/custom/common_sense/images/favicons/favicon-192x192.png" rel="icon" sizes="192x192"/>\n 

In [2]:
# Map each search-result title to the absolute URL of its review page.
# NOTE(review): search results can include non-book pages (the output below
# contains a /tv-reviews/ link) -- filter on the href if only books are wanted.
bookLists = {}
for result in soup.find_all('div', class_='site-search-teaser'):
    book = result.find('a', class_='link link--title')
    # Skip teasers without a title link or without an href instead of crashing.
    href = book.get('href') if book is not None else None
    if not href:
        continue
    bookLists[book.text] = 'https://www.commonsensemedia.org' + href
bookLists

{'All About U.S.: A Look at the Lives of 50 Real Kids from Across the United States': 'https://www.commonsensemedia.org/book-reviews/all-about-us-a-look-at-the-lives-of-50-real-kids-from-across-the-united-states',
 'Nothing Like the Movies: Better Than the Movies, Book 2': 'https://www.commonsensemedia.org/book-reviews/nothing-like-the-movies-better-than-the-movies-book-2',
 'Diary of a Wimpy Kid: Hot Mess': 'https://www.commonsensemedia.org/book-reviews/diary-of-a-wimpy-kid-hot-mess',
 'Wrath of the Triple Goddess: Percy Jackson and the Olympians: The Senior Year Adventures, Book 2': 'https://www.commonsensemedia.org/book-reviews/wrath-of-the-triple-goddess-percy-jackson-and-the-olympians-the-senior-year-adventures-book-2',
 'Impossible Creatures: Impossible Creatures, Book 1': 'https://www.commonsensemedia.org/book-reviews/impossible-creatures-impossible-creatures-book-1',
 'Kids Pick the President': 'https://www.commonsensemedia.org/tv-reviews/kids-pick-the-president',
 'Uprooted: A

#### 2. Get the URL for the first page of results and use Beautiful Soup to get all of the reviews from these pages. (Hint: you will need to import json, then extract and parse the JSON-LD content.)

In [3]:
# Collect the user-review texts for every title found by the search.
# Reviews maps title -> list of review strings (adult reviews first, then child).
Reviews = {}

for book, review_url in bookLists.items():
    Reviews[book] = []
    # Adult and child reviews are served from separate sub-pages of each title.
    urls = [review_url + '/user-reviews/adult', review_url + '/user-reviews/child']

    for new_url in urls:
        try:
            response = requests.get(new_url, timeout=3)

            if response.status_code == 200:
                # Parse into a dedicated name: reusing `soup` here would overwrite
                # the search-results page that the bookLists cell reads from.
                review_soup = BeautifulSoup(response.text, 'html.parser')

                for content in review_soup.find_all('div', class_='user-generated-content__body'):
                    # class_='reveal__content' matches every div carrying that class,
                    # including those marked 'reveal__content collapse', so a single
                    # pass already picks up both the short and the collapsed reviews.
                    for review_div in content.find_all('div', class_='reveal__content'):
                        review_text = review_div.text.strip()
                        if review_text:
                            Reviews[book].append(review_text)

            else:
                print(f"Failed to retrieve page: {new_url} - Status code: {response.status_code}")

        except Exception as e:
            # Best effort: report the failure and continue with the remaining pages.
            print(f"Error occurred while processing {new_url}: {e}")

for book, review_data in Reviews.items():
    print(f"Book: {book}")
    for review in review_data:
        print(review)

Book: All About U.S.: A Look at the Lives of 50 Real Kids from Across the United States
Book: Nothing Like the Movies: Better Than the Movies, Book 2
AMAZING BOOK! Many pop culture references from shows like "Gilmore Girls" one of my personal favorites. Some inappropriate moments but teenagers should be fine.
i actually loved this book so much (low key better than the first but don’t come for me). i loved how wes never gave up on liz and kinda loved how grown up liz had gotten! and oh my gosh i literally love sarah and clark together i need a book of just them two!! lynn painters books are always so good to me because as a young christian it’s always hard to find a clean romance book, but lynn’s have never let me down. i will say i sobbed my eyes out the whole time i was reading this because of what wes had to go through and losing liz, but if you cry while reading a book doesn’t that just show how good it is?! anyways 10/10 would recommend! :)
Book: Diary of a Wimpy Kid: Hot Mess
just

#### 3. Use the AFINN method for sentiment analysis. The output should be a data frame with each review, the afn.score, and a label you create based on the score (0: neutral, <0: negative, >0: positive). This is an example of the AFINN sentiment analysis output.
#### 4. Add the name of the book to the data frame in question 3. 

In [4]:
from afinn import Afinn

afinn = Afinn()

# Flatten {book: [review, ...]} into one (book, review) pair per review;
# titles without any reviews contribute nothing.
book_review_pairs = [
    (title, text)
    for title, texts in Reviews.items()
    for text in texts
]

books = [title for title, _ in book_review_pairs]
reviews = [text for _, text in book_review_pairs]
scores = [afinn.score(text) for text in reviews]
# Label by the sign of the AFINN score: 0 -> neutral, < 0 -> negative, > 0 -> positive.
sentiments = [
    'neutral' if value == 0 else ('negative' if value < 0 else 'positive')
    for value in scores
]

df = pd.DataFrame({'Book': books, 'Review': reviews, 'Score': scores, 'Sentiment': sentiments})
df

Unnamed: 0,Book,Review,Score,Sentiment
0,Nothing Like the Movies: Better Than the Movie...,AMAZING BOOK! Many pop culture references from...,8.0,positive
1,Nothing Like the Movies: Better Than the Movie...,i actually loved this book so much (low key be...,21.0,positive
2,Diary of a Wimpy Kid: Hot Mess,just.... PERFECT I cant even discribe it its ...,8.0,positive
3,Diary of a Wimpy Kid: Hot Mess,The 19th book in the famous Wimpy Kid series i...,3.0,positive
4,Diary of a Wimpy Kid: Hot Mess,Spagreggy Best Spagreggy Amazing,7.0,positive
5,Diary of a Wimpy Kid: Hot Mess,AMAZING BOOK!!!!! I ABSOLUTELY LOVE THE DIARY ...,4.0,positive
6,Diary of a Wimpy Kid: Hot Mess,This book is just too good.,3.0,positive
7,Wrath of the Triple Goddess: Percy Jackson and...,"I liked this book, and I think any kids 9+ who...",0.0,neutral
8,Kids Pick the President,I restock this box of cookies about once a mon...,16.0,positive
9,The Book of Bill,Definitely not for kids. That's the first sent...,15.0,positive


#### 5. Use the NRCLex method for sentiment analysis. Pass each review to NRCLex and get top_emotions for that review, then store all reviews and their top emotions in a text file. Here is an example of how it should look for each review:

In [5]:
from nrclex import NRCLex

# Run NRCLex on every review and keep its top emotions alongside the review.
# top_emotions is a list of (emotion, score) tuples, e.g. [('joy', 0.5), ('trust', 0.3)].
df['Top_Emotions'] = df['Review'].map(lambda text: NRCLex(text).top_emotions)

df

Unnamed: 0,Book,Review,Score,Sentiment,Top_Emotions
0,Nothing Like the Movies: Better Than the Movie...,AMAZING BOOK! Many pop culture references from...,8.0,positive,"[(negative, 0.25)]"
1,Nothing Like the Movies: Better Than the Movie...,i actually loved this book so much (low key be...,21.0,positive,"[(positive, 0.2564102564102564)]"
2,Diary of a Wimpy Kid: Hot Mess,just.... PERFECT I cant even discribe it its ...,8.0,positive,"[(trust, 0.25), (positive, 0.25), (joy, 0.25)]"
3,Diary of a Wimpy Kid: Hot Mess,The 19th book in the famous Wimpy Kid series i...,3.0,positive,"[(anger, 0.25), (trust, 0.25), (positive, 0.25..."
4,Diary of a Wimpy Kid: Hot Mess,Spagreggy Best Spagreggy Amazing,7.0,positive,"[(fear, 0.0), (anger, 0.0), (anticip, 0.0), (t..."
5,Diary of a Wimpy Kid: Hot Mess,AMAZING BOOK!!!!! I ABSOLUTELY LOVE THE DIARY ...,4.0,positive,"[(sadness, 0.5)]"
6,Diary of a Wimpy Kid: Hot Mess,This book is just too good.,3.0,positive,"[(trust, 0.2), (surprise, 0.2), (positive, 0.2..."
7,Wrath of the Triple Goddess: Percy Jackson and...,"I liked this book, and I think any kids 9+ who...",0.0,neutral,"[(anger, 0.18181818181818182), (positive, 0.18..."
8,Kids Pick the President,I restock this box of cookies about once a mon...,16.0,positive,"[(trust, 0.29411764705882354), (positive, 0.29..."
9,The Book of Bill,Definitely not for kids. That's the first sent...,15.0,positive,"[(positive, 0.21739130434782608)]"


In [6]:
# Write one "<review>: <top emotions>" entry per review, each followed by a blank line.
with open("reviews_with_emotions.txt", "w", encoding="utf-8") as file:
    file.writelines(
        f"{review}: {top_emotions}\n\n"
        for review, top_emotions in zip(df['Review'], df['Top_Emotions'])
    )

print("reviews_with_emotions.txt saved successfully.")

reviews_with_emotions.txt saved successfully.
