# The Daily Bugle

## Scrapping data for Comic Story Arcs

Installing and importing Libraries

In [278]:
!pip install requests beautifulsoup4



In [279]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
import time

Scraping the Main Comic Story Arc Table from fandom page

In [301]:
import requests
import pandas as pd

Comic_url = "https://en.wikipedia.org/wiki/List_of_Spider-Man_storylines"

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}

response = requests.get(Comic_url, headers=headers)
html_content = response.text

tables = pd.read_html(html_content)

df_arcs = None
for table in tables:
    if len(table.columns) > 1:
        df_arcs = table
        break

print("Successfully scraped the main table and cleaned titles.")
df_arcs.head()

Successfully scraped the main table and cleaned titles.


  tables = pd.read_html(html_content)


Unnamed: 0,Storyline,Issue(s),Publication date,Writer(s),Penciller(s)
0,"""If This Be My Destiny...!""[1][2]",The Amazing Spider-Man #31–33,December 1965 – February 1966,Stan Lee Steve Ditko,Steve Ditko
1,"""How Green Was My Goblin!""[3]",The Amazing Spider-Man #39–40,August 1966 – September 1966,Stan Lee,John Romita Sr.
2,"""Spider-Man No More!""[4][5]",The Amazing Spider-Man #50–52,July 1967 – September 1967,Stan Lee,John Romita Sr.
3,"""Doc Ock Wins!""[6]","The Amazing Spider-Man #53–56, 58",October 1967 – January 1968,Stan Lee,John Romita Sr.
4,"""Lo, This Monster""[7]",The Spectacular Spider-Man (magazine) #1–2,July–November 1968,Stan Lee,John Romita Sr.


In [302]:
df_arcs.columns = ['Storyline', 'Issues', 'Publication_Date', 'Writers', 'Pencillers']
df_arcs['Storyline'] = df_arcs['Storyline'].str.replace(r'\[\d+\]', '', regex=True).str.replace('"', '')
df_arcs.head()

Unnamed: 0,Storyline,Issues,Publication_Date,Writers,Pencillers
0,If This Be My Destiny...!,The Amazing Spider-Man #31–33,December 1965 – February 1966,Stan Lee Steve Ditko,Steve Ditko
1,How Green Was My Goblin!,The Amazing Spider-Man #39–40,August 1966 – September 1966,Stan Lee,John Romita Sr.
2,Spider-Man No More!,The Amazing Spider-Man #50–52,July 1967 – September 1967,Stan Lee,John Romita Sr.
3,Doc Ock Wins!,"The Amazing Spider-Man #53–56, 58",October 1967 – January 1968,Stan Lee,John Romita Sr.
4,"Lo, This Monster",The Spectacular Spider-Man (magazine) #1–2,July–November 1968,Stan Lee,John Romita Sr.


Looking for Links to each storyline page

In [303]:
response = requests.get(Comic_url, headers=headers)

soup = BeautifulSoup(response.content, 'html.parser')

wiki_table = soup.find('table', {'class': 'wikitable'})

links = []
if wiki_table:
    for row in wiki_table.find('tbody').find_all('tr')[1:]:
        first_cell = row.find('td')
        link_tag = first_cell.find('a') if first_cell else None

        if link_tag and link_tag.get('href') and link_tag.get('href').startswith('/wiki/'):
            links.append("https://en.wikipedia.org" + link_tag.get('href'))
        else:
            links.append(None)

    df_arcs['link'] = links

    print("Successfully extracted links for all story arcs.")
else:
    print("Error: Could not find the 'wikitable' on the page. The page structure may have changed.")

df_arcs.head()

Successfully extracted links for all story arcs.


Unnamed: 0,Storyline,Issues,Publication_Date,Writers,Pencillers,link
0,If This Be My Destiny...!,The Amazing Spider-Man #31–33,December 1965 – February 1966,Stan Lee Steve Ditko,Steve Ditko,https://en.wikipedia.org/wiki/If_This_Be_My_De...
1,How Green Was My Goblin!,The Amazing Spider-Man #39–40,August 1966 – September 1966,Stan Lee,John Romita Sr.,https://en.wikipedia.org/wiki/How_Green_Was_My...
2,Spider-Man No More!,The Amazing Spider-Man #50–52,July 1967 – September 1967,Stan Lee,John Romita Sr.,
3,Doc Ock Wins!,"The Amazing Spider-Man #53–56, 58",October 1967 – January 1968,Stan Lee,John Romita Sr.,
4,"Lo, This Monster",The Spectacular Spider-Man (magazine) #1–2,July–November 1968,Stan Lee,John Romita Sr.,


In [304]:
df_arcs = df_arcs[df_arcs['link'].notna()].copy()
df_arcs.head()

Unnamed: 0,Storyline,Issues,Publication_Date,Writers,Pencillers,link
0,If This Be My Destiny...!,The Amazing Spider-Man #31–33,December 1965 – February 1966,Stan Lee Steve Ditko,Steve Ditko,https://en.wikipedia.org/wiki/If_This_Be_My_De...
1,How Green Was My Goblin!,The Amazing Spider-Man #39–40,August 1966 – September 1966,Stan Lee,John Romita Sr.,https://en.wikipedia.org/wiki/How_Green_Was_My...
7,Green Goblin Reborn!,The Amazing Spider-Man #96–98,May–July 1971,Stan Lee,Gil Kane,https://en.wikipedia.org/wiki/Green_Goblin_Reb...
8,The Six Arms Saga,The Amazing Spider-Man #100–102,September–November 1971,Stan Lee Roy Thomas,Gil Kane,https://en.wikipedia.org/wiki/The_Six_Arms_Saga
9,The Night Gwen Stacy Died,The Amazing Spider-Man #121–122,June–July 1973,Gerry Conway,Gil Kane,https://en.wikipedia.org/wiki/The_Night_Gwen_S...


Scrapping Info about each major Comic storyline

In [305]:
# --- 1. A robust function to get details from a Wikipedia page ---
def get_wiki_details(url):
    """
    Visits a Wikipedia URL to scrape the synopsis and cover image using a two-tiered approach.
    Always returns two values to prevent errors.
    """
    if not isinstance(url, str) or not url.startswith('http'):
        return "Invalid URL", "Invalid URL"

    try:
        # Define the headers INSIDE the function to be safe, or pass them as an argument
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        # The headers argument is now included in this request
        response = requests.get(url, headers=headers)

        if response.status_code != 200:
            return f"Failed to retrieve page (Status: {response.status_code})", "Failed to retrieve page"

        soup = BeautifulSoup(response.content, 'html.parser')

        # --- Scrape Synopsis with Fallback Logic ---
        synopsis = "Synopsis not found"
        # Corrected plot_header to look for <h2> tags
        plot_header = soup.find('h2', string=re.compile(r'Plot|Synopsis|Summary', re.IGNORECASE))
        if plot_header:
            synopsis_p = plot_header.find_next('p')

            if synopsis_p:
                synopsis = synopsis_p.get_text(strip=True)

        if synopsis == "Synopsis not found":
            paragraphs = soup.select('#mw-content-text .mw-parser-output > p')
            if len(paragraphs) > 1:
                synopsis = paragraphs[0].get_text(strip=True) + " " + paragraphs[1].get_text(strip=True)
            elif len(paragraphs) == 1:
                synopsis = paragraphs[0].get_text(strip=True)

        synopsis = re.sub(r'\[\d+\]', '', synopsis)

        # --- Scrape Image ---
        image_url = "Image not found"
        infobox = soup.find('table', {'class': 'infobox'})
        if infobox:
            image_tag = infobox.find('img')
            if image_tag:
                image_src = image_tag.get('src')
                if image_src.startswith('//'):
                    image_url = 'https:' + image_src
                else:
                    image_url = image_src

        return synopsis, image_url

    except Exception as e:
        return f"Error during scraping: {e}", f"Error during scraping: {e}"

# --- Main Loop ---
synopses = []
images = []

for link in df_arcs['link']:
    print(f"Scraping details from: {link}")
    synopsis, image_url = get_wiki_details(link)
    synopses.append(synopsis)
    images.append(image_url)
    time.sleep(1)

df_arcs['synopsis'] = synopses
df_arcs['image_url'] = images

print("\n--- Scraping complete! ---")
df_arcs.head()


Scraping details from: https://en.wikipedia.org/wiki/If_This_Be_My_Destiny...!
Scraping details from: https://en.wikipedia.org/wiki/How_Green_Was_My_Goblin!
Scraping details from: https://en.wikipedia.org/wiki/Green_Goblin_Reborn!
Scraping details from: https://en.wikipedia.org/wiki/The_Six_Arms_Saga
Scraping details from: https://en.wikipedia.org/wiki/The_Night_Gwen_Stacy_Died
Scraping details from: https://en.wikipedia.org/wiki/Clone_Saga
Scraping details from: https://en.wikipedia.org/wiki/Nothing_Can_Stop_the_Juggernaut!
Scraping details from: https://en.wikipedia.org/wiki/The_Kid_Who_Collects_Spider-Man
Scraping details from: https://en.wikipedia.org/wiki/Secret_Wars
Scraping details from: https://en.wikipedia.org/wiki/Alien_Costume_Saga
Scraping details from: https://en.wikipedia.org/wiki/The_Death_of_Jean_DeWolff
Scraping details from: https://en.wikipedia.org/wiki/The_Wedding!_(comics)
Scraping details from: https://en.wikipedia.org/wiki/Kraven%27s_Last_Hunt
Scraping details fr

Unnamed: 0,Storyline,Issues,Publication_Date,Writers,Pencillers,link,synopsis,image_url
0,If This Be My Destiny...!,The Amazing Spider-Man #31–33,December 1965 – February 1966,Stan Lee Steve Ditko,Steve Ditko,https://en.wikipedia.org/wiki/If_This_Be_My_De...,"The Amazing Spider-Man#31: ""If This Be My Dest...",https://upload.wikimedia.org/wikipedia/en/thum...
1,How Green Was My Goblin!,The Amazing Spider-Man #39–40,August 1966 – September 1966,Stan Lee,John Romita Sr.,https://en.wikipedia.org/wiki/How_Green_Was_My...,ThesuperheroSpider-Manhas appeared in manyAmer...,Image not found
7,Green Goblin Reborn!,The Amazing Spider-Man #96–98,May–July 1971,Stan Lee,Gil Kane,https://en.wikipedia.org/wiki/Green_Goblin_Reb...,"Issue #96 begins withPeter Parker, who is low ...",https://upload.wikimedia.org/wikipedia/en/c/cf...
8,The Six Arms Saga,The Amazing Spider-Man #100–102,September–November 1971,Stan Lee Roy Thomas,Gil Kane,https://en.wikipedia.org/wiki/The_Six_Arms_Saga,Peter Parker has recently experienced a lot of...,https://upload.wikimedia.org/wikipedia/en/thum...
9,The Night Gwen Stacy Died,The Amazing Spider-Man #121–122,June–July 1973,Gerry Conway,Gil Kane,https://en.wikipedia.org/wiki/The_Night_Gwen_S...,"Prior to this arc,Norman Osbornhad been the Gr...",https://upload.wikimedia.org/wikipedia/en/thum...


In [308]:
df_arcs = df_arcs[df_arcs['image_url'] != "Image not found"].copy()
df_arcs.head()

Unnamed: 0,Storyline,Issues,Publication_Date,Writers,Pencillers,link,synopsis,image_url
0,If This Be My Destiny...!,The Amazing Spider-Man #31–33,December 1965 – February 1966,Stan Lee Steve Ditko,Steve Ditko,https://en.wikipedia.org/wiki/If_This_Be_My_De...,"The Amazing Spider-Man#31: ""If This Be My Dest...",https://upload.wikimedia.org/wikipedia/en/thum...
7,Green Goblin Reborn!,The Amazing Spider-Man #96–98,May–July 1971,Stan Lee,Gil Kane,https://en.wikipedia.org/wiki/Green_Goblin_Reb...,"Issue #96 begins withPeter Parker, who is low ...",https://upload.wikimedia.org/wikipedia/en/c/cf...
8,The Six Arms Saga,The Amazing Spider-Man #100–102,September–November 1971,Stan Lee Roy Thomas,Gil Kane,https://en.wikipedia.org/wiki/The_Six_Arms_Saga,Peter Parker has recently experienced a lot of...,https://upload.wikimedia.org/wikipedia/en/thum...
9,The Night Gwen Stacy Died,The Amazing Spider-Man #121–122,June–July 1973,Gerry Conway,Gil Kane,https://en.wikipedia.org/wiki/The_Night_Gwen_S...,"Prior to this arc,Norman Osbornhad been the Gr...",https://upload.wikimedia.org/wikipedia/en/thum...
10,The Original Clone Saga,The Amazing Spider-Man #139–150 Giant-Size Spi...,December 1974 – November 1975 December 1978 – ...,Gerry Conway Bill Mantlo Various,Ross Andru Jim Mooney Various,https://en.wikipedia.org/wiki/Clone_Saga,"The ""Clone Saga"" is an extendedcomic-bookstory...",https://upload.wikimedia.org/wikipedia/en/thum...


## Sentimental Analysis of every Story Arc

Installing praw and setting up Reddit API

In [309]:
!pip install praw



In [310]:
import praw

reddit = praw.Reddit(
    client_id='ohnDAWPy0EwkrsijHHFOLQ',
    client_secret='bdv_46Qdn28MQyZgeiSNJTTFhNsOBQ',
    user_agent='DailyBugleReport by u/Substantial-Tax-5966'
)
print("Successfully connected to Reddit API.")

Successfully connected to Reddit API.


Scraping Comments for Each Story Arc

In [311]:
def clean_title_for_search(full_title):
    if ':' in full_title:
        title = full_title.split(':')[-1].strip()
    else:
        title = full_title

    title = title.lower()
    title = re.sub(r'[^\w\s]', '', title)

    return title

def get_reddit_comments(story_title, limit=100):
    try:
        subreddit = reddit.subreddit('Spiderman')
        search_query = clean_title_for_search(story_title)

        print(f"Searching Reddit for: '{search_query}'...")
        submission = next(subreddit.search(search_query, sort='top', limit=10))
        submission.comments.replace_more(limit=0)
        comments = [comment.body for comment in submission.comments.list()]

        print(f"Found {len(comments)} comments for '{story_title}'")
        return comments[:limit]

    except Exception as e:
        print(f"Could not find comments for '{story_title}'.")
        return []

df_arcs['fan_comments'] = df_arcs['Storyline'].apply(get_reddit_comments)

print("\n--- Fan comment scraping complete! ---")

df_arcs['comment_count'] = df_arcs['fan_comments'].apply(len)
original_count = len(df_arcs)

df_arcs = df_arcs[df_arcs['comment_count'] > 0].copy()
final_count = len(df_arcs)

print(f"\nRemoved {original_count - final_count} story arcs that had no fan comments.")
print(f"Final dataset has {final_count} story arcs with fan sentiment to analyze.")
df_arcs.head()

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Searching Reddit for: 'if this be my destiny'...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Found 4 comments for 'If This Be My Destiny...!'
Searching Reddit for: 'green goblin reborn'...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Found 460 comments for 'Green Goblin Reborn!'
Searching Reddit for: 'the six arms saga'...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Found 120 comments for 'The Six Arms Saga'
Searching Reddit for: 'the night gwen stacy died'...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Found 320 comments for 'The Night Gwen Stacy Died'
Searching Reddit for: 'the original clone saga'...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Found 24 comments for 'The Original Clone Saga'
Searching Reddit for: 'nothing can stop the juggernaut'...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Found 182 comments for 'Nothing Can Stop the Juggernaut!'
Searching Reddit for: 'the kid who collects spiderman'...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Found 83 comments for 'The Kid Who Collects Spider-Man'
Searching Reddit for: 'secret wars'...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Found 487 comments for 'Secret Wars'
Searching Reddit for: 'the alien costume saga'...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Found 263 comments for 'The Alien Costume Saga'
Searching Reddit for: 'the death of jean dewolff'...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Found 48 comments for 'The Death of Jean DeWolff'
Searching Reddit for: 'the wedding'...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Found 394 comments for 'The Wedding!'
Searching Reddit for: 'kravens last hunt'...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Found 30 comments for 'Fearful Symmetry: Kraven's Last Hunt'
Searching Reddit for: 'torment'...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Found 336 comments for 'Torment'
Searching Reddit for: 'the return of the sinister six'...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Found 426 comments for 'The Return of the Sinister Six'
Searching Reddit for: 'maximum carnage'...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Found 48 comments for 'Maximum Carnage'
Searching Reddit for: 'the clone saga'...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Found 80 comments for 'The Clone Saga'
Searching Reddit for: 'planet of the symbiotes'...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Found 49 comments for 'Planet of the Symbiotes'
Searching Reddit for: 'the gathering of five'...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Found 23 comments for 'The Gathering of Five'
Searching Reddit for: 'the final chapter'...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Found 153 comments for 'The Final Chapter'
Searching Reddit for: 'flowers for rhino'...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Found 37 comments for 'Flowers for Rhino'
Searching Reddit for: 'the other'...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Found 491 comments for 'The Other'
Searching Reddit for: 'civil war'...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Found 37 comments for 'Civil War'
Searching Reddit for: 'back in black'...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Found 126 comments for 'Back in Black'
Searching Reddit for: 'one more day'...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Found 400 comments for 'One More Day'
Searching Reddit for: 'brand new day'...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Found 71 comments for 'Brand New Day'
Searching Reddit for: 'new ways to die'...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Found 234 comments for 'New Ways to Die'
Searching Reddit for: 'died in your arms tonight'...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Could not find comments for 'Died in Your Arms Tonight'.
Searching Reddit for: 'redheaded stranger'...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Found 2 comments for 'Red-Headed Stranger'
Searching Reddit for: 'return of the black cat'...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Found 235 comments for 'Return of the Black Cat'
Searching Reddit for: 'the gauntlet'...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Found 290 comments for 'The Gauntlet'
Searching Reddit for: 'grim hunt'...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Found 27 comments for 'Grim Hunt'
Searching Reddit for: 'one moment in time'...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Found 474 comments for 'One Moment in Time'
Searching Reddit for: 'big time'...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Found 288 comments for 'Big Time'
Searching Reddit for: 'spiderisland'...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Found 24 comments for 'Spider-Island'
Searching Reddit for: 'ends of the earth'...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Found 37 comments for 'Ends of the Earth'
Searching Reddit for: 'dying wish'...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Found 54 comments for 'Dying Wish'
Searching Reddit for: 'spiderverse'...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Found 172 comments for 'Spider-Verse'
Searching Reddit for: 'the clone conspiracy'...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Found 95 comments for 'Dead No More: The Clone Conspiracy'
Searching Reddit for: 'spidergeddon'...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Found 57 comments for 'Spider-Geddon'
Searching Reddit for: 'hunted'...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Found 297 comments for 'Hunted'
Searching Reddit for: 'absolute carnage'...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Found 79 comments for 'Absolute Carnage'
Searching Reddit for: 'sinister war'...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Found 48 comments for 'Sinister War'
Searching Reddit for: 'dark web'...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Found 450 comments for 'Dark Web'

--- Fan comment scraping complete! ---

Removed 1 story arcs that had no fan comments.
Final dataset has 42 story arcs with fan sentiment to analyze.


Unnamed: 0,Storyline,Issues,Publication_Date,Writers,Pencillers,link,synopsis,image_url,fan_comments,comment_count
0,If This Be My Destiny...!,The Amazing Spider-Man #31–33,December 1965 – February 1966,Stan Lee Steve Ditko,Steve Ditko,https://en.wikipedia.org/wiki/If_This_Be_My_De...,"The Amazing Spider-Man#31: ""If This Be My Dest...",https://upload.wikimedia.org/wikipedia/en/thum...,"[Okay that's cool as fuck., ""come on Spider-Ma...",4
7,Green Goblin Reborn!,The Amazing Spider-Man #96–98,May–July 1971,Stan Lee,Gil Kane,https://en.wikipedia.org/wiki/Green_Goblin_Reb...,"Issue #96 begins withPeter Parker, who is low ...",https://upload.wikimedia.org/wikipedia/en/c/cf...,[Finish it! FINISH IT!!\n\nMemes aside this is...,100
8,The Six Arms Saga,The Amazing Spider-Man #100–102,September–November 1971,Stan Lee Roy Thomas,Gil Kane,https://en.wikipedia.org/wiki/The_Six_Arms_Saga,Peter Parker has recently experienced a lot of...,https://upload.wikimedia.org/wikipedia/en/thum...,[“There was the ridiculously pointless and ove...,100
9,The Night Gwen Stacy Died,The Amazing Spider-Man #121–122,June–July 1973,Gerry Conway,Gil Kane,https://en.wikipedia.org/wiki/The_Night_Gwen_S...,"Prior to this arc,Norman Osbornhad been the Gr...",https://upload.wikimedia.org/wikipedia/en/thum...,"[MJ who???, I think the best part of this stor...",100
10,The Original Clone Saga,The Amazing Spider-Man #139–150 Giant-Size Spi...,December 1974 – November 1975 December 1978 – ...,Gerry Conway Bill Mantlo Various,Ross Andru Jim Mooney Various,https://en.wikipedia.org/wiki/Clone_Saga,"The ""Clone Saga"" is an extendedcomic-bookstory...",https://upload.wikimedia.org/wikipedia/en/thum...,[Then along came Slott and Wells to tear him a...,24


Cleaning Fan comments

In [312]:
!pip install nltk
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize



In [313]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import re

nltk.download('punkt_tab')
nltk.download('stopwords')

stop_words = set(stopwords.words('english'))

def clean_comments(comments):
    """A function to clean a single comment string."""
    comments = comments.lower()

    comments = re.sub(r'http\S+|www\S+|https\S+', '', comments, flags=re.MULTILINE)
    comments = re.sub(r'\@\w+|\#', '', comments)
    comments = re.sub(r'[^a-zA-Z\s]', '', comments)

    tokens = word_tokenize(comments)
    filtered_tokens = [word for word in tokens if word.isalnum() and word not in stop_words]
    return ' '.join(filtered_tokens)

df_arcs['cleaned_comments'] = df_arcs['fan_comments'].apply(
    lambda comment_list: [clean_comments(comment) for comment in comment_list]
)

print("Successfully cleaned all fan comments.")
df_arcs[['Storyline', 'fan_comments', 'cleaned_comments']].head()

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Successfully cleaned all fan comments.


Unnamed: 0,Storyline,fan_comments,cleaned_comments
0,If This Be My Destiny...!,"[Okay that's cool as fuck., ""come on Spider-Ma...","[okay thats cool fuck, come spiderman gets eve..."
7,Green Goblin Reborn!,[Finish it! FINISH IT!!\n\nMemes aside this is...,"[finish finish memes aside pretty badass, remo..."
8,The Six Arms Saga,[“There was the ridiculously pointless and ove...,[ridiculously pointless overhyped death kamala...
9,The Night Gwen Stacy Died,"[MJ who???, I think the best part of this stor...","[mj, think best part story gwen died mexican p..."
10,The Original Clone Saga,[Then along came Slott and Wells to tear him a...,"[along came slott wells tear apart, still dont..."


Performing Sentiment Analysis on Fan Comments

In [314]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import numpy as np
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [315]:
analyzer = SentimentIntensityAnalyzer()

def analyze_fan_sentiment(comment_list):
    if not comment_list:
        return 0
    scores = [analyzer.polarity_scores(comment)['compound'] for comment in comment_list]
    return np.mean(scores)

df_arcs['fan_sentiment_score'] = df_arcs['cleaned_comments'].apply(analyze_fan_sentiment)

print("Successfully analyzed the sentiment of all fan comments!")
df_arcs[['Storyline', 'synopsis', 'fan_sentiment_score']].head()

Successfully analyzed the sentiment of all fan comments!


Unnamed: 0,Storyline,synopsis,fan_sentiment_score
0,If This Be My Destiny...!,"The Amazing Spider-Man#31: ""If This Be My Dest...",0.2513
7,Green Goblin Reborn!,"Issue #96 begins withPeter Parker, who is low ...",0.265917
8,The Six Arms Saga,Peter Parker has recently experienced a lot of...,-0.051891
9,The Night Gwen Stacy Died,"Prior to this arc,Norman Osbornhad been the Gr...",0.074787
10,The Original Clone Saga,"The ""Clone Saga"" is an extendedcomic-bookstory...",0.234467


In [317]:
!pip install streamlit
!pip install pyngrok



In [294]:
%%writefile app.py
import streamlit as st
import pandas as pd
import base64
import ast

def set_video_background(video_file):
    with open(video_file, "rb") as video:
        video_bytes = video.read()
    video_base64 = base64.b64encode(video_bytes).decode()

    st.markdown(
        f"""
        <style>
        @import url('https://fonts.googleapis.com/css2?family=Playfair+Display:wght@700&family=Roboto:wght@400;700&display=swap');

        /* --- General Setup & Background --- */
        .stApp {{
            background-color: transparent;
        }}
        #bg-video {{
            position: fixed; top: 0; left: 0; width: 100vw; height: 100vh;
            object-fit: cover; z-index: -1; filter: blur(4px); opacity: 0.5;
        }}

        /* --- Main Content Container --- */
        .main .block-container {{
            background-color: rgba(10, 10, 20, 0.9); /* Darker overlay for better text contrast */
            border-radius: 15px;
            padding: 2rem;
            backdrop-filter: blur(10px);
            border: 1px solid rgba(255, 255, 255, 0.1);
        }}

        /* --- Themed Elements & Typography --- */
        h1 {{
            font-family: 'Playfair Display', serif; /* Newspaper font */
            color: #FFFFFF;
            font-size: 3.2em !important;
            text-align: center;
            padding-bottom: 1.5rem;
        }}
        h3.subtitle {{
            text-align: center;
            color: #CCCCCC;
            font-family: 'Roboto', sans-serif;
            margin-top: -2rem; /* Pull subtitle closer to title */
            margin-bottom: 2rem;
        }}
        h2, h3, h4, h5 {{
            color: #E62429; /* Spider-Man Red */
            font-family: 'Roboto', sans-serif;
            font-weight: bold;
        }}
        p, .stMarkdown, .stMetric {{
            font-family: 'Roboto', sans-serif;
            font-size: 1.1em !important; /* Slightly adjusted text size for balance */
        }}

        /* --- Dropdown Selector (Half-width, right-aligned) --- */
        .stSelectbox div[data-baseweb="select"] > div {{
            border: 2px solid #0056b3; /* Spider-Man Blue */
        }}

        /* --- Professional Detail Cards --- */
        .detail-card {{
            background-color: rgba(255, 255, 255, 0.05);
            border-radius: 6px;
            padding: 0.5rem 0.8rem; /* Compact padding */
            margin-bottom: 0.4rem;
            border-left: 4px solid #0056b3;
        }}
        .detail-card strong {{
            color: #E62429; /* Red for emphasis */
        }}

        /* --- High-Contrast Synopsis & Comments Box --- */
        .content-box {{
            background-color: rgba(0, 0, 0, 0.25);
            border-radius: 8px;
            padding: 1rem;
            margin-top: 1rem;
        }}
        .content-box p {{
            color: #f0f2f6;
            line-height: 1.5;
            font-size: 1.2em !important;
        }}
        .comments-container {{
            height: 180px; /* Reduced height to prevent scrolling */
            overflow-y: auto;
            border-radius: 5px;
            padding-right: 10px; /* Space for scrollbar */
        }}
        .comment-box {{
            border-bottom: 1px solid rgba(255, 255, 255, 0.1);
            padding: 0.5rem 0;
            margin-bottom: 0.5rem;
            font-size: 1em;
        }}

        /* --- Default Welcome View Styling --- */
        .welcome-container img {{
            border-radius: 10px;
        }}
        .welcome-container p {{
            font-size: 1.2em;
            line-height: 1.5;
        }}
        </style>

        <video autoplay muted loop id="bg-video">
            <source src="data:video/mp4;base64,{video_base64}" type="video/mp4">
        </video>
        """,
        unsafe_allow_html=True
    )

# --- Load Your Data ---
try:
    df = pd.read_csv('spiderman_sentiment_data.csv')
except FileNotFoundError:
    st.error("Error: 'spiderman_sentiment_data.csv' not found. Please run the data collection notebook first.")
    st.stop()

st.set_page_config(page_title="The Daily Bugle Report", layout="wide")
set_video_background('Bg.mp4')

# --- App Header ---
st.title("THE DAILY BUGLE REPORT")

# --- Interactive Selector (Right-Aligned) ---
_, col2 = st.columns([1, 1])
with col2:
    storyline_options = ["-- Select an Arc to Analyze --"] + df['Storyline'].tolist()
    selected_arc = st.selectbox("", storyline_options, label_visibility="collapsed")

# --- Display Area ---
if selected_arc != "-- Select an Arc to Analyze --":
    arc_data = df[df['Storyline'] == selected_arc].iloc[0]

    col1, col2 = st.columns([1, 2]) # Ratio for a smaller image

    with col1:
        placeholder_image = "https://placehold.co/400x600/0a0a14/E62429?text=COVER+ART%0ANOT+AVAILABLE"
        image_to_display = arc_data['image_url'] if pd.notna(arc_data['image_url']) else placeholder_image
        st.image(image_to_display, use_column_width=True)

    with col2:
        st.header(f"{arc_data['Storyline']}")

        # Display details in styled "cards"
        st.markdown(f'<div class="detail-card"><strong>Publication Date:</strong> {arc_data["Publication_Date"]}</div>', unsafe_allow_html=True)
        st.markdown(f'<div class="detail-card"><strong>Issue(s):</strong> {arc_data["Issues"]}</div>', unsafe_allow_html=True)
        st.markdown(f'<div class="detail-card"><strong>Writer(s):</strong> {arc_data["Writers"]}</div></div>', unsafe_allow_html=True)
        st.markdown(f'<div class="detail-card"><strong>Penciller(s):</strong> {arc_data["Pencillers"]}</div></div>', unsafe_allow_html=True)

        # Synopsis (now in the right column)
        st.markdown("<h5>SYNOPSIS</h5>", unsafe_allow_html=True)
        st.markdown(f'<div class="content-box"><p>{arc_data["synopsis"]}</p></div>', unsafe_allow_html=True)

        sub_col1, sub_col2 = st.columns(2)

        with sub_col1:
            st.markdown("<h5>THE PEOPLE'S VERDICT</h5>", unsafe_allow_html=True)
            st.metric(label="Average Fan Sentiment", value=f"{arc_data['fan_sentiment_score']:.2f}")
            st.progress((arc_data['fan_sentiment_score'] + 1) / 2)

        with sub_col2:
            st.markdown("<h5>TOP FAN COMMENTS</h5>", unsafe_allow_html=True)
            try:
                if pd.notna(arc_data['fan_comments']):
                    fan_comments = ast.literal_eval(arc_data['fan_comments'])
                    if fan_comments:
                        comment_html = ""
                        for comment in fan_comments:
                            comment_html += f'<div class="comment-box">{comment}</div>'
                        st.markdown(f'<div class="comments-container">{comment_html}</div>', unsafe_allow_html=True)
                    else:
                        st.info("No fan comments were found.")
                else:
                     st.info("No fan comments were found.")
            except (ValueError, SyntaxError):
                st.error("Could not parse comments.")

else:
    # --- Default Welcome View ---
    col1, col2 = st.columns([1, 2])
    with col1:
        st.image("default.jpg", use_column_width=True)
    with col2:
        st.markdown("""
        <div class="welcome-container">
            <h2>Welcome to The Daily Bugle Report!</h2>
            <p>Your source for public opinion on all things Spider-Man. This dashboard uses data science deliver the verdict on the web-slinger's most iconic comic moments.</p>
            <p><strong>How it works:</strong> This project scrapes wiki to look for all iconic spiderman moments and then analyzes fan comments from reddit to calculate a sentiment score for each major story arc. It combines web scraping, natural language processing (NLP), and interactive data visualization to bring you the definitive fan perspective.</p>
            <p><strong>To get started, select a story arc from the dropdown menu above.</strong></p>
        </div>
        """, unsafe_allow_html=True)



Overwriting app.py


In [318]:
from pyngrok import ngrok
import getpass

df_arcs.to_csv('spiderman_sentiment_data.csv', index=False)

authtoken = getpass.getpass("Enter your ngrok authtoken (from dashboard.ngrok.com): ")
ngrok.set_auth_token(authtoken)

public_url = ngrok.connect(8501)
print(f"Your Streamlit app is live at: {public_url}")

!streamlit run app.py --server.port 8501

Enter your ngrok authtoken (from dashboard.ngrok.com): ··········
Your Streamlit app is live at: NgrokTunnel: "https://0ecf075f8a53.ngrok-free.app" -> "http://localhost:8501"

Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.23.98.95:8501[0m
[0m
2025-09-21 13:38:05.294 `label` got an empty value. This is discouraged for accessibility reasons and may be disallowed in the future by raising an exception. Please provide a non-empty label and hide it with label_visibility if needed.
Stack (most recent call last):
  File "/usr/lib/python3.12/threading.py", line 1032, in _bootstrap
    self._bootstrap_inner()
  File "/usr/lib/python3.12/threading.py", line 1075, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.12/threadi