# Web Scraping: Twitch UserVoice

## Imports

In [44]:
import requests
from bs4 import BeautifulSoup
import csv
import os
import numpy as np

## Final Scraper for "Chat" category

In [69]:
# Target URL
base_url = "https://twitch.uservoice.com/forums/310201-chat?filter=top&page="
detailed_url = "https://twitch.uservoice.com"
csv_file = 'twitch_ideas.csv'
page_no = 1


# Check if the file exists. If not, write the header.
if not os.path.exists(csv_file):
    with open(csv_file, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Author', 'Name', 'Description', 'VoteCount', 'NumComments', 'Topic', 'Category', 'Date'])
        
# Iterate through each page
while True:
    #if page_no > 15:# Stop after 15 pages
    #    break
    url = base_url + str(page_no)
    
    # Fetch website content
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all ideas
    ideas = soup.find_all('li', class_='uvListItem uvIdea uvIdea-list')
    
    # Find all detailed links
    detailed_links = [a['href'] for a in soup.find_all('a', href=True) if "/forums/310201-chat/suggestions/" in a['href'] and
              "#comments" not in a['href']]
    
    # Append data to the CSV file
    with open(csv_file, 'a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)

        for idx, idea in enumerate(ideas): # Needed to seperate in order to have 2 requests
            
            # Extract the required data
            name = idea.find('h2',class_='uvIdeaTitle uvCustomLink-color').get_text(strip = True)
            
            # Had to add this check as some ideas dont include descriptions
            description = idea.find('div', class_='typeset')
            if description == None:
                description = np.nan
            else:
                description = description.get_text(strip = True)
            
            vote_count = idea.find('div', class_='uvIdeaVoteCount').get_text(strip = True).replace("votes",'').replace("vote",'').replace(",","").strip()
            vote_count = int(vote_count)

            # Extracting num_comments and topic, had to seperate one class
            meta_div = idea.find('div', class_='uvIdeaMeta')

            c_tag = meta_div.find('a', title=lambda x: x and x.startswith("Comments for"))
            num_comments = c_tag.get_text(strip=True).replace("comments", "").replace("comment",'').strip()
            num_comments = int(num_comments)

            t_tag = meta_div.find('a', title=lambda x: x and x.startswith("Ideas similar to"))
            topic = t_tag.get_text(strip=True)
            
            # Initializing category for each scrape
            category = 'Chat'
            
            # Grabbing detailed information
            link = detailed_links[idx]
            detailed_response = requests.get(detailed_url + link)
            detailed_soup = BeautifulSoup(detailed_response.content, 'html.parser')

            # Extract author and date
            author = detailed_soup.find('span', class_='vcard').get_text(strip=True)
            date = detailed_soup.find('time').get_text(strip=True)
            
            # Write to the CSV
            writer.writerow([author, name, description, vote_count, num_comments, topic, category, date])
            
        # Determining if there is a next page, if no page is found, break
        next_page_indicator = soup.find('a', rel='next')
    
        if not next_page_indicator:
            break

        page_no += 1 
print("Scraping completed and data is saved to twitch_ideas.csv")

Scraping completed and data is saved to twitch_ideas.csv


===========================================================================================================================

## Scraper for other categories
* Account Management - [X]
* Ads - [x]
* Badges/Emotes - [x]
* Bits - [x]
* Channel Page - [x]
* Channel Points - [x]
* Charity - [x]
* Chat - [x]
* Creator Camp - [x]
* Creator Dashboard - [x]
* Creator Dashboard: Stream Manager - [x]
* Creators and Stream Features - [x]
* Customer Experience - [x]
* Developers - [x]
* Discover - [x]
* Extensions - [x]
* IGDB - [x]
* IRL Events and Merch - [x]
* Localization - [x]
* Moderation - [x]
* Purchase Management - [x]
* Safety - [x]
* Subscriptions - [x]
* Twitch Applications: Consoles - [x]
* Twitch Applications: Mobile - [x]
* Twitch Applications: TV Apps - [x]
* Twitch Studio - [x]
* User Accessibility - []
* Video Features - []
* Video Performance - []

## Account Manegment

In [75]:
base_url = "https://twitch.uservoice.com/forums/310228-account-management?filter=top&page="  ## Change this for other cat##
detailed_url = "https://twitch.uservoice.com"
csv_file = 'twitch_ideas.csv'
page_no = 1

# Check if the file exists. If not, write the header.
if not os.path.exists(csv_file):
    with open(csv_file, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Author', 'Name', 'Description', 'VoteCount', 'NumComments', 'Topic', 'Category', 'Date'])
        
# Iterate through each page
while True:
    url = base_url + str(page_no)
    
    # Fetch website content
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all ideas
    ideas = soup.find_all('li', class_='uvListItem uvIdea uvIdea-list')
    
    # Find all detailed links
    detailed_links = [a['href'] for a in soup.find_all('a', href=True) if "/forums/310228-account-management/suggestions/" in a['href'] and
              "#comments" not in a['href']]  ## Change this for other cat ##
    
    # Append data to the CSV file
    with open(csv_file, 'a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)

        for idx, idea in enumerate(ideas): # Needed to seperate in order to have 2 requests
            
            # Extract the required data
            name = idea.find('h2',class_='uvIdeaTitle uvCustomLink-color').get_text(strip = True)
            
            # Had to add this check as some ideas dont include descriptions
            description = idea.find('div', class_='typeset')
            if description == None:
                description = np.nan
            else:
                description = description.get_text(strip = True)
            
            vote_count = idea.find('div', class_='uvIdeaVoteCount').get_text(strip = True).replace("votes",'').replace("vote",'').replace(",","").strip()
            vote_count = int(vote_count)

            # Extracting num_comments and topic, had to seperate one class
            meta_div = idea.find('div', class_='uvIdeaMeta')

            c_tag = meta_div.find('a', title=lambda x: x and x.startswith("Comments for"))
            num_comments = c_tag.get_text(strip=True).replace("comments", "").replace("comment",'').strip()
            num_comments = int(num_comments)

            t_tag = meta_div.find('a', title=lambda x: x and x.startswith("Ideas similar to"))
            topic = t_tag.get_text(strip=True)
            
            # Initializing category for each scrape
            category = 'Account Management' ##Change this for other cat ##
            
            # Grabbing detailed information
            link = detailed_links[idx]
            if link == 'https://twitch.uservoice.com/forums/310228-account-management/suggestions/46216468-phone-number-issue':
                author = np.nan
                date = np.nan
                continue
            detailed_response = requests.get(detailed_url + link)
            detailed_soup = BeautifulSoup(detailed_response.content, 'html.parser')

            # Extract author and date
            author = detailed_soup.find('span', class_='vcard').get_text(strip=True)
            date = detailed_soup.find('time').get_text(strip=True)
            
            # Write to the CSV
            writer.writerow([author, name, description, vote_count, num_comments, topic, category, date])
            
        # Determining if there is a next page, if no page is found, break
        next_page_indicator = soup.find('a', rel='next')
    
        if not next_page_indicator:
            break

        page_no += 1 
print("Scraping completed and data is saved to twitch_ideas.csv")


AttributeError: 'NoneType' object has no attribute 'get_text'

## Ads

In [77]:
base_url = "https://twitch.uservoice.com/forums/310237-ads?filter=top&page="  ## Change this for other cat##
detailed_url = "https://twitch.uservoice.com"
csv_file = 'twitch_ideas.csv'
page_no = 1

# Check if the file exists. If not, write the header.
if not os.path.exists(csv_file):
    with open(csv_file, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Author', 'Name', 'Description', 'VoteCount', 'NumComments', 'Topic', 'Category', 'Date'])
        
# Iterate through each page
while True:
    url = base_url + str(page_no)
    
    # Fetch website content
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all ideas
    ideas = soup.find_all('li', class_='uvListItem uvIdea uvIdea-list')
    
    # Find all detailed links
    detailed_links = [a['href'] for a in soup.find_all('a', href=True) if "/forums/310237-ads/suggestions/" in a['href'] and
              "#comments" not in a['href']]  ## Change this for other cat ##
    
    # Append data to the CSV file
    with open(csv_file, 'a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)

        for idx, idea in enumerate(ideas): # Needed to seperate in order to have 2 requests
            
            # Extract the required data
            name = idea.find('h2',class_='uvIdeaTitle uvCustomLink-color').get_text(strip = True)
            
            # Had to add this check as some ideas dont include descriptions
            description = idea.find('div', class_='typeset')
            if description == None:
                description = np.nan
            else:
                description = description.get_text(strip = True)
            
            vote_count = idea.find('div', class_='uvIdeaVoteCount').get_text(strip = True).replace("votes",'').replace("vote",'').replace(",","").strip()
            vote_count = int(vote_count)

            # Extracting num_comments and topic, had to seperate one class
            meta_div = idea.find('div', class_='uvIdeaMeta')

            c_tag = meta_div.find('a', title=lambda x: x and x.startswith("Comments for"))
            num_comments = c_tag.get_text(strip=True).replace("comments", "").replace("comment",'').strip()
            num_comments = int(num_comments)

            t_tag = meta_div.find('a', title=lambda x: x and x.startswith("Ideas similar to"))
            topic = t_tag.get_text(strip=True)
            
            # Initializing category for each scrape
            category = 'Ads' ##Change this for other cat ##
            
            # Grabbing detailed information
            link = detailed_links[idx]
            detailed_response = requests.get(detailed_url + link)
            detailed_soup = BeautifulSoup(detailed_response.content, 'html.parser')

            # Extract author and date
            author = detailed_soup.find('span', class_='vcard').get_text(strip=True)
            date = detailed_soup.find('time').get_text(strip=True)
            
            # Write to the CSV
            writer.writerow([author, name, description, vote_count, num_comments, topic, category, date])
            
        # Determining if there is a next page, if no page is found, break
        next_page_indicator = soup.find('a', rel='next')
    
        if not next_page_indicator:
            break

        page_no += 1 
print("Scraping completed and data is saved to twitch_ideas.csv")

Scraping completed and data is saved to twitch_ideas.csv


## Badges/Emotes

In [78]:
base_url = "https://twitch.uservoice.com/forums/928738-badges-emotes?filter=top&page="  ## Change this for other cat##
detailed_url = "https://twitch.uservoice.com"
csv_file = 'twitch_ideas.csv'
page_no = 1

# Check if the file exists. If not, write the header.
if not os.path.exists(csv_file):
    with open(csv_file, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Author', 'Name', 'Description', 'VoteCount', 'NumComments', 'Topic', 'Category', 'Date'])
        
# Iterate through each page
while True:
    url = base_url + str(page_no)
    
    # Fetch website content
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all ideas
    ideas = soup.find_all('li', class_='uvListItem uvIdea uvIdea-list')
    
    # Find all detailed links
    detailed_links = [a['href'] for a in soup.find_all('a', href=True) if "/forums/928738-badges-emotes/suggestions/" in a['href'] and
              "#comments" not in a['href']]  ## Change this for other cat ##
    
    # Append data to the CSV file
    with open(csv_file, 'a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)

        for idx, idea in enumerate(ideas): # Needed to seperate in order to have 2 requests
            
            # Extract the required data
            name = idea.find('h2',class_='uvIdeaTitle uvCustomLink-color').get_text(strip = True)
            
            # Had to add this check as some ideas dont include descriptions
            description = idea.find('div', class_='typeset')
            if description == None:
                description = np.nan
            else:
                description = description.get_text(strip = True)
            
            vote_count = idea.find('div', class_='uvIdeaVoteCount').get_text(strip = True).replace("votes",'').replace("vote",'').replace(",","").strip()
            vote_count = int(vote_count)

            # Extracting num_comments and topic, had to seperate one class
            meta_div = idea.find('div', class_='uvIdeaMeta')

            c_tag = meta_div.find('a', title=lambda x: x and x.startswith("Comments for"))
            num_comments = c_tag.get_text(strip=True).replace("comments", "").replace("comment",'').strip()
            num_comments = int(num_comments)

            t_tag = meta_div.find('a', title=lambda x: x and x.startswith("Ideas similar to"))
            topic = t_tag.get_text(strip=True)
            
            # Initializing category for each scrape
            category = 'Badges/Emotes' ##Change this for other cat ##
            
            # Grabbing detailed information
            link = detailed_links[idx]
            detailed_response = requests.get(detailed_url + link)
            detailed_soup = BeautifulSoup(detailed_response.content, 'html.parser')

            # Extract author and date
            author = detailed_soup.find('span', class_='vcard').get_text(strip=True)
            date = detailed_soup.find('time').get_text(strip=True)
            
            # Write to the CSV
            writer.writerow([author, name, description, vote_count, num_comments, topic, category, date])
            
        # Determining if there is a next page, if no page is found, break
        next_page_indicator = soup.find('a', rel='next')
    
        if not next_page_indicator:
            break

        page_no += 1 
print("Scraping completed and data is saved to twitch_ideas.csv")

Scraping completed and data is saved to twitch_ideas.csv


## Bits

In [79]:
base_url = "https://twitch.uservoice.com/forums/921826-bits?filter=top&page="  ## Change this for other cat##
detailed_url = "https://twitch.uservoice.com"
csv_file = 'twitch_ideas.csv'
page_no = 1

# Check if the file exists. If not, write the header.
if not os.path.exists(csv_file):
    with open(csv_file, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Author', 'Name', 'Description', 'VoteCount', 'NumComments', 'Topic', 'Category', 'Date'])
        
# Iterate through each page
while True:
    url = base_url + str(page_no)
    
    # Fetch website content
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all ideas
    ideas = soup.find_all('li', class_='uvListItem uvIdea uvIdea-list')
    
    # Find all detailed links
    detailed_links = [a['href'] for a in soup.find_all('a', href=True) if "/forums/921826-bits/suggestions/" in a['href'] and
              "#comments" not in a['href']]  ## Change this for other cat ##
    
    # Append data to the CSV file
    with open(csv_file, 'a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)

        for idx, idea in enumerate(ideas): # Needed to seperate in order to have 2 requests
            
            # Extract the required data
            name = idea.find('h2',class_='uvIdeaTitle uvCustomLink-color').get_text(strip = True)
            
            # Had to add this check as some ideas dont include descriptions
            description = idea.find('div', class_='typeset')
            if description == None:
                description = np.nan
            else:
                description = description.get_text(strip = True)
            
            vote_count = idea.find('div', class_='uvIdeaVoteCount').get_text(strip = True).replace("votes",'').replace("vote",'').replace(",","").strip()
            vote_count = int(vote_count)

            # Extracting num_comments and topic, had to seperate one class
            meta_div = idea.find('div', class_='uvIdeaMeta')

            c_tag = meta_div.find('a', title=lambda x: x and x.startswith("Comments for"))
            num_comments = c_tag.get_text(strip=True).replace("comments", "").replace("comment",'').strip()
            num_comments = int(num_comments)

            t_tag = meta_div.find('a', title=lambda x: x and x.startswith("Ideas similar to"))
            topic = t_tag.get_text(strip=True)
            
            # Initializing category for each scrape
            category = 'Bits' ##Change this for other cat ##
            
            # Grabbing detailed information
            link = detailed_links[idx]
            detailed_response = requests.get(detailed_url + link)
            detailed_soup = BeautifulSoup(detailed_response.content, 'html.parser')

            # Extract author and date
            author = detailed_soup.find('span', class_='vcard').get_text(strip=True)
            date = detailed_soup.find('time').get_text(strip=True)
            
            # Write to the CSV
            writer.writerow([author, name, description, vote_count, num_comments, topic, category, date])
            
        # Determining if there is a next page, if no page is found, break
        next_page_indicator = soup.find('a', rel='next')
    
        if not next_page_indicator:
            break

        page_no += 1 
print("Scraping completed and data is saved to twitch_ideas.csv")

Scraping completed and data is saved to twitch_ideas.csv


## Channel Page

In [87]:
base_url = "https://twitch.uservoice.com/forums/928429-channel-page?filter=top&page="  ## Change this for other cat##
detailed_url = "https://twitch.uservoice.com"
csv_file = 'twitch_ideas.csv'
page_no = 1

# Check if the file exists. If not, write the header.
if not os.path.exists(csv_file):
    with open(csv_file, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Author', 'Name', 'Description', 'VoteCount', 'NumComments', 'Topic', 'Category', 'Date'])
        
# Iterate through each page
while True:
    url = base_url + str(page_no)
    
    # Fetch website content
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all ideas
    ideas = soup.find_all('li', class_='uvListItem uvIdea uvIdea-list')
    
    # Find all detailed links
    detailed_links = [a['href'] for a in soup.find_all('a', href=True) if "/forums/928429-channel-page/suggestions/" in a['href'] and
              "#comments" not in a['href']]  ## Change this for other cat ##
    
    # Append data to the CSV file
    with open(csv_file, 'a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)

        for idx, idea in enumerate(ideas): # Needed to seperate in order to have 2 requests
            
            # Extract the required data
            name = idea.find('h2',class_='uvIdeaTitle uvCustomLink-color').get_text(strip = True)
            
            # Had to add this check as some ideas dont include descriptions
            description = idea.find('div', class_='typeset')
            if description == None:
                description = np.nan
            else:
                description = description.get_text(strip = True)
            
            vote_count = idea.find('div', class_='uvIdeaVoteCount').get_text(strip = True).replace("votes",'').replace("vote",'').replace(",","").strip()
            vote_count = int(vote_count)

            # Extracting num_comments and topic, had to seperate one class
            meta_div = idea.find('div', class_='uvIdeaMeta')

            c_tag = meta_div.find('a', title=lambda x: x and x.startswith("Comments for"))
            num_comments = c_tag.get_text(strip=True).replace("comments", "").replace("comment",'').strip()
            num_comments = int(num_comments)

            t_tag = meta_div.find('a', title=lambda x: x and x.startswith("Ideas similar to"))
            topic = t_tag.get_text(strip=True)
            
            # Initializing category for each scrape
            category = 'Channel Page' ##Change this for other cat ##
            
            # Grabbing detailed information
            link = detailed_links[idx]
            
            # Had to add this because there were a number of faulty url's
            try:
                detailed_response = requests.get(detailed_url + link)
                detailed_soup = BeautifulSoup(detailed_response.content, 'html.parser')

                # Extract author and date
                author = detailed_soup.find('span', class_='vcard').get_text(strip=True)
                date = detailed_soup.find('time').get_text(strip=True)

                # Write to the CSV
                writer.writerow([author, name, description, vote_count, num_comments, topic, category, date])
            
            except Exception as e:
                print(f"Error processing link {link}: {e}")
                continue
            
        # Determining if there is a next page, if no page is found, break
        next_page_indicator = soup.find('a', rel='next')
    
        if not next_page_indicator:
            break

        page_no += 1 
print("Scraping completed and data is saved to twitch_ideas.csv")

Error processing link https://twitch.uservoice.com/forums/928429-channel-page/suggestions/40008394-donation-url-in-social-links: HTTPSConnectionPool(host='twitch.uservoice.comhttps', port=443): Max retries exceeded with url: //twitch.uservoice.com/forums/928429-channel-page/suggestions/40008394-donation-url-in-social-links (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001F4659B5A30>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
Error processing link https://twitch.uservoice.com/forums/928429-channel-page/suggestions/40379764-custom-links-should-use-site-favicons-not-a-hyper: HTTPSConnectionPool(host='twitch.uservoice.comhttps', port=443): Max retries exceeded with url: //twitch.uservoice.com/forums/928429-channel-page/suggestions/40379764-custom-links-should-use-site-favicons-not-a-hyper (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001F4659B5A00>: Failed to establish a new connection: 

## Channel Points

In [88]:
base_url = "https://twitch.uservoice.com/forums/932221-channel-points?filter=top&page="  ## Change this for other cat##
detailed_url = "https://twitch.uservoice.com"
csv_file = 'twitch_ideas.csv'
page_no = 1

# Check if the file exists. If not, write the header.
if not os.path.exists(csv_file):
    with open(csv_file, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Author', 'Name', 'Description', 'VoteCount', 'NumComments', 'Topic', 'Category', 'Date'])
        
# Iterate through each page
while True:
    url = base_url + str(page_no)
    
    # Fetch website content
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all ideas
    ideas = soup.find_all('li', class_='uvListItem uvIdea uvIdea-list')
    
    # Find all detailed links
    detailed_links = [a['href'] for a in soup.find_all('a', href=True) if "/forums/932221-channel-points/suggestions/" in a['href'] and
              "#comments" not in a['href']]  ## Change this for other cat ##
    
    # Append data to the CSV file
    with open(csv_file, 'a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)

        for idx, idea in enumerate(ideas): # Needed to seperate in order to have 2 requests
            
            # Extract the required data
            name = idea.find('h2',class_='uvIdeaTitle uvCustomLink-color').get_text(strip = True)
            
            # Had to add this check as some ideas dont include descriptions
            description = idea.find('div', class_='typeset')
            if description == None:
                description = np.nan
            else:
                description = description.get_text(strip = True)
            
            vote_count = idea.find('div', class_='uvIdeaVoteCount').get_text(strip = True).replace("votes",'').replace("vote",'').replace(",","").strip()
            vote_count = int(vote_count)

            # Extracting num_comments and topic, had to seperate one class
            meta_div = idea.find('div', class_='uvIdeaMeta')

            c_tag = meta_div.find('a', title=lambda x: x and x.startswith("Comments for"))
            num_comments = c_tag.get_text(strip=True).replace("comments", "").replace("comment",'').strip()
            num_comments = int(num_comments)

            t_tag = meta_div.find('a', title=lambda x: x and x.startswith("Ideas similar to"))
            topic = t_tag.get_text(strip=True)
            
            # Initializing category for each scrape
            category = 'Channel Points' ##Change this for other cat ##
            
            # Grabbing detailed information
            link = detailed_links[idx]
            
            # Had to add this because there were a number of faulty url's
            try:
                detailed_response = requests.get(detailed_url + link)
                detailed_soup = BeautifulSoup(detailed_response.content, 'html.parser')

                # Extract author and date
                author = detailed_soup.find('span', class_='vcard').get_text(strip=True)
                date = detailed_soup.find('time').get_text(strip=True)

                # Write to the CSV
                writer.writerow([author, name, description, vote_count, num_comments, topic, category, date])
            
            except Exception as e:
                print(f"Error processing link {link}: {e}")
                continue
            
        # Determining if there is a next page, if no page is found, break
        next_page_indicator = soup.find('a', rel='next')
    
        if not next_page_indicator:
            break

        page_no += 1 
print("Scraping completed and data is saved to twitch_ideas.csv")

Scraping completed and data is saved to twitch_ideas.csv


## Charity

In [89]:
base_url = "https://twitch.uservoice.com/forums/945934-charity?filter=top&page="  ## Change this for other cat##
detailed_url = "https://twitch.uservoice.com"
csv_file = 'twitch_ideas.csv'
page_no = 1

# Check if the file exists. If not, write the header.
if not os.path.exists(csv_file):
    with open(csv_file, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Author', 'Name', 'Description', 'VoteCount', 'NumComments', 'Topic', 'Category', 'Date'])
        
# Iterate through each page
while True:
    url = base_url + str(page_no)
    
    # Fetch website content
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all ideas
    ideas = soup.find_all('li', class_='uvListItem uvIdea uvIdea-list')
    
    # Find all detailed links
    detailed_links = [a['href'] for a in soup.find_all('a', href=True) if "/forums/945934-charity/suggestions/" in a['href'] and
              "#comments" not in a['href']]  ## Change this for other cat ##
    
    # Append data to the CSV file
    with open(csv_file, 'a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)

        for idx, idea in enumerate(ideas): # Needed to seperate in order to have 2 requests
            
            # Extract the required data
            name = idea.find('h2',class_='uvIdeaTitle uvCustomLink-color').get_text(strip = True)
            
            # Had to add this check as some ideas dont include descriptions
            description = idea.find('div', class_='typeset')
            if description == None:
                description = np.nan
            else:
                description = description.get_text(strip = True)
            
            vote_count = idea.find('div', class_='uvIdeaVoteCount').get_text(strip = True).replace("votes",'').replace("vote",'').replace(",","").strip()
            vote_count = int(vote_count)

            # Extracting num_comments and topic, had to seperate one class
            meta_div = idea.find('div', class_='uvIdeaMeta')

            c_tag = meta_div.find('a', title=lambda x: x and x.startswith("Comments for"))
            num_comments = c_tag.get_text(strip=True).replace("comments", "").replace("comment",'').strip()
            num_comments = int(num_comments)

            t_tag = meta_div.find('a', title=lambda x: x and x.startswith("Ideas similar to"))
            topic = t_tag.get_text(strip=True)
            
            # Initializing category for each scrape
            category = 'Charity' ##Change this for other cat ##
            
            # Grabbing detailed information
            link = detailed_links[idx]
            
            # Had to add this because there were a number of faulty url's
            try:
                detailed_response = requests.get(detailed_url + link)
                detailed_soup = BeautifulSoup(detailed_response.content, 'html.parser')

                # Extract author and date
                author = detailed_soup.find('span', class_='vcard').get_text(strip=True)
                date = detailed_soup.find('time').get_text(strip=True)

                # Write to the CSV
                writer.writerow([author, name, description, vote_count, num_comments, topic, category, date])
            
            except Exception as e:
                print(f"Error processing link {link}: {e}")
                continue
            
        # Determining if there is a next page, if no page is found, break
        next_page_indicator = soup.find('a', rel='next')
    
        if not next_page_indicator:
            break

        page_no += 1 

## Creator Camp

In [91]:
base_url = "https://twitch.uservoice.com/forums/926239-creator-camp?filter=top&page="  ## Change this for other cat##
detailed_url = "https://twitch.uservoice.com"
csv_file = 'twitch_ideas.csv'
page_no = 1

# Check if the file exists. If not, write the header.
if not os.path.exists(csv_file):
    with open(csv_file, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Author', 'Name', 'Description', 'VoteCount', 'NumComments', 'Topic', 'Category', 'Date'])
        
# Iterate through each page
while True:
    url = base_url + str(page_no)
    
    # Fetch website content
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all ideas
    ideas = soup.find_all('li', class_='uvListItem uvIdea uvIdea-list')
    
    # Find all detailed links
    detailed_links = [a['href'] for a in soup.find_all('a', href=True) if "/forums/926239-creator-camp/suggestions/" in a['href'] and
              "#comments" not in a['href']]  ## Change this for other cat ##
    
    # Append data to the CSV file
    with open(csv_file, 'a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)

        for idx, idea in enumerate(ideas): # Needed to seperate in order to have 2 requests
            
            # Extract the required data
            name = idea.find('h2',class_='uvIdeaTitle uvCustomLink-color').get_text(strip = True)
            
            # Had to add this check as some ideas dont include descriptions
            description = idea.find('div', class_='typeset')
            if description == None:
                description = np.nan
            else:
                description = description.get_text(strip = True)
            
            vote_count = idea.find('div', class_='uvIdeaVoteCount').get_text(strip = True).replace("votes",'').replace("vote",'').replace(",","").strip()
            vote_count = int(vote_count)

            # Extracting num_comments and topic, had to seperate one class
            meta_div = idea.find('div', class_='uvIdeaMeta')

            c_tag = meta_div.find('a', title=lambda x: x and x.startswith("Comments for"))
            num_comments = c_tag.get_text(strip=True).replace("comments", "").replace("comment",'').strip()
            num_comments = int(num_comments)

            t_tag = meta_div.find('a', title=lambda x: x and x.startswith("Ideas similar to"))
            if t_tag == None:
                topic = np.nan
            else:
                topic = t_tag.get_text(strip=True)
            
            # Initializing category for each scrape
            category = 'Creator Camp' ##Change this for other cat ##
            
            # Grabbing detailed information
            link = detailed_links[idx]
            
            # Had to add this because there were a number of faulty url's
            try:
                detailed_response = requests.get(detailed_url + link)
                detailed_soup = BeautifulSoup(detailed_response.content, 'html.parser')

                # Extract author and date
                author = detailed_soup.find('span', class_='vcard').get_text(strip=True)
                date = detailed_soup.find('time').get_text(strip=True)

                # Write to the CSV
                writer.writerow([author, name, description, vote_count, num_comments, topic, category, date])
            
            except Exception as e:
                print(f"Error processing link {link}: {e}")
                continue
            
        # Determining if there is a next page, if no page is found, break
        next_page_indicator = soup.find('a', rel='next')
    
        if not next_page_indicator:
            break

        page_no += 1 
print("Scraping completed and data is saved to twitch_ideas.csv")

## Creator Dashboard

In [92]:
base_url = "https://twitch.uservoice.com/forums/924712-creator-dashboard?filter=top&page="  ## Change this for other cat##
detailed_url = "https://twitch.uservoice.com"
csv_file = 'twitch_ideas.csv'
page_no = 1

# Check if the file exists. If not, write the header.
if not os.path.exists(csv_file):
    with open(csv_file, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Author', 'Name', 'Description', 'VoteCount', 'NumComments', 'Topic', 'Category', 'Date'])
        
# Iterate through each page
while True:
    url = base_url + str(page_no)
    
    # Fetch website content
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all ideas
    ideas = soup.find_all('li', class_='uvListItem uvIdea uvIdea-list')
    
    # Find all detailed links
    detailed_links = [a['href'] for a in soup.find_all('a', href=True) if "/forums/924712-creator-dashboard/suggestions/" in a['href'] and
              "#comments" not in a['href']]  ## Change this for other cat ##
    
    # Append data to the CSV file
    with open(csv_file, 'a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)

        for idx, idea in enumerate(ideas): # Needed to seperate in order to have 2 requests
            
            # Extract the required data
            name = idea.find('h2',class_='uvIdeaTitle uvCustomLink-color').get_text(strip = True)
            
            # Had to add this check as some ideas dont include descriptions
            description = idea.find('div', class_='typeset')
            if description == None:
                description = np.nan
            else:
                description = description.get_text(strip = True)
            
            vote_count = idea.find('div', class_='uvIdeaVoteCount').get_text(strip = True).replace("votes",'').replace("vote",'').replace(",","").strip()
            vote_count = int(vote_count)

            # Extracting num_comments and topic, had to seperate one class
            meta_div = idea.find('div', class_='uvIdeaMeta')

            c_tag = meta_div.find('a', title=lambda x: x and x.startswith("Comments for"))
            num_comments = c_tag.get_text(strip=True).replace("comments", "").replace("comment",'').strip()
            num_comments = int(num_comments)

            t_tag = meta_div.find('a', title=lambda x: x and x.startswith("Ideas similar to"))
            if t_tag == None:
                topic = np.nan
            else:
                topic = t_tag.get_text(strip=True)
            
            # Initializing category for each scrape
            category = 'Creator Dashboard' ##Change this for other cat ##
            
            # Grabbing detailed information
            link = detailed_links[idx]
            
            # Had to add this because there were a number of faulty url's
            try:
                detailed_response = requests.get(detailed_url + link)
                detailed_soup = BeautifulSoup(detailed_response.content, 'html.parser')

                # Extract author and date
                author = detailed_soup.find('span', class_='vcard').get_text(strip=True)
                date = detailed_soup.find('time').get_text(strip=True)

                # Write to the CSV
                writer.writerow([author, name, description, vote_count, num_comments, topic, category, date])
            
            except Exception as e:
                print(f"Error processing link {link}: {e}")
                continue
            
        # Determining if there is a next page, if no page is found, break
        next_page_indicator = soup.find('a', rel='next')
    
        if not next_page_indicator:
            break

        page_no += 1 
print("Scraping completed and data is saved to twitch_ideas.csv")

Scraping completed and data is saved to twitch_ideas.csv


## Creator Dashboard: Stream Manager

In [93]:
base_url = "https://twitch.uservoice.com/forums/933484-creator-dashboard-stream-manager?filter=top&page="  ## Change this for other cat##
detailed_url = "https://twitch.uservoice.com"
csv_file = 'twitch_ideas.csv'
page_no = 1

# Check if the file exists. If not, write the header.
if not os.path.exists(csv_file):
    with open(csv_file, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Author', 'Name', 'Description', 'VoteCount', 'NumComments', 'Topic', 'Category', 'Date'])
        
# Iterate through each page
while True:
    url = base_url + str(page_no)
    
    # Fetch website content
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all ideas
    ideas = soup.find_all('li', class_='uvListItem uvIdea uvIdea-list')
    
    # Find all detailed links
    detailed_links = [a['href'] for a in soup.find_all('a', href=True) if "/forums/933484-creator-dashboard-stream-manager/suggestions/" in a['href'] and
              "#comments" not in a['href']]  ## Change this for other cat ##
    
    # Append data to the CSV file
    with open(csv_file, 'a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)

        for idx, idea in enumerate(ideas): # Needed to seperate in order to have 2 requests
            
            # Extract the required data
            name = idea.find('h2',class_='uvIdeaTitle uvCustomLink-color').get_text(strip = True)
            
            # Had to add this check as some ideas dont include descriptions
            description = idea.find('div', class_='typeset')
            if description == None:
                description = np.nan
            else:
                description = description.get_text(strip = True)
            
            vote_count = idea.find('div', class_='uvIdeaVoteCount').get_text(strip = True).replace("votes",'').replace("vote",'').replace(",","").strip()
            vote_count = int(vote_count)

            # Extracting num_comments and topic, had to seperate one class
            meta_div = idea.find('div', class_='uvIdeaMeta')

            c_tag = meta_div.find('a', title=lambda x: x and x.startswith("Comments for"))
            num_comments = c_tag.get_text(strip=True).replace("comments", "").replace("comment",'').strip()
            num_comments = int(num_comments)

            t_tag = meta_div.find('a', title=lambda x: x and x.startswith("Ideas similar to"))
            if t_tag == None:
                topic = np.nan
            else:
                topic = t_tag.get_text(strip=True)
            
            # Initializing category for each scrape
            category = 'Creator Dashboard: Stream Manager' ##Change this for other cat ##
            
            # Grabbing detailed information
            link = detailed_links[idx]
            
            # Had to add this because there were a number of faulty url's
            try:
                detailed_response = requests.get(detailed_url + link)
                detailed_soup = BeautifulSoup(detailed_response.content, 'html.parser')

                # Extract author and date
                author = detailed_soup.find('span', class_='vcard').get_text(strip=True)
                date = detailed_soup.find('time').get_text(strip=True)

                # Write to the CSV
                writer.writerow([author, name, description, vote_count, num_comments, topic, category, date])
            
            except Exception as e:
                print(f"Error processing link {link}: {e}")
                continue
            
        # Determining if there is a next page, if no page is found, break
        next_page_indicator = soup.find('a', rel='next')
    
        if not next_page_indicator:
            break

        page_no += 1 
print("Scraping completed and data is saved to twitch_ideas.csv")

Scraping completed and data is saved to twitch_ideas.csv


## Creators and Stream Features [x]

In [94]:
base_url = "https://twitch.uservoice.com/forums/923383-creators-and-stream-features?filter=top&page="  ## Change this for other cat##
detailed_url = "https://twitch.uservoice.com"
csv_file = 'twitch_ideas.csv'
page_no = 1

# Check if the file exists. If not, write the header.
if not os.path.exists(csv_file):
    with open(csv_file, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Author', 'Name', 'Description', 'VoteCount', 'NumComments', 'Topic', 'Category', 'Date'])
        
# Iterate through each page
while True:
    url = base_url + str(page_no)
    
    # Fetch website content
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all ideas
    ideas = soup.find_all('li', class_='uvListItem uvIdea uvIdea-list')
    
    # Find all detailed links
    detailed_links = [a['href'] for a in soup.find_all('a', href=True) if "/forums/923383-creators-and-stream-features/suggestions/" in a['href'] and
              "#comments" not in a['href']]  ## Change this for other cat ##
    
    # Append data to the CSV file
    with open(csv_file, 'a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)

        for idx, idea in enumerate(ideas): # Needed to seperate in order to have 2 requests
            
            # Extract the required data
            name = idea.find('h2',class_='uvIdeaTitle uvCustomLink-color').get_text(strip = True)
            
            # Had to add this check as some ideas dont include descriptions
            description = idea.find('div', class_='typeset')
            if description == None:
                description = np.nan
            else:
                description = description.get_text(strip = True)
            
            vote_count = idea.find('div', class_='uvIdeaVoteCount').get_text(strip = True).replace("votes",'').replace("vote",'').replace(",","").strip()
            vote_count = int(vote_count)

            # Extracting num_comments and topic, had to seperate one class
            meta_div = idea.find('div', class_='uvIdeaMeta')

            c_tag = meta_div.find('a', title=lambda x: x and x.startswith("Comments for"))
            num_comments = c_tag.get_text(strip=True).replace("comments", "").replace("comment",'').strip()
            num_comments = int(num_comments)

            t_tag = meta_div.find('a', title=lambda x: x and x.startswith("Ideas similar to"))
            if t_tag == None:
                topic = np.nan
            else:
                topic = t_tag.get_text(strip=True)
            
            # Initializing category for each scrape
            category = 'Creators and Stream Features' ##Change this for other cat ##
            
            # Grabbing detailed information
            link = detailed_links[idx]
            
            # Had to add this because there were a number of faulty url's
            try:
                detailed_response = requests.get(detailed_url + link)
                detailed_soup = BeautifulSoup(detailed_response.content, 'html.parser')

                # Extract author and date
                author = detailed_soup.find('span', class_='vcard').get_text(strip=True)
                date = detailed_soup.find('time').get_text(strip=True)

                # Write to the CSV
                writer.writerow([author, name, description, vote_count, num_comments, topic, category, date])
            
            except Exception as e:
                print(f"Error processing link {link}: {e}")
                continue
            
        # Determining if there is a next page, if no page is found, break
        next_page_indicator = soup.find('a', rel='next')
    
        if not next_page_indicator:
            break

        page_no += 1 
print("Scraping completed and data is saved to twitch_ideas.csv")

Scraping completed and data is saved to twitch_ideas.csv


## Customer Experience [x]

In [95]:
base_url = "https://twitch.uservoice.com/forums/934332-customer-experience?filter=top&page="  ## Change this for other cat##
detailed_url = "https://twitch.uservoice.com"
csv_file = 'twitch_ideas.csv'
page_no = 1

# Check if the file exists. If not, write the header.
if not os.path.exists(csv_file):
    with open(csv_file, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Author', 'Name', 'Description', 'VoteCount', 'NumComments', 'Topic', 'Category', 'Date'])
        
# Iterate through each page
while True:
    url = base_url + str(page_no)
    
    # Fetch website content
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all ideas
    ideas = soup.find_all('li', class_='uvListItem uvIdea uvIdea-list')
    
    # Find all detailed links
    detailed_links = [a['href'] for a in soup.find_all('a', href=True) if "/forums/934332-customer-experience/suggestions/" in a['href'] and
              "#comments" not in a['href']]  ## Change this for other cat ##
    
    # Append data to the CSV file
    with open(csv_file, 'a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)

        for idx, idea in enumerate(ideas): # Needed to seperate in order to have 2 requests
            
            # Extract the required data
            name = idea.find('h2',class_='uvIdeaTitle uvCustomLink-color').get_text(strip = True)
            
            # Had to add this check as some ideas dont include descriptions
            description = idea.find('div', class_='typeset')
            if description == None:
                description = np.nan
            else:
                description = description.get_text(strip = True)
            
            vote_count = idea.find('div', class_='uvIdeaVoteCount').get_text(strip = True).replace("votes",'').replace("vote",'').replace(",","").strip()
            vote_count = int(vote_count)

            # Extracting num_comments and topic, had to seperate one class
            meta_div = idea.find('div', class_='uvIdeaMeta')

            c_tag = meta_div.find('a', title=lambda x: x and x.startswith("Comments for"))
            num_comments = c_tag.get_text(strip=True).replace("comments", "").replace("comment",'').strip()
            num_comments = int(num_comments)

            t_tag = meta_div.find('a', title=lambda x: x and x.startswith("Ideas similar to"))
            if t_tag == None:
                topic = np.nan
            else:
                topic = t_tag.get_text(strip=True)
            
            # Initializing category for each scrape
            category = 'Customer Experience' ##Change this for other cat ##
            
            # Grabbing detailed information
            link = detailed_links[idx]
            
            # Had to add this because there were a number of faulty url's
            try:
                detailed_response = requests.get(detailed_url + link)
                detailed_soup = BeautifulSoup(detailed_response.content, 'html.parser')

                # Extract author and date
                author = detailed_soup.find('span', class_='vcard').get_text(strip=True)
                date = detailed_soup.find('time').get_text(strip=True)

                # Write to the CSV
                writer.writerow([author, name, description, vote_count, num_comments, topic, category, date])
            
            except Exception as e:
                print(f"Error processing link {link}: {e}")
                continue
            
        # Determining if there is a next page, if no page is found, break
        next_page_indicator = soup.find('a', rel='next')
    
        if not next_page_indicator:
            break

        page_no += 1 
print("Scraping completed and data is saved to twitch_ideas.csv")

Error processing link https://twitch.uservoice.com/forums/934332-customer-experience/suggestions/46087618-stop-merging-ideas-without-first-confirming-with-t: HTTPSConnectionPool(host='twitch.uservoice.comhttps', port=443): Max retries exceeded with url: //twitch.uservoice.com/forums/934332-customer-experience/suggestions/46087618-stop-merging-ideas-without-first-confirming-with-t (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001F52281EE20>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
Error processing link https://twitch.uservoice.com/forums/934332-customer-experience/suggestions/46087618-stop-merging-ideas-without-first-confirming-with-t: HTTPSConnectionPool(host='twitch.uservoice.comhttps', port=443): Max retries exceeded with url: //twitch.uservoice.com/forums/934332-customer-experience/suggestions/46087618-stop-merging-ideas-without-first-confirming-with-t (Caused by NewConnectionError('<urllib3.connection.HTTPSConnec

## Developers [x]

In [96]:
base_url = "https://twitch.uservoice.com/forums/310213-developers?filter=top&page="  ## Change this for other cat##
detailed_url = "https://twitch.uservoice.com"
csv_file = 'twitch_ideas.csv'
page_no = 1

# Check if the file exists. If not, write the header.
if not os.path.exists(csv_file):
    with open(csv_file, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Author', 'Name', 'Description', 'VoteCount', 'NumComments', 'Topic', 'Category', 'Date'])
        
# Iterate through each page
while True:
    url = base_url + str(page_no)
    
    # Fetch website content
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all ideas
    ideas = soup.find_all('li', class_='uvListItem uvIdea uvIdea-list')
    
    # Find all detailed links
    detailed_links = [a['href'] for a in soup.find_all('a', href=True) if "/forums/310213-developers/suggestions/" in a['href'] and
              "#comments" not in a['href']]  ## Change this for other cat ##
    
    # Append data to the CSV file
    with open(csv_file, 'a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)

        for idx, idea in enumerate(ideas): # Needed to seperate in order to have 2 requests
            
            # Extract the required data
            name = idea.find('h2',class_='uvIdeaTitle uvCustomLink-color').get_text(strip = True)
            
            # Had to add this check as some ideas dont include descriptions
            description = idea.find('div', class_='typeset')
            if description == None:
                description = np.nan
            else:
                description = description.get_text(strip = True)
            
            vote_count = idea.find('div', class_='uvIdeaVoteCount').get_text(strip = True).replace("votes",'').replace("vote",'').replace(",","").strip()
            vote_count = int(vote_count)

            # Extracting num_comments and topic, had to seperate one class
            meta_div = idea.find('div', class_='uvIdeaMeta')

            c_tag = meta_div.find('a', title=lambda x: x and x.startswith("Comments for"))
            num_comments = c_tag.get_text(strip=True).replace("comments", "").replace("comment",'').strip()
            num_comments = int(num_comments)

            t_tag = meta_div.find('a', title=lambda x: x and x.startswith("Ideas similar to"))
            if t_tag == None:
                topic = np.nan
            else:
                topic = t_tag.get_text(strip=True)
            
            # Initializing category for each scrape
            category = 'Developers' ##Change this for other cat ##
            
            # Grabbing detailed information
            link = detailed_links[idx]
            
            # Had to add this because there were a number of faulty url's
            try:
                detailed_response = requests.get(detailed_url + link)
                detailed_soup = BeautifulSoup(detailed_response.content, 'html.parser')

                # Extract author and date
                author = detailed_soup.find('span', class_='vcard').get_text(strip=True)
                date = detailed_soup.find('time').get_text(strip=True)

                # Write to the CSV
                writer.writerow([author, name, description, vote_count, num_comments, topic, category, date])
            
            except Exception as e:
                print(f"Error processing link {link}: {e}")
                continue
            
        # Determining if there is a next page, if no page is found, break
        next_page_indicator = soup.find('a', rel='next')
    
        if not next_page_indicator:
            break

        page_no += 1 
print("Scraping completed and data is saved to twitch_ideas.csv")

Error processing link https://twitch.uservoice.com/forums/310213-developers/suggestions/43813413-provide-a-gift-event-id-in-the-channel-subscribe: HTTPSConnectionPool(host='twitch.uservoice.comhttps', port=443): Max retries exceeded with url: //twitch.uservoice.com/forums/310213-developers/suggestions/43813413-provide-a-gift-event-id-in-the-channel-subscribe (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001F52A83DA90>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
Error processing link https://twitch.uservoice.com/forums/310213-developers/suggestions/42454273-subscriber-upgrades: HTTPSConnectionPool(host='twitch.uservoice.comhttps', port=443): Max retries exceeded with url: //twitch.uservoice.com/forums/310213-developers/suggestions/42454273-subscriber-upgrades (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001F52A83DA60>: Failed to establish a new connection: [Errno 11001] getaddrinfo fa

Error processing link https://twitch.uservoice.com/forums/310213-developers/suggestions/39547780-emotes-by-channelid: HTTPSConnectionPool(host='twitch.uservoice.comhttps', port=443): Max retries exceeded with url: //twitch.uservoice.com/forums/310213-developers/suggestions/39547780-emotes-by-channelid (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001F52F7AE1C0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
Error processing link https://twitch.uservoice.com/forums/310213-developers/suggestions/41137957-upload-emotes: HTTPSConnectionPool(host='twitch.uservoice.comhttps', port=443): Max retries exceeded with url: //twitch.uservoice.com/forums/310213-developers/suggestions/41137957-upload-emotes (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001F52F7AE190>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
Error processing link https://twitch.uservoice.com/forums/3102

Scraping completed and data is saved to twitch_ideas.csv


## Discover [x]

In [97]:
base_url = "https://twitch.uservoice.com/forums/310210-discover?filter=top&page="  ## Change this for other cat##
detailed_url = "https://twitch.uservoice.com"
csv_file = 'twitch_ideas.csv'
page_no = 1

# Check if the file exists. If not, write the header.
if not os.path.exists(csv_file):
    with open(csv_file, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Author', 'Name', 'Description', 'VoteCount', 'NumComments', 'Topic', 'Category', 'Date'])
        
# Iterate through each page
while True:
    url = base_url + str(page_no)
    
    # Fetch website content
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all ideas
    ideas = soup.find_all('li', class_='uvListItem uvIdea uvIdea-list')
    
    # Find all detailed links
    detailed_links = [a['href'] for a in soup.find_all('a', href=True) if "/forums/310210-discover/suggestions/" in a['href'] and
              "#comments" not in a['href']]  ## Change this for other cat ##
    
    # Append data to the CSV file
    with open(csv_file, 'a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)

        for idx, idea in enumerate(ideas): # Needed to seperate in order to have 2 requests
            
            # Extract the required data
            name = idea.find('h2',class_='uvIdeaTitle uvCustomLink-color').get_text(strip = True)
            
            # Had to add this check as some ideas dont include descriptions
            description = idea.find('div', class_='typeset')
            if description == None:
                description = np.nan
            else:
                description = description.get_text(strip = True)
            
            vote_count = idea.find('div', class_='uvIdeaVoteCount').get_text(strip = True).replace("votes",'').replace("vote",'').replace(",","").strip()
            vote_count = int(vote_count)

            # Extracting num_comments and topic, had to seperate one class
            meta_div = idea.find('div', class_='uvIdeaMeta')

            c_tag = meta_div.find('a', title=lambda x: x and x.startswith("Comments for"))
            num_comments = c_tag.get_text(strip=True).replace("comments", "").replace("comment",'').strip()
            num_comments = int(num_comments)

            t_tag = meta_div.find('a', title=lambda x: x and x.startswith("Ideas similar to"))
            if t_tag == None:
                topic = np.nan
            else:
                topic = t_tag.get_text(strip=True)
            
            # Initializing category for each scrape
            category = 'Discover' ##Change this for other cat ##
            
            # Grabbing detailed information
            link = detailed_links[idx]
            
            # Had to add this because there were a number of faulty url's
            try:
                detailed_response = requests.get(detailed_url + link)
                detailed_soup = BeautifulSoup(detailed_response.content, 'html.parser')

                # Extract author and date
                author = detailed_soup.find('span', class_='vcard').get_text(strip=True)
                date = detailed_soup.find('time').get_text(strip=True)

                # Write to the CSV
                writer.writerow([author, name, description, vote_count, num_comments, topic, category, date])
            
            except Exception as e:
                print(f"Error processing link {link}: {e}")
                continue
            
        # Determining if there is a next page, if no page is found, break
        next_page_indicator = soup.find('a', rel='next')
    
        if not next_page_indicator:
            break

        page_no += 1 
print("Scraping completed and data is saved to twitch_ideas.csv")

Error processing link https://twitch.uservoice.com/forums/310210-discover/suggestions/47257001-channels-marked-not-interested-not-syncing-acros: HTTPSConnectionPool(host='twitch.uservoice.comhttps', port=443): Max retries exceeded with url: //twitch.uservoice.com/forums/310210-discover/suggestions/47257001-channels-marked-not-interested-not-syncing-acros (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001F5823D13A0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
Error processing link https://twitch.uservoice.com/forums/310210-discover/suggestions/44079120-create-a-religions-faith-and-beliefs-category: HTTPSConnectionPool(host='twitch.uservoice.comhttps', port=443): Max retries exceeded with url: //twitch.uservoice.com/forums/310210-discover/suggestions/44079120-create-a-religions-faith-and-beliefs-category (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001F59ABF3100>: Failed to establish a 

## Extensions [x]

In [98]:
base_url = "https://twitch.uservoice.com/forums/904711-extensions?filter=top&page="  ## Change this for other cat##
detailed_url = "https://twitch.uservoice.com"
csv_file = 'twitch_ideas.csv'
page_no = 1

# Check if the file exists. If not, write the header.
if not os.path.exists(csv_file):
    with open(csv_file, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Author', 'Name', 'Description', 'VoteCount', 'NumComments', 'Topic', 'Category', 'Date'])
        
# Iterate through each page
while True:
    url = base_url + str(page_no)
    
    # Fetch website content
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all ideas
    ideas = soup.find_all('li', class_='uvListItem uvIdea uvIdea-list')
    
    # Find all detailed links
    detailed_links = [a['href'] for a in soup.find_all('a', href=True) if "/forums/904711-extensions/suggestions/" in a['href'] and
              "#comments" not in a['href']]  ## Change this for other cat ##
    
    # Append data to the CSV file
    with open(csv_file, 'a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)

        for idx, idea in enumerate(ideas): # Needed to seperate in order to have 2 requests
            
            # Extract the required data
            name = idea.find('h2',class_='uvIdeaTitle uvCustomLink-color').get_text(strip = True)
            
            # Had to add this check as some ideas dont include descriptions
            description = idea.find('div', class_='typeset')
            if description == None:
                description = np.nan
            else:
                description = description.get_text(strip = True)
            
            vote_count = idea.find('div', class_='uvIdeaVoteCount').get_text(strip = True).replace("votes",'').replace("vote",'').replace(",","").strip()
            vote_count = int(vote_count)

            # Extracting num_comments and topic, had to seperate one class
            meta_div = idea.find('div', class_='uvIdeaMeta')

            c_tag = meta_div.find('a', title=lambda x: x and x.startswith("Comments for"))
            num_comments = c_tag.get_text(strip=True).replace("comments", "").replace("comment",'').strip()
            num_comments = int(num_comments)

            t_tag = meta_div.find('a', title=lambda x: x and x.startswith("Ideas similar to"))
            if t_tag == None:
                topic = np.nan
            else:
                topic = t_tag.get_text(strip=True)
            
            # Initializing category for each scrape
            category = 'Extensions' ##Change this for other cat ##
            
            # Grabbing detailed information
            link = detailed_links[idx]
            
            # Had to add this because there were a number of faulty url's
            try:
                detailed_response = requests.get(detailed_url + link)
                detailed_soup = BeautifulSoup(detailed_response.content, 'html.parser')

                # Extract author and date
                author = detailed_soup.find('span', class_='vcard').get_text(strip=True)
                date = detailed_soup.find('time').get_text(strip=True)

                # Write to the CSV
                writer.writerow([author, name, description, vote_count, num_comments, topic, category, date])
            
            except Exception as e:
                print(f"Error processing link {link}: {e}")
                continue
            
        # Determining if there is a next page, if no page is found, break
        next_page_indicator = soup.find('a', rel='next')
    
        if not next_page_indicator:
            break

        page_no += 1 
print("Scraping completed and data is saved to twitch_ideas.csv")

Scraping completed and data is saved to twitch_ideas.csv


## IGDB [x]

In [99]:
base_url = "https://twitch.uservoice.com/forums/929953-igdb?filter=top&page="  ## Change this for other cat##
detailed_url = "https://twitch.uservoice.com"
csv_file = 'twitch_ideas.csv'
page_no = 1

# Check if the file exists. If not, write the header.
if not os.path.exists(csv_file):
    with open(csv_file, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Author', 'Name', 'Description', 'VoteCount', 'NumComments', 'Topic', 'Category', 'Date'])
        
# Iterate through each page
while True:
    url = base_url + str(page_no)
    
    # Fetch website content
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all ideas
    ideas = soup.find_all('li', class_='uvListItem uvIdea uvIdea-list')
    
    # Find all detailed links
    detailed_links = [a['href'] for a in soup.find_all('a', href=True) if "/forums/929953-igdb/suggestions/" in a['href'] and
              "#comments" not in a['href']]  ## Change this for other cat ##
    
    # Append data to the CSV file
    with open(csv_file, 'a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)

        for idx, idea in enumerate(ideas): # Needed to seperate in order to have 2 requests
            
            # Extract the required data
            name = idea.find('h2',class_='uvIdeaTitle uvCustomLink-color').get_text(strip = True)
            
            # Had to add this check as some ideas dont include descriptions
            description = idea.find('div', class_='typeset')
            if description == None:
                description = np.nan
            else:
                description = description.get_text(strip = True)
            
            vote_count = idea.find('div', class_='uvIdeaVoteCount').get_text(strip = True).replace("votes",'').replace("vote",'').replace(",","").strip()
            vote_count = int(vote_count)

            # Extracting num_comments and topic, had to seperate one class
            meta_div = idea.find('div', class_='uvIdeaMeta')

            c_tag = meta_div.find('a', title=lambda x: x and x.startswith("Comments for"))
            num_comments = c_tag.get_text(strip=True).replace("comments", "").replace("comment",'').strip()
            num_comments = int(num_comments)

            t_tag = meta_div.find('a', title=lambda x: x and x.startswith("Ideas similar to"))
            if t_tag == None:
                topic = np.nan
            else:
                topic = t_tag.get_text(strip=True)
            
            # Initializing category for each scrape
            category = 'IGDB' ##Change this for other cat ##
            
            # Grabbing detailed information
            link = detailed_links[idx]
            
            # Had to add this because there were a number of faulty url's
            try:
                detailed_response = requests.get(detailed_url + link)
                detailed_soup = BeautifulSoup(detailed_response.content, 'html.parser')

                # Extract author and date
                author = detailed_soup.find('span', class_='vcard').get_text(strip=True)
                date = detailed_soup.find('time').get_text(strip=True)

                # Write to the CSV
                writer.writerow([author, name, description, vote_count, num_comments, topic, category, date])
            
            except Exception as e:
                print(f"Error processing link {link}: {e}")
                continue
            
        # Determining if there is a next page, if no page is found, break
        next_page_indicator = soup.find('a', rel='next')
    
        if not next_page_indicator:
            break

        page_no += 1 
print("Scraping completed and data is saved to twitch_ideas.csv")

Scraping completed and data is saved to twitch_ideas.csv


## IRL Events and Merch [x]

In [101]:
base_url = "https://twitch.uservoice.com/forums/924751-irl-events-and-merch?filter=top&page="  ## Change this for other cat##
detailed_url = "https://twitch.uservoice.com"
csv_file = 'twitch_ideas.csv'
page_no = 1

# Check if the file exists. If not, write the header.
if not os.path.exists(csv_file):
    with open(csv_file, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Author', 'Name', 'Description', 'VoteCount', 'NumComments', 'Topic', 'Category', 'Date'])
        
# Iterate through each page
while True:
    url = base_url + str(page_no)
    
    # Fetch website content
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all ideas
    ideas = soup.find_all('li', class_='uvListItem uvIdea uvIdea-list')
    
    # Find all detailed links
    detailed_links = [a['href'] for a in soup.find_all('a', href=True) if "/forums/924751-irl-events-and-merch/suggestions/" in a['href'] and
              "#comments" not in a['href']]  ## Change this for other cat ##
    
    # Append data to the CSV file
    with open(csv_file, 'a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)

        for idx, idea in enumerate(ideas): # Needed to seperate in order to have 2 requests
            
            # Extract the required data
            name = idea.find('h2',class_='uvIdeaTitle uvCustomLink-color').get_text(strip = True)
            
            # Had to add this check as some ideas dont include descriptions
            description = idea.find('div', class_='typeset')
            if description == None:
                description = np.nan
            else:
                description = description.get_text(strip = True)
            
            vote_count = idea.find('div', class_='uvIdeaVoteCount').get_text(strip = True).replace("votes",'').replace("vote",'').replace(",","").strip()
            vote_count = int(vote_count)

            # Extracting num_comments and topic, had to seperate one class
            meta_div = idea.find('div', class_='uvIdeaMeta')

            c_tag = meta_div.find('a', title=lambda x: x and x.startswith("Comments for"))
            num_comments = c_tag.get_text(strip=True).replace("comments", "").replace("comment",'').strip()
            num_comments = int(num_comments)

            t_tag = meta_div.find('a', title=lambda x: x and x.startswith("Ideas similar to"))
            if t_tag == None:
                topic = np.nan
            else:
                topic = t_tag.get_text(strip=True)
            
            # Initializing category for each scrape
            category = 'IRL Events and Merch' ##Change this for other cat ##
            
            # Grabbing detailed information
            link = detailed_links[idx]
            
            # Had to add this because there were a number of faulty url's
            try:
                detailed_response = requests.get(detailed_url + link)
                detailed_soup = BeautifulSoup(detailed_response.content, 'html.parser')

                # Extract author and date
                author = detailed_soup.find('span', class_='vcard').get_text(strip=True)
                date = detailed_soup.find('time').get_text(strip=True)

                # Write to the CSV
                writer.writerow([author, name, description, vote_count, num_comments, topic, category, date])
            
            except Exception as e:
                print(f"Error processing link {link}: {e}")
                continue
            
        # Determining if there is a next page, if no page is found, break
        next_page_indicator = soup.find('a', rel='next')
    
        if not next_page_indicator:
            break

        page_no += 1 
print("Scraping completed and data is saved to twitch_ideas.csv")

Scraping completed and data is saved to twitch_ideas.csv


## Localization [x]

In [102]:
base_url = "https://twitch.uservoice.com/forums/924619-localization?filter=top&page="  ## Change this for other cat##
detailed_url = "https://twitch.uservoice.com"
csv_file = 'twitch_ideas.csv'
page_no = 1

# Check if the file exists. If not, write the header.
if not os.path.exists(csv_file):
    with open(csv_file, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Author', 'Name', 'Description', 'VoteCount', 'NumComments', 'Topic', 'Category', 'Date'])
        
# Iterate through each page
while True:
    url = base_url + str(page_no)
    
    # Fetch website content
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all ideas
    ideas = soup.find_all('li', class_='uvListItem uvIdea uvIdea-list')
    
    # Find all detailed links
    detailed_links = [a['href'] for a in soup.find_all('a', href=True) if "/forums/924619-localization/suggestions/" in a['href'] and
              "#comments" not in a['href']]  ## Change this for other cat ##
    
    # Append data to the CSV file
    with open(csv_file, 'a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)

        for idx, idea in enumerate(ideas): # Needed to seperate in order to have 2 requests
            
            # Extract the required data
            name = idea.find('h2',class_='uvIdeaTitle uvCustomLink-color').get_text(strip = True)
            
            # Had to add this check as some ideas dont include descriptions
            description = idea.find('div', class_='typeset')
            if description == None:
                description = np.nan
            else:
                description = description.get_text(strip = True)
            
            vote_count = idea.find('div', class_='uvIdeaVoteCount').get_text(strip = True).replace("votes",'').replace("vote",'').replace(",","").strip()
            vote_count = int(vote_count)

            # Extracting num_comments and topic, had to seperate one class
            meta_div = idea.find('div', class_='uvIdeaMeta')

            c_tag = meta_div.find('a', title=lambda x: x and x.startswith("Comments for"))
            num_comments = c_tag.get_text(strip=True).replace("comments", "").replace("comment",'').strip()
            num_comments = int(num_comments)

            t_tag = meta_div.find('a', title=lambda x: x and x.startswith("Ideas similar to"))
            if t_tag == None:
                topic = np.nan
            else:
                topic = t_tag.get_text(strip=True)
            
            # Initializing category for each scrape
            category = 'Localization' ##Change this for other cat ##
            
            # Grabbing detailed information
            link = detailed_links[idx]
            
            # Had to add this because there were a number of faulty url's
            try:
                detailed_response = requests.get(detailed_url + link)
                detailed_soup = BeautifulSoup(detailed_response.content, 'html.parser')

                # Extract author and date
                author = detailed_soup.find('span', class_='vcard').get_text(strip=True)
                date = detailed_soup.find('time').get_text(strip=True)

                # Write to the CSV
                writer.writerow([author, name, description, vote_count, num_comments, topic, category, date])
            
            except Exception as e:
                print(f"Error processing link {link}: {e}")
                continue
            
        # Determining if there is a next page, if no page is found, break
        next_page_indicator = soup.find('a', rel='next')
    
        if not next_page_indicator:
            break

        page_no += 1 
print("Scraping completed and data is saved to twitch_ideas.csv")

Error processing link https://twitch.uservoice.com/forums/924619-localization/suggestions/43430019-add-ukrainian-localization-please: HTTPSConnectionPool(host='twitch.uservoice.comhttps', port=443): Max retries exceeded with url: //twitch.uservoice.com/forums/924619-localization/suggestions/43430019-add-ukrainian-localization-please (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001F5C5618190>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
Error processing link https://twitch.uservoice.com/forums/924619-localization/suggestions/43430019-add-ukrainian-localization-please: HTTPSConnectionPool(host='twitch.uservoice.comhttps', port=443): Max retries exceeded with url: //twitch.uservoice.com/forums/924619-localization/suggestions/43430019-add-ukrainian-localization-please (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001F5C5618160>: Failed to establish a new connection: [Errno 11001] getaddri

## Moderation [x]

In [103]:
base_url = "https://twitch.uservoice.com/forums/951706-moderation?filter=top&page="  ## Change this for other cat##
detailed_url = "https://twitch.uservoice.com"
csv_file = 'twitch_ideas.csv'
page_no = 1

# Check if the file exists. If not, write the header.
if not os.path.exists(csv_file):
    with open(csv_file, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Author', 'Name', 'Description', 'VoteCount', 'NumComments', 'Topic', 'Category', 'Date'])
        
# Iterate through each page
while True:
    url = base_url + str(page_no)
    
    # Fetch website content
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all ideas
    ideas = soup.find_all('li', class_='uvListItem uvIdea uvIdea-list')
    
    # Find all detailed links
    detailed_links = [a['href'] for a in soup.find_all('a', href=True) if "/forums/951706-moderation/suggestions/" in a['href'] and
              "#comments" not in a['href']]  ## Change this for other cat ##
    
    # Append data to the CSV file
    with open(csv_file, 'a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)

        for idx, idea in enumerate(ideas): # Needed to seperate in order to have 2 requests
            
            # Extract the required data
            name = idea.find('h2',class_='uvIdeaTitle uvCustomLink-color').get_text(strip = True)
            
            # Had to add this check as some ideas dont include descriptions
            description = idea.find('div', class_='typeset')
            if description == None:
                description = np.nan
            else:
                description = description.get_text(strip = True)
            
            vote_count = idea.find('div', class_='uvIdeaVoteCount').get_text(strip = True).replace("votes",'').replace("vote",'').replace(",","").strip()
            vote_count = int(vote_count)

            # Extracting num_comments and topic, had to seperate one class
            meta_div = idea.find('div', class_='uvIdeaMeta')

            c_tag = meta_div.find('a', title=lambda x: x and x.startswith("Comments for"))
            num_comments = c_tag.get_text(strip=True).replace("comments", "").replace("comment",'').strip()
            num_comments = int(num_comments)

            t_tag = meta_div.find('a', title=lambda x: x and x.startswith("Ideas similar to"))
            if t_tag == None:
                topic = np.nan
            else:
                topic = t_tag.get_text(strip=True)
            
            # Initializing category for each scrape
            category = 'Moderation' ##Change this for other cat ##
            
            # Grabbing detailed information
            link = detailed_links[idx]
            
            # Had to add this because there were a number of faulty url's
            try:
                detailed_response = requests.get(detailed_url + link)
                detailed_soup = BeautifulSoup(detailed_response.content, 'html.parser')

                # Extract author and date
                author = detailed_soup.find('span', class_='vcard').get_text(strip=True)
                date = detailed_soup.find('time').get_text(strip=True)

                # Write to the CSV
                writer.writerow([author, name, description, vote_count, num_comments, topic, category, date])
            
            except Exception as e:
                print(f"Error processing link {link}: {e}")
                continue
            
        # Determining if there is a next page, if no page is found, break
        next_page_indicator = soup.find('a', rel='next')
    
        if not next_page_indicator:
            break

        page_no += 1 
print("Scraping completed and data is saved to twitch_ideas.csv")

Error processing link https://twitch.uservoice.com/forums/951706-moderation/suggestions/42550657-mobile-moderation: HTTPSConnectionPool(host='twitch.uservoice.comhttps', port=443): Max retries exceeded with url: //twitch.uservoice.com/forums/951706-moderation/suggestions/42550657-mobile-moderation (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001F5DA661700>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
Error processing link https://twitch.uservoice.com/forums/951706-moderation/suggestions/42550657-mobile-moderation: HTTPSConnectionPool(host='twitch.uservoice.comhttps', port=443): Max retries exceeded with url: //twitch.uservoice.com/forums/951706-moderation/suggestions/42550657-mobile-moderation (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001F5DA6616D0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
Scraping completed and data is saved to twitch_ideas.csv


## Purchase Management [x]

In [104]:
base_url = "https://twitch.uservoice.com/forums/929338-purchase-management?filter=top&page="  ## Change this for other cat##
detailed_url = "https://twitch.uservoice.com"
csv_file = 'twitch_ideas.csv'
page_no = 1

# Check if the file exists. If not, write the header.
if not os.path.exists(csv_file):
    with open(csv_file, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Author', 'Name', 'Description', 'VoteCount', 'NumComments', 'Topic', 'Category', 'Date'])
        
# Iterate through each page
while True:
    url = base_url + str(page_no)
    
    # Fetch website content
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all ideas
    ideas = soup.find_all('li', class_='uvListItem uvIdea uvIdea-list')
    
    # Find all detailed links
    detailed_links = [a['href'] for a in soup.find_all('a', href=True) if "/forums/929338-purchase-management/suggestions/" in a['href'] and
              "#comments" not in a['href']]  ## Change this for other cat ##
    
    # Append data to the CSV file
    with open(csv_file, 'a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)

        for idx, idea in enumerate(ideas): # Needed to seperate in order to have 2 requests
            
            # Extract the required data
            name = idea.find('h2',class_='uvIdeaTitle uvCustomLink-color').get_text(strip = True)
            
            # Had to add this check as some ideas dont include descriptions
            description = idea.find('div', class_='typeset')
            if description == None:
                description = np.nan
            else:
                description = description.get_text(strip = True)
            
            vote_count = idea.find('div', class_='uvIdeaVoteCount').get_text(strip = True).replace("votes",'').replace("vote",'').replace(",","").strip()
            vote_count = int(vote_count)

            # Extracting num_comments and topic, had to seperate one class
            meta_div = idea.find('div', class_='uvIdeaMeta')

            c_tag = meta_div.find('a', title=lambda x: x and x.startswith("Comments for"))
            num_comments = c_tag.get_text(strip=True).replace("comments", "").replace("comment",'').strip()
            num_comments = int(num_comments)

            t_tag = meta_div.find('a', title=lambda x: x and x.startswith("Ideas similar to"))
            if t_tag == None:
                topic = np.nan
            else:
                topic = t_tag.get_text(strip=True)
            
            # Initializing category for each scrape
            category = 'Purchase Management' ##Change this for other cat ##
            
            # Grabbing detailed information
            link = detailed_links[idx]
            
            # Had to add this because there were a number of faulty url's
            try:
                detailed_response = requests.get(detailed_url + link)
                detailed_soup = BeautifulSoup(detailed_response.content, 'html.parser')

                # Extract author and date
                author = detailed_soup.find('span', class_='vcard').get_text(strip=True)
                date = detailed_soup.find('time').get_text(strip=True)

                # Write to the CSV
                writer.writerow([author, name, description, vote_count, num_comments, topic, category, date])
            
            except Exception as e:
                print(f"Error processing link {link}: {e}")
                continue
            
        # Determining if there is a next page, if no page is found, break
        next_page_indicator = soup.find('a', rel='next')
    
        if not next_page_indicator:
            break

        page_no += 1 
print("Scraping completed and data is saved to twitch_ideas.csv")

Scraping completed and data is saved to twitch_ideas.csv


## Safety [x]

In [105]:
base_url = "https://twitch.uservoice.com/forums/933812-safety?filter=top&page="  ## Change this for other cat##
detailed_url = "https://twitch.uservoice.com"
csv_file = 'twitch_ideas.csv'
page_no = 1

# Check if the file exists. If not, write the header.
if not os.path.exists(csv_file):
    with open(csv_file, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Author', 'Name', 'Description', 'VoteCount', 'NumComments', 'Topic', 'Category', 'Date'])
        
# Iterate through each page
while True:
    url = base_url + str(page_no)
    
    # Fetch website content
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all ideas
    ideas = soup.find_all('li', class_='uvListItem uvIdea uvIdea-list')
    
    # Find all detailed links
    detailed_links = [a['href'] for a in soup.find_all('a', href=True) if "/forums/933812-safety/suggestions/" in a['href'] and
              "#comments" not in a['href']]  ## Change this for other cat ##
    
    # Append data to the CSV file
    with open(csv_file, 'a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)

        for idx, idea in enumerate(ideas): # Needed to seperate in order to have 2 requests
            
            # Extract the required data
            name = idea.find('h2',class_='uvIdeaTitle uvCustomLink-color').get_text(strip = True)
            
            # Had to add this check as some ideas dont include descriptions
            description = idea.find('div', class_='typeset')
            if description == None:
                description = np.nan
            else:
                description = description.get_text(strip = True)
            
            vote_count = idea.find('div', class_='uvIdeaVoteCount').get_text(strip = True).replace("votes",'').replace("vote",'').replace(",","").strip()
            vote_count = int(vote_count)

            # Extracting num_comments and topic, had to seperate one class
            meta_div = idea.find('div', class_='uvIdeaMeta')

            c_tag = meta_div.find('a', title=lambda x: x and x.startswith("Comments for"))
            num_comments = c_tag.get_text(strip=True).replace("comments", "").replace("comment",'').strip()
            num_comments = int(num_comments)

            t_tag = meta_div.find('a', title=lambda x: x and x.startswith("Ideas similar to"))
            if t_tag == None:
                topic = np.nan
            else:
                topic = t_tag.get_text(strip=True)
            
            # Initializing category for each scrape
            category = 'Safety' ##Change this for other cat ##
            
            # Grabbing detailed information
            link = detailed_links[idx]
            
            # Had to add this because there were a number of faulty url's
            try:
                detailed_response = requests.get(detailed_url + link)
                detailed_soup = BeautifulSoup(detailed_response.content, 'html.parser')

                # Extract author and date
                author = detailed_soup.find('span', class_='vcard').get_text(strip=True)
                date = detailed_soup.find('time').get_text(strip=True)

                # Write to the CSV
                writer.writerow([author, name, description, vote_count, num_comments, topic, category, date])
            
            except Exception as e:
                print(f"Error processing link {link}: {e}")
                continue
            
        # Determining if there is a next page, if no page is found, break
        next_page_indicator = soup.find('a', rel='next')
    
        if not next_page_indicator:
            break

        page_no += 1 
print("Scraping completed and data is saved to twitch_ideas.csv")

Error processing link https://twitch.uservoice.com/forums/933812-safety/suggestions/44529945-sans-serif-fonts: HTTPSConnectionPool(host='twitch.uservoice.comhttps', port=443): Max retries exceeded with url: //twitch.uservoice.com/forums/933812-safety/suggestions/44529945-sans-serif-fonts (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001F5F0797070>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
Error processing link https://twitch.uservoice.com/forums/933812-safety/suggestions/44529945-sans-serif-fonts: HTTPSConnectionPool(host='twitch.uservoice.comhttps', port=443): Max retries exceeded with url: //twitch.uservoice.com/forums/933812-safety/suggestions/44529945-sans-serif-fonts (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001F5F079A070>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
Error processing link https://twitch.uservoice.com/forums/933812-safety/sugges

## Subscriptions [x]

In [106]:
base_url = "https://twitch.uservoice.com/forums/310231-subscriptions?filter=top&page="  ## Change this for other cat##
detailed_url = "https://twitch.uservoice.com"
csv_file = 'twitch_ideas.csv'
page_no = 1

# Check if the file exists. If not, write the header.
if not os.path.exists(csv_file):
    with open(csv_file, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Author', 'Name', 'Description', 'VoteCount', 'NumComments', 'Topic', 'Category', 'Date'])
        
# Iterate through each page
while True:
    url = base_url + str(page_no)
    
    # Fetch website content
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all ideas
    ideas = soup.find_all('li', class_='uvListItem uvIdea uvIdea-list')
    
    # Find all detailed links
    detailed_links = [a['href'] for a in soup.find_all('a', href=True) if "/forums/310231-subscriptions/suggestions/" in a['href'] and
              "#comments" not in a['href']]  ## Change this for other cat ##
    
    # Append data to the CSV file
    with open(csv_file, 'a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)

        for idx, idea in enumerate(ideas): # Needed to seperate in order to have 2 requests
            
            # Extract the required data
            name = idea.find('h2',class_='uvIdeaTitle uvCustomLink-color').get_text(strip = True)
            
            # Had to add this check as some ideas dont include descriptions
            description = idea.find('div', class_='typeset')
            if description == None:
                description = np.nan
            else:
                description = description.get_text(strip = True)
            
            vote_count = idea.find('div', class_='uvIdeaVoteCount').get_text(strip = True).replace("votes",'').replace("vote",'').replace(",","").strip()
            vote_count = int(vote_count)

            # Extracting num_comments and topic, had to seperate one class
            meta_div = idea.find('div', class_='uvIdeaMeta')

            c_tag = meta_div.find('a', title=lambda x: x and x.startswith("Comments for"))
            num_comments = c_tag.get_text(strip=True).replace("comments", "").replace("comment",'').strip()
            num_comments = int(num_comments)

            t_tag = meta_div.find('a', title=lambda x: x and x.startswith("Ideas similar to"))
            if t_tag == None:
                topic = np.nan
            else:
                topic = t_tag.get_text(strip=True)
            
            # Initializing category for each scrape
            category = 'Subscriptions' ##Change this for other cat ##
            
            # Grabbing detailed information
            link = detailed_links[idx]
            
            # Had to add this because there were a number of faulty url's
            try:
                detailed_response = requests.get(detailed_url + link)
                detailed_soup = BeautifulSoup(detailed_response.content, 'html.parser')

                # Extract author and date
                author = detailed_soup.find('span', class_='vcard').get_text(strip=True)
                date = detailed_soup.find('time').get_text(strip=True)

                # Write to the CSV
                writer.writerow([author, name, description, vote_count, num_comments, topic, category, date])
            
            except Exception as e:
                print(f"Error processing link {link}: {e}")
                continue
            
        # Determining if there is a next page, if no page is found, break
        next_page_indicator = soup.find('a', rel='next')
    
        if not next_page_indicator:
            break

        page_no += 1 
print("Scraping completed and data is saved to twitch_ideas.csv")

Error processing link https://twitch.uservoice.com/forums/310231-subscriptions/suggestions/38573176-add-higher-sub-gifting-badges: HTTPSConnectionPool(host='twitch.uservoice.comhttps', port=443): Max retries exceeded with url: //twitch.uservoice.com/forums/310231-subscriptions/suggestions/38573176-add-higher-sub-gifting-badges (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001F628962DC0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
Error processing link https://twitch.uservoice.com/forums/310231-subscriptions/suggestions/42603280-fair-payments-and-payouts: HTTPSConnectionPool(host='twitch.uservoice.comhttps', port=443): Max retries exceeded with url: //twitch.uservoice.com/forums/310231-subscriptions/suggestions/42603280-fair-payments-and-payouts (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001F62AEF1D00>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
Error 

## Twitch Applications: Consoles [x]

In [107]:
base_url = "https://twitch.uservoice.com/forums/310219-twitch-applications-consoles?filter=top&page="  ## Change this for other cat##
detailed_url = "https://twitch.uservoice.com"
csv_file = 'twitch_ideas.csv'
page_no = 1

# Check if the file exists. If not, write the header.
if not os.path.exists(csv_file):
    with open(csv_file, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Author', 'Name', 'Description', 'VoteCount', 'NumComments', 'Topic', 'Category', 'Date'])
        
# Iterate through each page
while True:
    url = base_url + str(page_no)
    
    # Fetch website content
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all ideas
    ideas = soup.find_all('li', class_='uvListItem uvIdea uvIdea-list')
    
    # Find all detailed links
    detailed_links = [a['href'] for a in soup.find_all('a', href=True) if "/forums/310219-twitch-applications-consoles/suggestions/" in a['href'] and
              "#comments" not in a['href']]  ## Change this for other cat ##
    
    # Append data to the CSV file
    with open(csv_file, 'a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)

        for idx, idea in enumerate(ideas): # Needed to seperate in order to have 2 requests
            
            # Extract the required data
            name = idea.find('h2',class_='uvIdeaTitle uvCustomLink-color').get_text(strip = True)
            
            # Had to add this check as some ideas dont include descriptions
            description = idea.find('div', class_='typeset')
            if description == None:
                description = np.nan
            else:
                description = description.get_text(strip = True)
            
            vote_count = idea.find('div', class_='uvIdeaVoteCount').get_text(strip = True).replace("votes",'').replace("vote",'').replace(",","").strip()
            vote_count = int(vote_count)

            # Extracting num_comments and topic, had to seperate one class
            meta_div = idea.find('div', class_='uvIdeaMeta')

            c_tag = meta_div.find('a', title=lambda x: x and x.startswith("Comments for"))
            num_comments = c_tag.get_text(strip=True).replace("comments", "").replace("comment",'').strip()
            num_comments = int(num_comments)

            t_tag = meta_div.find('a', title=lambda x: x and x.startswith("Ideas similar to"))
            if t_tag == None:
                topic = np.nan
            else:
                topic = t_tag.get_text(strip=True)
            
            # Initializing category for each scrape
            category = 'Twitch Applications: Consoles' ##Change this for other cat ##
            
            # Grabbing detailed information
            link = detailed_links[idx]
            
            # Had to add this because there were a number of faulty url's
            try:
                detailed_response = requests.get(detailed_url + link)
                detailed_soup = BeautifulSoup(detailed_response.content, 'html.parser')

                # Extract author and date
                author = detailed_soup.find('span', class_='vcard').get_text(strip=True)
                date = detailed_soup.find('time').get_text(strip=True)

                # Write to the CSV
                writer.writerow([author, name, description, vote_count, num_comments, topic, category, date])
            
            except Exception as e:
                print(f"Error processing link {link}: {e}")
                continue
            
        # Determining if there is a next page, if no page is found, break
        next_page_indicator = soup.find('a', rel='next')
    
        if not next_page_indicator:
            break

        page_no += 1 
print("Scraping completed and data is saved to twitch_ideas.csv")

Scraping completed and data is saved to twitch_ideas.csv


## Twitch Applications: Mobile [x]

In [108]:
base_url = "https://twitch.uservoice.com/forums/310222-twitch-applications-mobile?filter=top&page="  ## Change this for other cat##
detailed_url = "https://twitch.uservoice.com"
csv_file = 'twitch_ideas.csv'
page_no = 1

# Check if the file exists. If not, write the header.
if not os.path.exists(csv_file):
    with open(csv_file, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Author', 'Name', 'Description', 'VoteCount', 'NumComments', 'Topic', 'Category', 'Date'])
        
# Iterate through each page
while True:
    url = base_url + str(page_no)
    
    # Fetch website content
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all ideas
    ideas = soup.find_all('li', class_='uvListItem uvIdea uvIdea-list')
    
    # Find all detailed links
    detailed_links = [a['href'] for a in soup.find_all('a', href=True) if "/forums/310222-twitch-applications-mobile/suggestions/" in a['href'] and
              "#comments" not in a['href']]  ## Change this for other cat ##
    
    # Append data to the CSV file
    with open(csv_file, 'a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)

        for idx, idea in enumerate(ideas): # Needed to seperate in order to have 2 requests
            
            # Extract the required data
            name = idea.find('h2',class_='uvIdeaTitle uvCustomLink-color').get_text(strip = True)
            
            # Had to add this check as some ideas dont include descriptions
            description = idea.find('div', class_='typeset')
            if description == None:
                description = np.nan
            else:
                description = description.get_text(strip = True)
            
            vote_count = idea.find('div', class_='uvIdeaVoteCount').get_text(strip = True).replace("votes",'').replace("vote",'').replace(",","").strip()
            vote_count = int(vote_count)

            # Extracting num_comments and topic, had to seperate one class
            meta_div = idea.find('div', class_='uvIdeaMeta')

            c_tag = meta_div.find('a', title=lambda x: x and x.startswith("Comments for"))
            num_comments = c_tag.get_text(strip=True).replace("comments", "").replace("comment",'').strip()
            num_comments = int(num_comments)

            t_tag = meta_div.find('a', title=lambda x: x and x.startswith("Ideas similar to"))
            if t_tag == None:
                topic = np.nan
            else:
                topic = t_tag.get_text(strip=True)
            
            # Initializing category for each scrape
            category = 'Twitch Applications: Mobile' ##Change this for other cat ##
            
            # Grabbing detailed information
            link = detailed_links[idx]
            
            # Had to add this because there were a number of faulty url's
            try:
                detailed_response = requests.get(detailed_url + link)
                detailed_soup = BeautifulSoup(detailed_response.content, 'html.parser')

                # Extract author and date
                author = detailed_soup.find('span', class_='vcard').get_text(strip=True)
                date = detailed_soup.find('time').get_text(strip=True)

                # Write to the CSV
                writer.writerow([author, name, description, vote_count, num_comments, topic, category, date])
            
            except Exception as e:
                print(f"Error processing link {link}: {e}")
                continue
            
        # Determining if there is a next page, if no page is found, break
        next_page_indicator = soup.find('a', rel='next')
    
        if not next_page_indicator:
            break

        page_no += 1 
print("Scraping completed and data is saved to twitch_ideas.csv")

Error processing link https://twitch.uservoice.com/forums/310222-twitch-applications-mobile/suggestions/39000541-swipe-to-remove-continue-watching-entry: HTTPSConnectionPool(host='twitch.uservoice.comhttps', port=443): Max retries exceeded with url: //twitch.uservoice.com/forums/310222-twitch-applications-mobile/suggestions/39000541-swipe-to-remove-continue-watching-entry (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001F66BA43430>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
Error processing link https://twitch.uservoice.com/forums/310222-twitch-applications-mobile/suggestions/15724485-broadcasts-that-you-ve-recently-watched: HTTPSConnectionPool(host='twitch.uservoice.comhttps', port=443): Max retries exceeded with url: //twitch.uservoice.com/forums/310222-twitch-applications-mobile/suggestions/15724485-broadcasts-that-you-ve-recently-watched (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0

## Twitch Applications: TV Apps [x]

In [109]:
base_url = "https://twitch.uservoice.com/forums/310225-twitch-applications-tv-apps?filter=top&page="  ## Change this for other cat##
detailed_url = "https://twitch.uservoice.com"
csv_file = 'twitch_ideas.csv'
page_no = 1

# Check if the file exists. If not, write the header.
if not os.path.exists(csv_file):
    with open(csv_file, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Author', 'Name', 'Description', 'VoteCount', 'NumComments', 'Topic', 'Category', 'Date'])
        
# Iterate through each page
while True:
    url = base_url + str(page_no)
    
    # Fetch website content
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all ideas
    ideas = soup.find_all('li', class_='uvListItem uvIdea uvIdea-list')
    
    # Find all detailed links
    detailed_links = [a['href'] for a in soup.find_all('a', href=True) if "/forums/310225-twitch-applications-tv-apps/suggestions/" in a['href'] and
              "#comments" not in a['href']]  ## Change this for other cat ##
    
    # Append data to the CSV file
    with open(csv_file, 'a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)

        for idx, idea in enumerate(ideas): # Needed to seperate in order to have 2 requests
            
            # Extract the required data
            name = idea.find('h2',class_='uvIdeaTitle uvCustomLink-color').get_text(strip = True)
            
            # Had to add this check as some ideas dont include descriptions
            description = idea.find('div', class_='typeset')
            if description == None:
                description = np.nan
            else:
                description = description.get_text(strip = True)
            
            vote_count = idea.find('div', class_='uvIdeaVoteCount').get_text(strip = True).replace("votes",'').replace("vote",'').replace(",","").strip()
            vote_count = int(vote_count)

            # Extracting num_comments and topic, had to seperate one class
            meta_div = idea.find('div', class_='uvIdeaMeta')

            c_tag = meta_div.find('a', title=lambda x: x and x.startswith("Comments for"))
            num_comments = c_tag.get_text(strip=True).replace("comments", "").replace("comment",'').strip()
            num_comments = int(num_comments)

            t_tag = meta_div.find('a', title=lambda x: x and x.startswith("Ideas similar to"))
            if t_tag == None:
                topic = np.nan
            else:
                topic = t_tag.get_text(strip=True)
            
            # Initializing category for each scrape
            category = 'Twitch Applications: TV Apps' ##Change this for other cat ##
            
            # Grabbing detailed information
            link = detailed_links[idx]
            
            # Had to add this because there were a number of faulty url's
            try:
                detailed_response = requests.get(detailed_url + link)
                detailed_soup = BeautifulSoup(detailed_response.content, 'html.parser')

                # Extract author and date
                author = detailed_soup.find('span', class_='vcard').get_text(strip=True)
                date = detailed_soup.find('time').get_text(strip=True)

                # Write to the CSV
                writer.writerow([author, name, description, vote_count, num_comments, topic, category, date])
            
            except Exception as e:
                print(f"Error processing link {link}: {e}")
                continue
            
        # Determining if there is a next page, if no page is found, break
        next_page_indicator = soup.find('a', rel='next')
    
        if not next_page_indicator:
            break

        page_no += 1 
print("Scraping completed and data is saved to twitch_ideas.csv")

Error processing link https://twitch.uservoice.com/forums/310225-twitch-applications-tv-apps/suggestions/41734411-on-the-lg-webos-app-make-it-so-the-twitch-chat-do: HTTPSConnectionPool(host='twitch.uservoice.comhttps', port=443): Max retries exceeded with url: //twitch.uservoice.com/forums/310225-twitch-applications-tv-apps/suggestions/41734411-on-the-lg-webos-app-make-it-so-the-twitch-chat-do (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001F6767210D0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
Scraping completed and data is saved to twitch_ideas.csv


## Twitch Studio [x]

In [111]:
base_url = "https://twitch.uservoice.com/forums/923041-twitch-studio?filter=top&page="  ## Change this for other cat##
detailed_url = "https://twitch.uservoice.com"
csv_file = 'twitch_ideas.csv'
page_no = 1

# Check if the file exists. If not, write the header.
if not os.path.exists(csv_file):
    with open(csv_file, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Author', 'Name', 'Description', 'VoteCount', 'NumComments', 'Topic', 'Category', 'Date'])
        
# Iterate through each page
while True:
    url = base_url + str(page_no)
    
    # Fetch website content
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all ideas
    ideas = soup.find_all('li', class_='uvListItem uvIdea uvIdea-list')
    
    # Find all detailed links
    detailed_links = [a['href'] for a in soup.find_all('a', href=True) if "/forums/923041-twitch-studio/suggestions/" in a['href'] and
              "#comments" not in a['href']]  ## Change this for other cat ##
    
    # Append data to the CSV file
    with open(csv_file, 'a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)

        for idx, idea in enumerate(ideas): # Needed to seperate in order to have 2 requests
            
            # Extract the required data
            name = idea.find('h2',class_='uvIdeaTitle uvCustomLink-color').get_text(strip = True)
            
            # Had to add this check as some ideas dont include descriptions
            description = idea.find('div', class_='typeset')
            if description == None:
                description = np.nan
            else:
                description = description.get_text(strip = True)
            
            vote_count = idea.find('div', class_='uvIdeaVoteCount').get_text(strip = True).replace("votes",'').replace("vote",'').replace(",","").strip()
            vote_count = int(vote_count)

            # Extracting num_comments and topic, had to seperate one class
            meta_div = idea.find('div', class_='uvIdeaMeta')

            c_tag = meta_div.find('a', title=lambda x: x and x.startswith("Comments for"))
            num_comments = c_tag.get_text(strip=True).replace("comments", "").replace("comment",'').strip()
            num_comments = int(num_comments)

            t_tag = meta_div.find('a', title=lambda x: x and x.startswith("Ideas similar to"))
            if t_tag == None:
                topic = np.nan
            else:
                topic = t_tag.get_text(strip=True)
            
            # Initializing category for each scrape
            category = 'Twitch Studio' ##Change this for other cat ##
            
            # Grabbing detailed information
            link = detailed_links[idx]
            
            # Had to add this because there were a number of faulty url's
            try:
                detailed_response = requests.get(detailed_url + link)
                detailed_soup = BeautifulSoup(detailed_response.content, 'html.parser')

                # Extract author and date
                author = detailed_soup.find('span', class_='vcard').get_text(strip=True)
                date = detailed_soup.find('time').get_text(strip=True)

                # Write to the CSV
                writer.writerow([author, name, description, vote_count, num_comments, topic, category, date])
            
            except Exception as e:
                print(f"Error processing link {link}: {e}")
                continue
            
        # Determining if there is a next page, if no page is found, break
        next_page_indicator = soup.find('a', rel='next')
    
        if not next_page_indicator:
            break

        page_no += 1 
print("Scraping completed and data is saved to twitch_ideas.csv")

Error processing link https://twitch.uservoice.com/forums/923041-twitch-studio/suggestions/38368711-build-in-audio-mixer-to-be-able-to-mute-programs-o: HTTPSConnectionPool(host='twitch.uservoice.comhttps', port=443): Max retries exceeded with url: //twitch.uservoice.com/forums/923041-twitch-studio/suggestions/38368711-build-in-audio-mixer-to-be-able-to-mute-programs-o (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001F6A13367C0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
Scraping completed and data is saved to twitch_ideas.csv


## User Accessibility [x]

In [112]:
base_url = "https://twitch.uservoice.com/forums/926080-user-accessibility?filter=top&page="  ## Change this for other cat##
detailed_url = "https://twitch.uservoice.com"
csv_file = 'twitch_ideas.csv'
page_no = 1

# Check if the file exists. If not, write the header.
if not os.path.exists(csv_file):
    with open(csv_file, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Author', 'Name', 'Description', 'VoteCount', 'NumComments', 'Topic', 'Category', 'Date'])
        
# Iterate through each page
while True:
    url = base_url + str(page_no)
    
    # Fetch website content
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all ideas
    ideas = soup.find_all('li', class_='uvListItem uvIdea uvIdea-list')
    
    # Find all detailed links
    detailed_links = [a['href'] for a in soup.find_all('a', href=True) if "/forums/926080-user-accessibility/suggestions/" in a['href'] and
              "#comments" not in a['href']]  ## Change this for other cat ##
    
    # Append data to the CSV file
    with open(csv_file, 'a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)

        for idx, idea in enumerate(ideas): # Needed to seperate in order to have 2 requests
            
            # Extract the required data
            name = idea.find('h2',class_='uvIdeaTitle uvCustomLink-color').get_text(strip = True)
            
            # Had to add this check as some ideas dont include descriptions
            description = idea.find('div', class_='typeset')
            if description == None:
                description = np.nan
            else:
                description = description.get_text(strip = True)
            
            vote_count = idea.find('div', class_='uvIdeaVoteCount').get_text(strip = True).replace("votes",'').replace("vote",'').replace(",","").strip()
            vote_count = int(vote_count)

            # Extracting num_comments and topic, had to seperate one class
            meta_div = idea.find('div', class_='uvIdeaMeta')

            c_tag = meta_div.find('a', title=lambda x: x and x.startswith("Comments for"))
            num_comments = c_tag.get_text(strip=True).replace("comments", "").replace("comment",'').strip()
            num_comments = int(num_comments)

            t_tag = meta_div.find('a', title=lambda x: x and x.startswith("Ideas similar to"))
            if t_tag == None:
                topic = np.nan
            else:
                topic = t_tag.get_text(strip=True)
            
            # Initializing category for each scrape
            category = 'User Accessibility' ##Change this for other cat ##
            
            # Grabbing detailed information
            link = detailed_links[idx]
            
            # Had to add this because there were a number of faulty url's
            try:
                detailed_response = requests.get(detailed_url + link)
                detailed_soup = BeautifulSoup(detailed_response.content, 'html.parser')

                # Extract author and date
                author = detailed_soup.find('span', class_='vcard').get_text(strip=True)
                date = detailed_soup.find('time').get_text(strip=True)

                # Write to the CSV
                writer.writerow([author, name, description, vote_count, num_comments, topic, category, date])
            
            except Exception as e:
                print(f"Error processing link {link}: {e}")
                continue
            
        # Determining if there is a next page, if no page is found, break
        next_page_indicator = soup.find('a', rel='next')
    
        if not next_page_indicator:
            break

        page_no += 1 
print("Scraping completed and data is saved to twitch_ideas.csv")

Scraping completed and data is saved to twitch_ideas.csv


## Video Features [x]

In [113]:
base_url = "https://twitch.uservoice.com/forums/923368-video-features?filter=top&page="  ## Change this for other cat##
detailed_url = "https://twitch.uservoice.com"
csv_file = 'twitch_ideas.csv'
page_no = 1

# Check if the file exists. If not, write the header.
if not os.path.exists(csv_file):
    with open(csv_file, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Author', 'Name', 'Description', 'VoteCount', 'NumComments', 'Topic', 'Category', 'Date'])
        
# Iterate through each page
while True:
    url = base_url + str(page_no)
    
    # Fetch website content
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all ideas
    ideas = soup.find_all('li', class_='uvListItem uvIdea uvIdea-list')
    
    # Find all detailed links
    detailed_links = [a['href'] for a in soup.find_all('a', href=True) if "/forums/923368-video-features/suggestions/" in a['href'] and
              "#comments" not in a['href']]  ## Change this for other cat ##
    
    # Append data to the CSV file
    with open(csv_file, 'a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)

        for idx, idea in enumerate(ideas): # Needed to seperate in order to have 2 requests
            
            # Extract the required data
            name = idea.find('h2',class_='uvIdeaTitle uvCustomLink-color').get_text(strip = True)
            
            # Had to add this check as some ideas dont include descriptions
            description = idea.find('div', class_='typeset')
            if description == None:
                description = np.nan
            else:
                description = description.get_text(strip = True)
            
            vote_count = idea.find('div', class_='uvIdeaVoteCount').get_text(strip = True).replace("votes",'').replace("vote",'').replace(",","").strip()
            vote_count = int(vote_count)

            # Extracting num_comments and topic, had to seperate one class
            meta_div = idea.find('div', class_='uvIdeaMeta')

            c_tag = meta_div.find('a', title=lambda x: x and x.startswith("Comments for"))
            num_comments = c_tag.get_text(strip=True).replace("comments", "").replace("comment",'').strip()
            num_comments = int(num_comments)

            t_tag = meta_div.find('a', title=lambda x: x and x.startswith("Ideas similar to"))
            if t_tag == None:
                topic = np.nan
            else:
                topic = t_tag.get_text(strip=True)
            
            # Initializing category for each scrape
            category = 'Video Features' ##Change this for other cat ##
            
            # Grabbing detailed information
            link = detailed_links[idx]
            
            # Had to add this because there were a number of faulty url's
            try:
                detailed_response = requests.get(detailed_url + link)
                detailed_soup = BeautifulSoup(detailed_response.content, 'html.parser')

                # Extract author and date
                author = detailed_soup.find('span', class_='vcard').get_text(strip=True)
                date = detailed_soup.find('time').get_text(strip=True)

                # Write to the CSV
                writer.writerow([author, name, description, vote_count, num_comments, topic, category, date])
            
            except Exception as e:
                print(f"Error processing link {link}: {e}")
                continue
            
        # Determining if there is a next page, if no page is found, break
        next_page_indicator = soup.find('a', rel='next')
    
        if not next_page_indicator:
            break

        page_no += 1 
print("Scraping completed and data is saved to twitch_ideas.csv")

Error processing link https://twitch.uservoice.com/forums/923368-video-features/suggestions/43874217-remove-update-collection-limit: HTTPSConnectionPool(host='twitch.uservoice.comhttps', port=443): Max retries exceeded with url: //twitch.uservoice.com/forums/923368-video-features/suggestions/43874217-remove-update-collection-limit (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001F6E32FB670>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
Scraping completed and data is saved to twitch_ideas.csv


## Video Performance [x]

In [115]:
base_url = "https://twitch.uservoice.com/forums/310207-video-performance?filter=top&page="  ## Change this for other cat##
detailed_url = "https://twitch.uservoice.com"
csv_file = 'twitch_ideas.csv'
page_no = 1

# Check if the file exists. If not, write the header.
if not os.path.exists(csv_file):
    with open(csv_file, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Author', 'Name', 'Description', 'VoteCount', 'NumComments', 'Topic', 'Category', 'Date'])
        
# Iterate through each page
while True:
    url = base_url + str(page_no)
    
    # Fetch website content
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all ideas
    ideas = soup.find_all('li', class_='uvListItem uvIdea uvIdea-list')
    
    # Find all detailed links
    detailed_links = [a['href'] for a in soup.find_all('a', href=True) if "/forums/310207-video-performance/suggestions/" in a['href'] and
              "#comments" not in a['href']]  ## Change this for other cat ##
    
    # Append data to the CSV file
    with open(csv_file, 'a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)

        for idx, idea in enumerate(ideas): # Needed to seperate in order to have 2 requests
            
            # Extract the required data
            name = idea.find('h2',class_='uvIdeaTitle uvCustomLink-color').get_text(strip = True)
            
            # Had to add this check as some ideas dont include descriptions
            description = idea.find('div', class_='typeset')
            if description == None:
                description = np.nan
            else:
                description = description.get_text(strip = True)
            
            vote_count = idea.find('div', class_='uvIdeaVoteCount').get_text(strip = True).replace("votes",'').replace("vote",'').replace(",","").strip()
            vote_count = int(vote_count)

            # Extracting num_comments and topic, had to seperate one class
            meta_div = idea.find('div', class_='uvIdeaMeta')

            c_tag = meta_div.find('a', title=lambda x: x and x.startswith("Comments for"))
            num_comments = c_tag.get_text(strip=True).replace("comments", "").replace("comment",'').strip()
            num_comments = int(num_comments)

            t_tag = meta_div.find('a', title=lambda x: x and x.startswith("Ideas similar to"))
            if t_tag == None:
                topic = np.nan
            else:
                topic = t_tag.get_text(strip=True)
            
            # Initializing category for each scrape
            category = 'Video Performance' ##Change this for other cat ##
            
            # Grabbing detailed information
            link = detailed_links[idx]
            
            # Had to add this because there were a number of faulty url's
            try:
                detailed_response = requests.get(detailed_url + link)
                detailed_soup = BeautifulSoup(detailed_response.content, 'html.parser')

                # Extract author and date
                author = detailed_soup.find('span', class_='vcard').get_text(strip=True)
                date = detailed_soup.find('time').get_text(strip=True)

                # Write to the CSV
                writer.writerow([author, name, description, vote_count, num_comments, topic, category, date])
            
            except Exception as e:
                print(f"Error processing link {link}: {e}")
                continue
            
        # Determining if there is a next page, if no page is found, break
        next_page_indicator = soup.find('a', rel='next')
    
        if not next_page_indicator:
            break

        page_no += 1 
print("Scraping completed and data is saved to twitch_ideas.csv")

Scraping completed and data is saved to twitch_ideas.csv


---------------------------------------------------------------------------------------------------------------------------
## Trial Scraper for chat

In [70]:
# Target URL
BASE_URL = "https://twitch.uservoice.com/forums/310201-chat?filter=top&page="
detailed_url = "https://twitch.uservoice.com"
page_no = 1
csv_file = 'twitch_ideas_test.csv'

# Check if the file exists. If not, write the header.
if not os.path.exists(csv_file):
    with open(csv_file, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Author', 'Name', 'Description', 'VoteCount', 'NumComments', 'Topic', 'Category', 'Date'])
        
# Iterate through each page
while True:
    if page_no > 2:# Stop after 15 pages
        break
    url = BASE_URL + str(page_no)
    
    # Fetch website content
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all ideas
    ideas = soup.find_all('li', class_='uvListItem uvIdea uvIdea-list')
    
    # Find all detailed links
    detailed_links = [a['href'] for a in soup.find_all('a', href=True) if "/forums/310201-chat/suggestions/" in a['href'] and
              "#comments" not in a['href']]
    
    # Append data to the CSV file
    with open(csv_file, 'a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)

        for idx, idea in enumerate(ideas):
            
            # Extract the required data
            name = idea.find('h2',class_='uvIdeaTitle uvCustomLink-color').get_text(strip = True)
            
            # Had to add this check as some ideas dont include descriptions
            description = idea.find('div', class_='typeset')
            if description == None:
                description = np.nan
            else:
                description = description.get_text(strip = True)
            
            vote_count = idea.find('div', class_='uvIdeaVoteCount').get_text(strip = True).replace("votes",'').replace("vote",'').replace(",","").strip()
            vote_count = int(vote_count)

            # Extracting num_comments and topic, had to seperate one class
            meta_div = idea.find('div', class_='uvIdeaMeta')

            c_tag = meta_div.find('a', title=lambda x: x and x.startswith("Comments for"))
            num_comments = c_tag.get_text(strip=True).replace("comments", "").replace("comment",'').strip()
            num_comments = int(num_comments)

            t_tag = meta_div.find('a', title=lambda x: x and x.startswith("Ideas similar to"))
            topic = t_tag.get_text(strip=True)
            
            # Initializing category for each scrape
            category = 'Chat'
            
            # Grabbing detailed information
            link = detailed_links[idx]
            detailed_response = requests.get(detailed_url + link)
            detailed_soup = BeautifulSoup(detailed_response.content, 'html.parser')

            # Extract author and date
            author = detailed_soup.find('span', class_='vcard').get_text(strip=True)
            date = detailed_soup.find('time').get_text(strip=True)
            
            # Write to the CSV
            writer.writerow([author, name, description, vote_count, num_comments, topic, category, date])
            
        # Determining if there is a next page, if no page is found, break
        next_page_indicator = soup.find('a', rel='next')
    
        if not next_page_indicator:
            break

        page_no += 1 
print("Scraping completed and data is saved to twitch_ideas_test.csv")

Scraping completed and data is saved to twitch_ideas_test.csv


In [68]:
import pandas as pd
data = pd.read_csv('twitch_ideas.csv')
data['Date'] = pd.to_datetime(data['Date'])
data

Unnamed: 0,Author,Name,Description,VoteCount,NumComments,Topic,Category,Date
0,iateyourpie,Keep Moments,Moments is a feature that Twitch released as a...,4191,453,Stream Chat,Chat,2023-08-31
1,iAndy88,Verified status for everyone and /verifiedonly...,I would appreciate a very helpful feature for ...,4049,246,Feature Request,Chat,2021-01-12
2,GohgoDude,Channel Points Leaderboard,Provide two leaderboards for channels that hav...,1883,21,Leaderboards,Chat,2020-01-08
3,Victor_sueca,/spoiler command,Users sending a spoiler could use this command...,817,15,Commands,Chat,2016-12-17
4,xSwagzy,Birthday Badge,It would be nice to have a birthday badge for ...,597,11,Feature Request,Chat,2019-11-27
5,LunarGabriel,Let chants be optional instead of removing them,I know Twitch says the feature seemed to cause...,573,42,Feature Request,Chat,2022-01-25
6,subwithpr1me,Ability to turn off chat reply/threads feature,The chat replies feature may be cool for some ...,513,41,Replies,Chat,2020-08-10
7,shiinto1993,Delete your own message,"It would be very nice, if it's possible to del...",507,28,Stream Chat,Chat,2019-10-08
8,"AdminTwitch(Admin, Twitch)",[Test] Crowd Chant,"Some communities will notice a new way to ""cha...",441,259,Stream Chat,Chat,2021-05-18
9,S7ylehunter,Deleteable/Eraseble whispers or direct messages.,For give aways sometimes important.The message...,413,46,Whispers,Chat,2023-10-01


## Sample CSV writer 

In [6]:
BASE_URL = "https://twitch.uservoice.com/forums/310201-chat?filter=top&page="
page_no = 1
csv_file = 'twitch_ideas.csv'

# Check if the file exists. If not, write the header.
if not os.path.exists(csv_file):
    with open(csv_file, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Name'])

# Iterate through each page
while True:
    if page_no > 3:  # Stop after 3 pages
        break

    url = BASE_URL + str(page_no)

    # Fetch website content
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    ideas = soup.find_all('li', class_='uvListItem uvIdea uvIdea-list')

    # Append data to the CSV file
    with open(csv_file, 'a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)

        for idea in ideas:
            # Extract the required data
            name = idea.find('h2', class_='uvIdeaTitle uvCustomLink-color').get_text(strip=True)

            # Write to the CSV
            writer.writerow([name])

    # Check for the next page. If no next page is found, break
    next_page_indicator = soup.find('a', rel='next')
    if not next_page_indicator:
        print("Scraping completed and data is saved to twitch_ideas.csv")
        break

    page_no += 1

## Testing data extraction

In [181]:
list_items = []
for idea in ideas:
    name = idea.find('h2',class_='uvIdeaTitle uvCustomLink-color').get_text(strip = True)
    list_items.append(name)

In [182]:
list_items

['Keep Moments',
 'Verified status for everyone and /verifiedonly chat mode',
 'Channel Points Leaderboard',
 '/spoiler command',
 'Birthday Badge',
 'Let chants be optional instead of removing them',
 'Ability to turn off chat reply/threads feature',
 'Delete your own message',
 '[Test] Crowd Chant',
 'Deleteable/Eraseble whispers or direct messages.',
 'Developer Chat Badge',
 'Add a queue to /shoutout',
 'Recent chat history',
 'Get Rid of Chat Replies',
 'View twitch chat in VODS',
 'Allow Incoming Viewers To See Previous Chat Messages',
 'Option to not show the results before people vote',
 'Fix the constant disconnecting of chat.',
 'Create a "Global Paintbrush Badge" for artists who have designed emotes for a certain # of channels',
 'total follow time instead of follow date']

## Seperating a single element into two

In [177]:
list_items_1 = []
list_items_2 = []
for idea in ideas:
    meta_div = idea.find('div', class_='uvIdeaMeta')
        
    comments_tag = meta_div.find('a', title=lambda x: x and x.startswith("Comments for"))
    num_comments = comments_tag.get_text(strip=True).replace("comments", "").strip()
    num_comments = int(num_comments)

    category_tag = meta_div.find('a', title=lambda x: x and x.startswith("Ideas similar to"))
    category = category_tag.get_text(strip=True)
    list_items_1.append(num_comments)
    list_items_2.append(category)

In [178]:
len(list_items_1)

20

In [179]:
len(list_items_2)

20

In [180]:
list_items_1

[453, 246, 21, 15, 11, 42, 41, 28, 259, 46, 15, 35, 20, 25, 6, 4, 3, 45, 9, 71]

In [156]:
list_items_2

['Stream Chat',
 'Feature Request',
 'Leaderboards',
 'Commands',
 'Feature Request',
 'Feature Request',
 'Replies',
 'Stream Chat',
 'Stream Chat',
 'Whispers',
 'Stream Chat',
 'Commands',
 'Feature Request',
 'Replies',
 'Feature Request',
 'Stream Chat',
 'Polls',
 'Bugs',
 'Identity',
 'User Card']

## Attempt at looping pages

In [191]:
BASE_URL = "https://twitch.uservoice.com/forums/310201-chat?filter=top&page="
page_no = 1
multi_test = []
while True:
    url = BASE_URL + str(page_no)
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    ideas = soup.find_all('li', class_='uvListItem uvIdea uvIdea-list')
    
    # All scraping logic here
    for idea in ideas:
        name = idea.find('h2',class_='uvIdeaTitle uvCustomLink-color').get_text(strip = True)
        multi_test.append(name)

    next_page_indicator = soup.find('a', rel='next')
    
    if not next_page_indicator:
        break

    page_no += 1 

In [193]:
len(multi_test)

855

## Attempt at grabbing detailed information

In [45]:
detailed_url = "https://twitch.uservoice.com"
BASE_URL = "https://twitch.uservoice.com/forums/310201-chat?filter=top&page="
page_no = 1
authorl = []
datel = []
while True:
    if page_no > 2:# Stop after 2 pages
        break
    url = BASE_URL + str(page_no)
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    ideas = soup.find_all('li', class_='uvListItem uvIdea uvIdea-list')
    detailed_links = [a['href'] for a in soup.find_all('a', href=True) if "/forums/310201-chat/suggestions/" in a['href'] and
              "#comments" not in a['href']]
    
    # All scraping logic here
    for link in detailed_links:
        time.sleep(1)
    
        # Make a request to the detailed idea page
        idea_response = requests.get(detailed_url + link)
        idea_soup = BeautifulSoup(idea_response.content, 'html.parser')

        # Extract author and date
        author = idea_soup.find('span', class_='vcard').get_text(strip=True)
        date = idea_soup.find('time').get_text(strip=True)
        
        authorl.append(author)
        datel.append(date)

    next_page_indicator = soup.find('a', rel='next')
    
    if not next_page_indicator:
        break

    page_no += 1 

In [46]:
authorl

['iateyourpie',
 'iAndy88',
 'GohgoDude',
 'Victor_sueca',
 'xSwagzy',
 'LunarGabriel',
 'subwithpr1me',
 'shiinto1993',
 'AdminTwitch(Admin, Twitch)',
 'S7ylehunter',
 'AdminJon Bulava(Developer Relations, Twitch)',
 'MoCoMade',
 'rafallero12',
 'ChefHazmat',
 'ranozex',
 'Gozen_',
 'MarTyNi',
 'bluee_pc',
 'Vessi',
 'mellen',
 'amygobrrr',
 'Rzfff',
 'J2xxR',
 'ctrl_CH',
 'RedWeird',
 'James',
 'ChiChi',
 'Anonymous',
 'Niuham',
 'Anonymous',
 'Mattie2171',
 'damnzl',
 'xmon390',
 'Danat',
 'sgtnoah',
 'InvisibleMan__',
 'treemarie',
 'Obst',
 'Wynced',
 'andreams_tv']

In [47]:
len(authorl)

40

In [49]:
datel[0]

'August 31, 2023'

In [48]:
type(datel[0])

str