This project scrapes battle-rapper data from VerseTracker into a pandas DataFrame, saves the DataFrame as a CSV file, and uploads that CSV to Google BigQuery.

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager
import time
from google.cloud import bigquery
import pandas_gbq
# BigQuery client built from the default credentials available at this point.
# NOTE(review): `client` is not referenced anywhere else in the visible code
# (the upload goes through pandas_gbq), and this line appears before the cell
# that sets GOOGLE_APPLICATION_CREDENTIALS — confirm it is still needed and
# that credentials are available when it executes.
client = bigquery.Client()
import os 

In [None]:
# Scrape every page of the VerseTracker rapper listing (filtered by
# field_country_tid=21227, sorted by total views) and, for each rapper card,
# visit the profile page to collect hometown, bio and view/battle statistics.
#
# Results are accumulated column-wise in `battle_rappers_dict`. Every rapper
# contributes exactly one value to EVERY column (an empty string when a field
# is missing or the profile could not be scraped), so all column lists always
# have the same length and can be turned into a DataFrame.
index = 0
battle_rappers_dict = {"Battle Rapper": [], "Hometown": [], "Bio": [], "Total Battles": [], "Total Views": [], "Average Views": []}
prepend_url = 'https://versetracker.com'

# Seconds to wait for the server before giving up; without a timeout
# `requests.get` can block forever on a stalled connection.
REQUEST_TIMEOUT = 30


def _num_font_text(block):
    """Return the text of the ``num-font`` span inside *block*, or "" if absent."""
    if block is None:
        return ""
    span = block.find("span", class_="num-font")
    return span.text if span is not None else ""


def _scrape_profile(url):
    """Fetch one rapper profile page and return its fields keyed by column name.

    Missing elements yield empty strings. Network errors raised by
    ``requests`` propagate to the caller.
    """
    profile_html = requests.get(url, timeout=REQUEST_TIMEOUT).text
    br_soup = BeautifulSoup(profile_html, 'html.parser')

    hometown = br_soup.find('div', class_="rapper-location-info")
    bio = br_soup.find('div', class_="rapper-bio")
    stats = br_soup.find('div', class_="rapper-stat-block")

    # The first stat block holds total views; the next two sibling elements
    # are read as total battles and average views, in that order.
    # `find_next_sibling()` returns the next *tag*, skipping any whitespace
    # text node that plain `.next_sibling` could land on.
    battles_block = stats.find_next_sibling() if stats is not None else None
    average_block = battles_block.find_next_sibling() if battles_block is not None else None

    return {
        "Hometown": hometown.text.strip() if hometown else "",
        "Bio": bio.text.strip() if bio else "",
        "Total Views": _num_font_text(stats),
        "Total Battles": _num_font_text(battles_block),
        "Average Views": _num_font_text(average_block),
    }


while True:
    print("Page Extracted:", index)

    response = requests.get(
        f"https://versetracker.com/all-rappers?name_1=&field_country_tid=21227&field_gender_value=&field_speaks_english_value=&sort_by=field_total_views_value&page={index}",
        timeout=REQUEST_TIMEOUT,
    )
    if not response.ok:
        # Make a server-side failure visible instead of letting it look like
        # a normal end of pagination.
        print(f"Warning: page {index} returned HTTP {response.status_code}")

    soup = BeautifulSoup(response.text, 'html.parser')
    extractedRows = soup.find_all('div', class_="views-row")

    # A page without any result rows marks the end of the listing.
    if not extractedRows:
        print("No more pages. Exiting.")
        break

    for row in extractedRows:
        for col in row.find_all('div', class_='views-column'):
            name_tag = col.find('div', class_="rapper-grid-rapper-name")

            # Columns without a name are not rapper cards; skip them.
            if not name_tag:
                continue

            name = name_tag.text.strip()

            # Start from an all-empty row so a partial failure can never
            # leave the columns with different lengths.
            record = dict.fromkeys(battle_rappers_dict, "")
            record["Battle Rapper"] = name

            link_tag = col.find('a')
            link = link_tag.get('href') if link_tag is not None else None

            if link:
                try:
                    record.update(_scrape_profile(prepend_url + link))
                except Exception as e:
                    # Deliberate best-effort boundary: one bad profile must
                    # not abort a long-running scrape. The rapper is kept
                    # with empty detail fields.
                    print(f"Error scraping {name}: {e}")
            else:
                print(f"Error scraping {name}: no profile link found")

            for column, value in record.items():
                battle_rappers_dict[column].append(value)

    index += 1


In [None]:
# Turn the scraped columns into a table and persist it as CSV.
# index=False keeps the DataFrame's row index out of the file.
df = pd.DataFrame(data=battle_rappers_dict)
df.to_csv(path_or_buf='us_battle_rappers_complete.csv', index=False)

In [None]:
# Point the Google client libraries at the service-account key file.
# NOTE(review): the path is machine-specific, and the name `json` would shadow
# the standard-library module of the same name if it were imported — confirm
# whether this variable is used anywhere.
json = "/Users/reginaldamedee/downloads/battlerapperdb-b2f485aaaacb.json"
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = json

In [None]:
# Upload the scraped dataset to BigQuery through pandas-gbq.
project_id = 'battlerapperdb'
# pandas-gbq takes the destination in "<dataset>.<table>" form.
table_name = "battlerapperdb.us_based_battle_rappers_table"

# Read back the CSV written by the export step. The previous filename
# ('us_battle_rappers.csv') is not written by any visible cell, so the upload
# would either fail or push a stale file.
df = pd.read_csv('us_battle_rappers_complete.csv')
# if_exists='replace' overwrites the existing table on every run.
pandas_gbq.to_gbq(df, table_name, project_id=project_id, if_exists='replace')
print(f"DataFrame successfully uploaded to BigQuery table: {table_name}")