In [3]:
import os

import pandas as pd
import psycopg2 as pg2
import requests
from bs4 import BeautifulSoup

In [44]:
### Contestant Page ###

# Download a single contestant's wiki page and parse it into `contestant_soup`.
# Alternate page that was tried earlier (its assignment was immediately overwritten):
#   'https://survivor.fandom.com/wiki/Julie_Alley'
contestant_url = 'https://survivor.fandom.com/wiki/Rupert_Boneham'

# A timeout keeps a stalled connection from hanging the kernel indefinitely, and
# raise_for_status() prevents an HTTP error page from being parsed as real content.
response = requests.get(contestant_url, timeout=30)
response.raise_for_status()
web_page = response.content
contestant_soup = BeautifulSoup(web_page, 'html.parser')

In [45]:
### Contestant Soup Processing ###

# Reads `contestant_soup` and produces three values:
#   num_seasons    - count of <nav class="pi-navigation" data-item-name="season"> elements
#   description    - content of the <meta name="description"> tag, or None
#   challenge_wins - stripped text of the challenge-wins value element, or None

# Find number of seasons contestant played. I.e., # of <nav> elements with below class and attrs
nav_elements = contestant_soup.find_all('nav', class_='pi-navigation', attrs={'data-item-name': 'season'})
num_seasons = len(nav_elements)

if num_seasons > 1:  # Data is in different spot on web page. May just manually sort for these cases.
    description, challenge_wins = None, None
else:
    # Find and extract content from 'description' meta tag
    description_tag = contestant_soup.find('meta', attrs={'name': 'description'})
    description = description_tag.get('content') if description_tag is not None else None

    # Find and extract challenge wins. Both lookups can come back empty, so each
    # step is guarded instead of calling .text on a possible None.
    challenge_wins_tag = contestant_soup.find('div', {'data-source': 'challenges'})
    challenge_wins_value = (
        challenge_wins_tag.find('div', class_='pi-data-value pi-font')
        if challenge_wins_tag is not None
        else None
    )
    challenge_wins = (
        challenge_wins_value.text.strip() if challenge_wins_value is not None else None
    )

print(num_seasons)
print(description)
print(challenge_wins)

4
Rupert Boneham
Rupert Fredrick Boneham is a contestant from Survivor: Pearl Islands, Survivor: All-Stars, Survivor: Heroes vs. Villains, and Survivor: Blood vs. Water. In Pearl Islands, Rupert won the hearts of fans with his colorful personality, as he showcased a grizzled, piratical persona that melded well...
11


In [17]:
### Season Page ###

# Download one season's wiki page and parse it into `soup`.
url = 'https://survivor.fandom.com/wiki/Survivor_45'

# A timeout keeps a stalled connection from hanging the kernel indefinitely, and
# raise_for_status() prevents an HTTP error page from being parsed as real content.
response = requests.get(url, timeout=30)
response.raise_for_status()
web_page = response.content
soup = BeautifulSoup(web_page, 'html.parser')

In [38]:
# Build one record per contestant (name, wiki link, other season) from the
# 'wikitable sortable' table in `soup`, then load the records into a DataFrame.
# NOTE: `soup` must currently hold a season page; it is whatever page was parsed last.

# Initialize an empty list to hold the contestant details
contestant_details = []

# Find the table
table = soup.find('table', {'class': 'wikitable sortable'})
if table is None:
    # Fail with a clear message instead of an AttributeError on None below.
    raise ValueError("No table with class 'wikitable sortable' found in the parsed page.")

# Iterate through each row in the table, skipping the first two header rows
for row in table.find_all('tr')[2:]:
    cells = row.find_all('td')
    if len(cells) < 2:
        # Header rows have no <td>; rows with a single <td> have no second cell to read.
        continue

    # The first link in the second cell (index 1) carries both the name and the wiki path
    name_tag = cells[1].find('a', href=True)
    if name_tag is None:
        continue

    name = name_tag.text  # Link text is the contestant's name
    link = name_tag['href']
    full_link = f"https://survivor.fandom.com{link}" if link else None

    # An additional season link, when present, sits inside the cell's first <i> tag
    italic_tag = cells[1].find('i')
    other_seasons_tag = italic_tag.find('a', title=True) if italic_tag is not None else None
    other_seasons = other_seasons_tag['title'] if other_seasons_tag is not None else None

    # Append the details to the list
    contestant_details.append({'name': name, "wiki_link": full_link, "other_seasons": other_seasons})

# Create a DataFrame from the list
df_contestants = pd.DataFrame(contestant_details)

# Display or save the DataFrame
print(df_contestants)

                 name                                          wiki_link  \
0         Hannah Rose       https://survivor.fandom.com/wiki/Hannah_Rose   
1      Brandon Donlon    https://survivor.fandom.com/wiki/Brandon_Donlon   
2   Sabiyah Broderick  https://survivor.fandom.com/wiki/Sabiyah_Brode...   
3        Sean Edwards      https://survivor.fandom.com/wiki/Sean_Edwards   
4        Brando Meyer      https://survivor.fandom.com/wiki/Brando_Meyer   
5             J. Maya           https://survivor.fandom.com/wiki/J._Maya   
6          Sifu Alsup        https://survivor.fandom.com/wiki/Sifu_Alsup   
7     Kaleb Gebrewold   https://survivor.fandom.com/wiki/Kaleb_Gebrewold   
8   Kellie Nalbandian  https://survivor.fandom.com/wiki/Kellie_Nalban...   
9    Kendra McQuarrie  https://survivor.fandom.com/wiki/Kendra_McQuarrie   
10    Bruce Perreault   https://survivor.fandom.com/wiki/Bruce_Perreault   
11      Emily Flippen     https://survivor.fandom.com/wiki/Emily_Flippen   
12        Dr

In [35]:
# Create the contestants table for one season in the 'survivor' database.

season_number = 45
table_name = f"seasons.season_{season_number}_contestants"

# SQL query to create the table
create_table_query = f"""
CREATE TABLE {table_name} (
    id SERIAL PRIMARY KEY,
    contestant_name VARCHAR(255),
    contestant_link VARCHAR(255),
    other_seasons VARCHAR(255)
);
"""

# Connect. The password is taken from the PGPASSWORD environment variable so a real
# credential never has to live in the notebook; the previous literal remains only as
# the fallback when the variable is unset.
conn = pg2.connect(
    database='survivor',
    user='postgres',
    password=os.environ.get('PGPASSWORD', 'password'),
)

try:
    # `with conn` commits the transaction on success and rolls it back on error;
    # `with conn.cursor()` closes the cursor. Neither closes the connection itself.
    with conn, conn.cursor() as cur:
        cur.execute(create_table_query)
    print(f"Table {table_name} created successfully.")
except pg2.Error as e:
    # Database errors (e.g. the table already exists) are reported, not raised.
    print(f"An error occurred: {e}")
finally:
    conn.close()

Table seasons.season_45_contestants created successfully.


In [4]:
### Main Survivor Page ###

# Download the main Survivor (U.S.) wiki page and parse it into `soup`.
# NOTE: this rebinds `url`, `response`, `web_page` and `soup`, so any cell that reads
# `soup` afterwards sees this page rather than a previously parsed one.
url = 'https://survivor.fandom.com/wiki/Survivor_(U.S.)'

# A timeout keeps a stalled connection from hanging the kernel indefinitely, and
# raise_for_status() prevents an HTTP error page from being parsed as real content.
response = requests.get(url, timeout=30)
response.raise_for_status()
web_page = response.content
soup = BeautifulSoup(web_page, 'html.parser')

In [31]:
# Collect the absolute wiki URL of each season from the first "wikitable" in `soup`.

# Find the table - assuming it's the first "wikitable" on the page
table = soup.find("table", {"class": "wikitable"})
if table is None:
    # Fail with a clear message instead of an AttributeError on None below.
    raise ValueError('No table with class "wikitable" found in the parsed page.')

# Initialize a list to store the extracted data
season_links = []

# Iterate over each row in the table, skipping the header row
for row in table.find_all("tr")[1:]:
    cells = row.find_all("td")
    if not cells:
        # Header/empty rows carry no <td> cells
        continue

    # The season link is the first anchor in the first cell; skip rows where the
    # anchor or its href is missing rather than crashing on None.
    season_link_tag = cells[0].find("a")
    href = season_link_tag.get("href") if season_link_tag is not None else None
    if href is None:
        continue

    season_link = "https://survivor.fandom.com" + href

    # Append the extracted information to the list
    season_links.append(season_link)

print(season_links)
print(len(season_links))



['https://survivor.fandom.com/wiki/Survivor:_Borneo', 'https://survivor.fandom.com/wiki/Survivor:_The_Australian_Outback', 'https://survivor.fandom.com/wiki/Survivor:_Africa', 'https://survivor.fandom.com/wiki/Survivor:_Marquesas', 'https://survivor.fandom.com/wiki/Survivor:_Thailand', 'https://survivor.fandom.com/wiki/Survivor:_The_Amazon', 'https://survivor.fandom.com/wiki/Survivor:_Pearl_Islands', 'https://survivor.fandom.com/wiki/Survivor:_All-Stars', 'https://survivor.fandom.com/wiki/Survivor:_Vanuatu', 'https://survivor.fandom.com/wiki/Survivor:_Palau', 'https://survivor.fandom.com/wiki/Survivor:_Guatemala', 'https://survivor.fandom.com/wiki/Survivor:_Panama', 'https://survivor.fandom.com/wiki/Survivor:_Cook_Islands', 'https://survivor.fandom.com/wiki/Survivor:_Fiji', 'https://survivor.fandom.com/wiki/Survivor:_China', 'https://survivor.fandom.com/wiki/Survivor:_Micronesia', 'https://survivor.fandom.com/wiki/Survivor:_Gabon', 'https://survivor.fandom.com/wiki/Survivor:_Tocantins'

In [25]:
# Debug pass over a single table row (the slice [1:2]): print the raw cells, then
# the (title, link, sole survivor) tuple extracted from them.
# NOTE(review): `extracted_data` is not defined in any visible cell - presumably it is
# created elsewhere in the notebook; confirm it exists on a fresh kernel.
for row in table.find_all("tr")[1:2]:
    cells = row.find_all("td")
    print(cells)

    # Header/empty rows have no <td> cells; nothing further to extract from them.
    if not cells:
        continue

    # Season title and wiki link both come from the first anchor in the first cell
    season_link_tag = cells[0].find("a")
    season_title = season_link_tag.get("title")
    season_link = "https://survivor.fandom.com" + season_link_tag.get("href")

    # Sole Survivor's name: title of the first anchor in the seventh cell (index 6)
    sole_survivor = cells[6].find("a")['title']

    print(cells[6])
    record = (season_title, season_link, sole_survivor)
    print(record)
    extracted_data.append(record)

[<td><a href="/wiki/Survivor:_Borneo" title="Survivor: Borneo"><img alt="Borneo" class="lazyload" data-image-key="Borneo.png" data-image-name="Borneo.png" data-relevant="1" data-src="https://static.wikia.nocookie.net/survivor/images/f/f5/Borneo.png/revision/latest/scale-to-width-down/100?cb=20180424232521" decoding="async" height="64" loading="lazy" src="data:image/gif;base64,R0lGODlhAQABAIABAAAAAP///yH5BAEAAAEALAAAAAABAAEAQAICTAEAOw%3D%3D" width="100"/></a><br/><i><a href="/wiki/Survivor:_Borneo" title="Survivor: Borneo">Borneo</a></i>
</td>, <td><a href="/wiki/Pagong" title="Pagong"><font style="bordercolor:background:#ffff00; color:black; background:#ffff00; color:black; border-radius:3px; padding:2px;">Pagong</font></a><br/><a href="/wiki/Tagi" title="Tagi"><font style="bordercolor:background:#ff8033; color:black; background:#ff8033; color:black; border-radius:3px; padding:2px;">Tagi</font></a><br/><a href="/wiki/Rattana" title="Rattana"><font style="bordercolor:background:#99ff33;