## Importing libraries

In [15]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
import re
import csv

# Show every row and every column when a DataFrame is displayed (no truncation).
for _display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(_display_option, None)

In [16]:
site = 'https://game8.co/games/Last-of-Us-2/archives/290290' # walkthrough page
# Without a timeout, requests waits indefinitely if the server never answers.
response = requests.get(site, timeout=30)
# Stop here on a 4xx/5xx answer instead of parsing an error page further down.
response.raise_for_status()
response

<Response [200]>

In [17]:
# Parse the walkthrough page HTML with Python's built-in parser.
soup = BeautifulSoup(markup=response.text, features='html.parser')

## Gets the chapter images

This function collects the links of all images whose alt text matches one of the target names.

It loops over every image tag and reads its `data-src` attribute (falling back to `src`) to get the image link. If the link begins with `//`, `https:` is added in front of it; if it begins with a single `/`, the full website URL is added, so every link ends up as a full URL.

It then checks whether any of the target names appears in the image's alt text; if so, the image link is added to the result together with its alt text.


In [18]:
def get_filtered_image_links(soup, target_names, base_url='https://game8.co'):
    """Return the images whose alt text contains one of the target names.

    Args:
        soup: Parsed page; only ``soup.find_all('img')`` is used.
        target_names: Iterable of names to look for in each image's alt text.
            Matching is a case-insensitive substring test.
        base_url: Site root prepended to links that start with a single ``/``.
            Defaults to the game8 site this notebook scrapes.

    Returns:
        A list of ``{'url': ..., 'alt': ...}`` dicts in page order. ``url`` is
        made absolute when it starts with ``//`` or ``/``; ``alt`` is the
        lower-cased alt text.
    """
    # Lower-case the targets once instead of once per image; materialising the
    # list also keeps a generator argument from being exhausted by the first image.
    wanted = [target_name.lower() for target_name in target_names]
    site_root = base_url.rstrip('/')

    images = []
    for img in soup.find_all('img'):
        # Lazy-loaded images keep the real link in data-src; fall back to src.
        img_url = img.get('data-src') or img.get('src')
        if not img_url:
            continue

        if img_url.startswith('//'):
            # Protocol-relative link.
            img_url = 'https:' + img_url
        elif img_url.startswith('/'):
            # Site-relative link.
            img_url = site_root + img_url

        alt_text = (img.get('alt') or '').lower()
        if any(name in alt_text for name in wanted):
            images.append({'url': img_url, 'alt': alt_text})

    return images

Make a list of the names I specifically need to find and run the function.

Then I loop through each image, compare its alt text against the names in my list, and store every match as a key–value pair (chapter name → image URL) in the `chapter_images` dictionary.

In [19]:
# Chapter titles to look for in the image alt texts.
find_names = [
    'Jackson',
    'Seattle Day 1',
    'Seattle Day 2',
    'Seattle Day 3',
    'The Park',
    'The Farm',
    'Santa Barbara',
]

filtered_images = get_filtered_image_links(soup, find_names)

# Map chapter title -> image URL. Every name found in an alt text is recorded,
# and a later image replaces an earlier one for the same chapter.
chapter_images = {}
for image in filtered_images:
    alt_text = image['alt']
    chapter_images.update({
        chapter: image['url']
        for chapter in find_names
        if chapter.lower() in alt_text
    })

print(chapter_images)

{'Jackson': 'https://img.game8.co/3252676/016d74f47a69207695a8bd98faa9df04.jpeg/show', 'Seattle Day 1': 'https://img.game8.co/3252681/208e3bd72b0c4ce49f7c2ee308853d6a.jpeg/show', 'Seattle Day 2': 'https://img.game8.co/3252682/541861fadb8610c141286de7d2f98d0c.jpeg/show', 'Seattle Day 3': 'https://img.game8.co/3255016/3fb74e357dc25e5e5368f6cfb994bd54.jpeg/show', 'The Park': 'https://img.game8.co/3252680/68d10ee83ce0e2bad1b133e3aaef1019.jpeg/show', 'The Farm': 'https://img.game8.co/3252775/c195c6ace9970eb25b97e3e7dc47d8a8.png/show', 'Santa Barbara': 'https://img.game8.co/3252774/2a7f34405d54ee3b0e1ebfdecb2831d5.png/show'}


## Get the chapter data and combine it with the images data to get a final CSV

This code extracts the chapter names and their sub-chapters, together with the sub-chapter URLs.

It finds the chapter names, then processes each chapter by finding its sub-chapters and linking them to it.

At the end, each chapter has a list of sub-chapters, each containing a name and a URL, and these lists are stored in a dictionary keyed by chapter title.

In [20]:
# Table-of-contents links and their texts (not used by the loop below).
chapters = soup.find_all('a', class_='list_contents')
chapter_names = [chapter.get_text(strip=True) for chapter in chapters]

base_url = "https://game8.co"
sub_chapters_dict = {}

chapters_header = soup.find_all('h3', class_='a-header--3')

# For every chapter heading, read the links in the first table that follows it.
for chapter in chapters_header:
    chapter_title = chapter.get_text(strip=True)
    table = chapter.find_next('table', class_='a-table')
    if table is None:
        # find_next returns None when no table follows the heading; skip the
        # heading instead of failing on table.find_all.
        continue
    sub_chapters = table.find_all('td', class_='center')

    sub_chapters_list = []

    for sub_chapter in sub_chapters:
        link = sub_chapter.find('a', class_='a-link')
        # Ignore cells without a link, and anchors that carry no href.
        if link and link.get('href'):
            sub_chapters_list.append({
                'text': link.get_text(strip=True),
                'url': base_url + link['href']
            })

    sub_chapters_dict[chapter_title] = sub_chapters_list

In [21]:
csv_file = 'chapter_data.csv'

# One row per chapter; sub-chapter names and URLs are flattened into
# ', '-separated strings so each chapter fits on a single CSV line.
csv_data = []
for chapter_key, sub_chapter_entries in sub_chapters_dict.items():
    # 'Chapter 1: Jackson' -> 'Jackson', the key used in chapter_images.
    chapter_name = chapter_key.split(':')[-1].strip()
    csv_data.append({
        'Chapter Name': chapter_key,
        # placeholder if no image is found
        'Chapter Image': chapter_images.get(chapter_name, 'No Image Available'),
        'Sub-chapters': ', '.join(entry['text'] for entry in sub_chapter_entries),
        'Sub-chapter URLs': ', '.join(entry['url'] for entry in sub_chapter_entries),
    })

with open(csv_file, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(
        file,
        fieldnames=['Chapter Name', 'Chapter Image', 'Sub-chapters', 'Sub-chapter URLs'],
    )
    writer.writeheader()
    for record in csv_data:
        writer.writerow(record)

print(f"CSV file '{csv_file}' created successfully!")

CSV file 'chapter_data.csv' created successfully!


## Chapter walkthrough

In [22]:
# Load the chapter overview written by the previous section and preview it.
chapters_df = pd.read_csv(filepath_or_buffer='chapter_data.csv')
chapters_df.head(n=5)

Unnamed: 0,Chapter Name,Chapter Image,Sub-chapters,Sub-chapter URLs
0,Chapter 1: Jackson,https://img.game8.co/3252676/016d74f47a6920769...,"Prologue, Waking Up, The Overlook, Patrol, The...",https://game8.co/games/Last-of-Us-2/archives/2...
1,Chapter 2: Seattle Day 1,https://img.game8.co/3252681/208e3bd72b0c4ce49...,"The Gate, Downtown, Eastbrook Elementary, Capi...",https://game8.co/games/Last-of-Us-2/archives/2...
2,Chapter 3: Seattle Day 2,https://img.game8.co/3252682/541861fadb8610c14...,"Hillcrest, Finding Strings, The Seraphites, St...",https://game8.co/games/Last-of-Us-2/archives/2...
3,Chapter 4: Seattle Day 3,https://img.game8.co/3255016/3fb74e357dc25e5e5...,"Road to the Aquarium, The Flooded City, Infilt...",https://game8.co/games/Last-of-Us-2/archives/2...
4,Chapter 5: The Park,https://img.game8.co/3252680/68d10ee83ce0e2bad...,Tracking Lesson,https://game8.co/games/Last-of-Us-2/archives/2...


In [23]:
# NOTE(review): this cell looks like an unfinished draft of the next cell.
# It only splits each row's URL string and never appends to
# all_walkthrough_data, and the next cell resets the list and repeats these
# statements — candidate for deletion.
all_walkthrough_data = []

for index, row in chapters_df.iterrows():
    chapter_name = row['Chapter Name']
    sub_chapter_urls_string = row['Sub-chapter URLs']
    sub_chapter_urls_list = sub_chapter_urls_string.split(', ')

In [24]:
# Visit every sub-chapter page and collect its walkthrough steps.
# Each entry of all_walkthrough_data is one sub-chapter: chapter name,
# sub-chapter name, URL, and the step text (or an error message on failure).
all_walkthrough_data = []

for _, row in chapters_df.iterrows():
    chapter_name = row['Chapter Name']

    # pd.read_csv loads an empty cell as NaN (a float), which has no .split();
    # treat such cells as "no sub-chapters" instead of crashing.
    urls_cell = row['Sub-chapter URLs']
    names_cell = row['Sub-chapters']
    sub_chapter_urls_list = urls_cell.split(', ') if isinstance(urls_cell, str) else []
    sub_chapter_names_list = names_cell.split(', ') if isinstance(names_cell, str) else []

    # 'Chapter 1: Jackson' -> 'Jackson'
    main_chapter_name = chapter_name.split(':')[-1].strip()

    for i, sub_chapter_url in enumerate(sub_chapter_urls_list):
        # Name taken from the chapter CSV; used when the page has no <h2> title
        # and for rows that record a failure.
        if i < len(sub_chapter_names_list):
            fallback_name = f"{main_chapter_name} - {sub_chapter_names_list[i].strip()}"
        else:
            fallback_name = 'N/A'

        try:
            # Without a timeout a stalled connection would block the whole run.
            response = requests.get(sub_chapter_url, timeout=30)
            response.raise_for_status()

            chapter_soup = BeautifulSoup(response.text, 'html.parser')

            sub_chapter_tag = chapter_soup.find('h2', class_='a-header--2')
            if sub_chapter_tag:
                extracted_sub_chapter_name = sub_chapter_tag.get_text(strip=True)
                # Drop a leading chapter name so it is not repeated in the label.
                extracted_sub_chapter_name = extracted_sub_chapter_name.replace(f"{main_chapter_name} - ", "").strip()
                extracted_sub_chapter_name = extracted_sub_chapter_name.replace(f"{main_chapter_name}: ", "").strip()
                sub_chapter_name = f"{main_chapter_name} - {extracted_sub_chapter_name}"
            else:
                sub_chapter_name = fallback_name

            # The walkthrough steps are the spans rendered at 120% font size.
            spans = chapter_soup.find_all('span', style="font-size:120%;")
            extracted_text = [span.get_text(strip=True) for span in spans]

            # Lower-case every step; the first one loses its trailing period
            # and gets a capital initial. Slicing with [:1] keeps an empty
            # first step from raising IndexError.
            formatted_text = [text.lower() for text in extracted_text]
            if formatted_text:
                first_item = extracted_text[0].rstrip('.')
                formatted_text[0] = first_item[:1].upper() + first_item[1:].lower()

            # Put two steps on each line, joined by ", and "; an odd final
            # step stands alone.
            grouped_text = [
                ", and ".join(formatted_text[j:j + 2])
                for j in range(0, len(formatted_text), 2)
            ]
            walkthrough_text = "\n".join(grouped_text)

        except requests.exceptions.RequestException as e:
            print(f"Error fetching {sub_chapter_url}: {e}")
            sub_chapter_name = fallback_name
            walkthrough_text = f"Error fetching data: {e}"
        except Exception as e:
            # Best effort: record the failure and carry on with the next page.
            print(f"An unexpected error occurred while processing {sub_chapter_url}: {e}")
            sub_chapter_name = fallback_name
            walkthrough_text = f"An unexpected error occurred: {e}"

        all_walkthrough_data.append({
            'Chapter Name': chapter_name,
            'Sub-chapter Name': sub_chapter_name,
            'Sub-chapter URL': sub_chapter_url,
            'Walkthrough Text': walkthrough_text
        })

In [25]:
# One row per scraped sub-chapter; preview the first few.
walkthrough_df = pd.DataFrame(data=all_walkthrough_data)
walkthrough_df.head(n=5)

Unnamed: 0,Chapter Name,Sub-chapter Name,Sub-chapter URL,Walkthrough Text
0,Chapter 1: Jackson,Jackson - Prologue Walkthrough,https://game8.co/games/Last-of-Us-2/archives/2...,"Follow tommy all the way to town, and play the..."
1,Chapter 1: Jackson,Jackson - Waking Up Walkthrough,https://game8.co/games/Last-of-Us-2/archives/2...,"Follow jesse, and follow jesse and maria.\nhav..."
2,Chapter 1: Jackson,Jackson - The Overlook Walkthrough,https://game8.co/games/Last-of-Us-2/archives/2...,"Follow owen, and after you and owen split up, ..."
3,Chapter 1: Jackson,Jackson - Patrol Walkthrough,https://game8.co/games/Last-of-Us-2/archives/2...,"Follow dina, and after getting off your horse,..."
4,Chapter 1: Jackson,Jackson - The Horde Walkthrough,https://game8.co/games/Last-of-Us-2/archives/2...,"Run away from the horde of infected, and follo..."


In [26]:
# Persist the walkthrough table without the DataFrame index column.
walkthrough_df.to_csv(path_or_buf='walkthrough_data.csv', index=False)

## Tips and Tricks

In [28]:
tips = 'https://game8.co/games/Last-of-Us-2/archives/290295' # tips page
# Without a timeout, requests waits indefinitely if the server never answers.
response = requests.get(tips, timeout=30)
# Stop here on a 4xx/5xx answer instead of parsing an error page further down.
response.raise_for_status()
response

<Response [200]>

In [29]:
# Parse the tips page HTML with Python's built-in parser.
tips_soup = BeautifulSoup(markup=response.text, features='html.parser')

In [30]:
# Alt texts of the banner images wanted from the tips page.
target_names = [
    'Combat Tips - Banner.jpg',
    'Horse Riding - Jumping.jpg',
    'Crafting Training Manual.jpg',
    'Opening Safe.jpg',
    'Guitar Chords.jpg',
]

filtered_images = get_filtered_image_links(tips_soup, target_names)

# Map banner name (without the '.jpg' suffix) -> image URL. Only the first
# target found in an image's alt text is used for that image.
tips_images = {}
for image in filtered_images:
    matched_name = next(
        (name for name in target_names if name.lower() in image['alt']),
        None,
    )
    if matched_name is None:
        continue
    tips_images[matched_name.replace('.jpg', '').strip()] = image['url']

print(tips_images)

{'Combat Tips - Banner': 'https://img.game8.co/3252039/f1c76a5d04f6c72dc2b31e137bd066e1.jpeg/show', 'Horse Riding - Jumping': 'https://img.game8.co/3252049/0f48816d79470f9464839df5bd2ea9cc.jpeg/show', 'Crafting Training Manual': 'https://img.game8.co/3252313/ab7a7612ae96ef357bdbd4d8f92a318f.jpeg/show', 'Opening Safe': 'https://img.game8.co/3252061/2582ac528f3b9b756876e7754f2580df.jpeg/show', 'Guitar Chords': 'https://img.game8.co/3252032/1678b6de0090d87ce246ab9f3f612ccc.jpeg/show'}


In [37]:
base_url = "https://game8.co"
sub_tips_dict = {}

tips_header = tips_soup.find_all('h3', class_='a-header--3')

# Table-of-contents links and their texts (not used by the loop below).
tips_elements = tips_soup.find_all('a', class_='list_contents')
tips_names = [tip.get_text(strip=True) for tip in tips_elements]


# For every tip-category heading, read the links in the first table that follows it.
for tip in tips_header:
    tip_title = tip.get_text(strip=True)
    table = tip.find_next('table', class_='a-table')
    if table is None:
        # find_next returns None when no table follows the heading; skip the
        # heading instead of failing on table.find_all.
        continue
    sub_tips = table.find_all('td', class_='center')

    sub_tips_list = []

    for sub_tip in sub_tips:
        link = sub_tip.find('a', class_='a-link')
        # Ignore cells without a link, and anchors that carry no href.
        if link and link.get('href'):
            sub_tips_list.append({
                'text': link.get_text(strip=True),
                'url': base_url + link['href']
            })

    sub_tips_dict[tip_title] = sub_tips_list



print(sub_tips_dict)

{'Last of Us 2 Gameplay Guides': [{'text': 'Combat Guide and Tips', 'url': 'https://game8.co/games/Last-of-Us-2/archives/290684'}, {'text': 'List of Skill Upgrades', 'url': 'https://game8.co/games/Last-of-Us-2/archives/290677'}, {'text': 'How to Use Listen Mode', 'url': 'https://game8.co/games/Last-of-Us-2/archives/290674'}, {'text': 'How to Stealth Kill | Stealth Tips', 'url': 'https://game8.co/games/Last-of-Us-2/archives/290672'}, {'text': 'How to Upgrade Weapons', 'url': 'https://game8.co/games/Last-of-Us-2/archives/290500'}], 'Last of Us 2 Controls': [{'text': 'Game Controls & Settings', 'url': 'https://game8.co/games/Last-of-Us-2/archives/291174'}, {'text': 'How to Ride the Horse', 'url': 'https://game8.co/games/Last-of-Us-2/archives/290686'}, {'text': 'How to Heal', 'url': 'https://game8.co/games/Last-of-Us-2/archives/290708'}, {'text': 'How to Use Quick Turn', 'url': 'https://game8.co/games/Last-of-Us-2/archives/290688'}, {'text': 'How to Run (Sprint)', 'url': 'https://game8.co/

In [38]:

# Tip category heading -> name of the banner image (key of tips_images) that
# illustrates it. The pairing is chosen by hand.
tip_image_mapping = dict([
    ('Last of Us 2 Gameplay Guides', 'Combat Tips - Banner'),
    ('Last of Us 2 Controls', 'Horse Riding - Jumping'),
    ('Last of Us 2 Materials and Parts', 'Crafting Training Manual'),
    ('Last of Us 2 Exploration and Secrets Guides', 'Opening Safe'),
    ('Last of Us 2 Miscellaneous Guides', 'Guitar Chords'),
])

print("Tip image mapping created:")
print(tip_image_mapping)

Tip image mapping created:
{'Last of Us 2 Gameplay Guides': 'Combat Tips - Banner', 'Last of Us 2 Controls': 'Horse Riding - Jumping', 'Last of Us 2 Materials and Parts': 'Crafting Training Manual', 'Last of Us 2 Exploration and Secrets Guides': 'Opening Safe', 'Last of Us 2 Miscellaneous Guides': 'Guitar Chords'}


In [39]:
csv_file = 'tips_data.csv'

# One row per tip category; sub-tip names and URLs are flattened into
# ', '-separated strings so each category fits on a single CSV line.
csv_data = []
for tip_key, sub_tip_entries in sub_tips_dict.items():
    # Category heading -> banner name -> banner URL. A category missing from
    # the mapping yields None, which falls through to the placeholder.
    image_key_from_mapping = tip_image_mapping.get(tip_key)
    matching_image_url = tips_images.get(image_key_from_mapping, 'No Image Available')

    csv_data.append({
        'Tip Name': tip_key,
        'Tip Image': matching_image_url,
        'Sub-tips': ', '.join(entry['text'] for entry in sub_tip_entries),
        'Sub-tip URLs': ', '.join(entry['url'] for entry in sub_tip_entries),
    })

with open(csv_file, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(
        file,
        fieldnames=['Tip Name', 'Tip Image', 'Sub-tips', 'Sub-tip URLs'],
    )
    writer.writeheader()
    for record in csv_data:
        writer.writerow(record)

print(f"CSV file '{csv_file}' created successfully!")

CSV file 'tips_data.csv' created successfully!


In [40]:
# Load the tip-category overview written by the previous cell and preview it.
tips_df = pd.read_csv(filepath_or_buffer='tips_data.csv')
tips_df.head(n=5)

Unnamed: 0,Tip Name,Tip Image,Sub-tips,Sub-tip URLs
0,Last of Us 2 Gameplay Guides,https://img.game8.co/3252039/f1c76a5d04f6c72dc...,"Combat Guide and Tips, List of Skill Upgrades,...",https://game8.co/games/Last-of-Us-2/archives/2...
1,Last of Us 2 Controls,https://img.game8.co/3252049/0f48816d79470f946...,"Game Controls & Settings, How to Ride the Hors...",https://game8.co/games/Last-of-Us-2/archives/2...
2,Last of Us 2 Materials and Parts,https://img.game8.co/3252313/ab7a7612ae96ef357...,"What are Scavenged Parts?, What are Ingredient...",https://game8.co/games/Last-of-Us-2/archives/2...
3,Last of Us 2 Exploration and Secrets Guides,https://img.game8.co/3252061/2582ac528f3b9b756...,"Safe Combinations and Locations, How to Find t...",https://game8.co/games/Last-of-Us-2/archives/2...
4,Last of Us 2 Miscellaneous Guides,https://img.game8.co/3252032/1678b6de0090d87ce...,"Which Skills Should You Upgrade First?, Which ...",https://game8.co/games/Last-of-Us-2/archives/2...


In [41]:
# Visit every sub-tip page and collect its content grouped by heading.
# Each entry of all_tips_data is one sub-tip: category, sub-tip name, URL, and
# a dict {heading: {'paragraphs': str, 'tables': [...]}} (or an error message).
all_tips_data = []

for _, row in tips_df.iterrows():
    tip_name = row['Tip Name']

    # pd.read_csv loads an empty cell as NaN (a float), which has no .split();
    # treat such cells as "no sub-tips" instead of crashing.
    urls_cell = row['Sub-tip URLs']
    names_cell = row['Sub-tips']
    sub_tips_urls_list = urls_cell.split(', ') if isinstance(urls_cell, str) else []
    sub_tip_name_list = names_cell.split(', ') if isinstance(names_cell, str) else []

    main_tip_name = tip_name.split(':')[-1].strip()

    for i, sub_tip_url in enumerate(sub_tips_urls_list):
        # Name taken from the overview CSV; used when the page has no <h2>
        # title and for rows that record a failure.
        if i < len(sub_tip_name_list):
            fallback_name = f"{main_tip_name} - {sub_tip_name_list[i].strip()}"
        else:
            fallback_name = 'N/A'

        try:
            # Without a timeout a stalled connection would block the whole run.
            response = requests.get(sub_tip_url, timeout=30)
            response.raise_for_status()

            sub_tip_soup = BeautifulSoup(response.text, 'html.parser')

            sub_chapter_tag = sub_tip_soup.find('h2', class_='a-header--2')
            if sub_chapter_tag:
                extracted_sub_tip_name = sub_chapter_tag.get_text(strip=True)
                # Drop a leading category name so it is not repeated in the label.
                extracted_sub_tip_name = extracted_sub_tip_name.replace(f"{main_tip_name} - ", "").strip()
                extracted_sub_tip_name = extracted_sub_tip_name.replace(f"{main_tip_name}: ", "").strip()
                sub_tip_name = f"{main_tip_name} - {extracted_sub_tip_name}"
            else:
                sub_tip_name = fallback_name

            # Walk headings, paragraphs and tables in document order and file
            # each paragraph/table under the most recent heading.
            detailed_tip_dict = {}
            current_heading = None

            content_elements = sub_tip_soup.select('h2.a-header--2, h3.a-header--3, p.a-paragraph, table.a-table')

            for element in content_elements:
                if element.name in ['h2', 'h3']:
                    current_heading = element.get_text(strip=True)
                    # setdefault keeps content already collected when the same
                    # heading text appears more than once on a page.
                    detailed_tip_dict.setdefault(current_heading, [])
                elif element.name == 'p' and 'a-paragraph' in element.get('class', []):
                    if current_heading:
                        detailed_tip_dict[current_heading].append(element.get_text(strip=True))
                elif element.name == 'table' and 'a-table' in element.get('class', []):
                    if current_heading:
                        table_data = []
                        table_rows = element.find_all('tr')
                        if table_rows:
                            # First row supplies the column names; later rows
                            # are kept only when their cell count matches.
                            headers = [th.get_text(strip=True) for th in table_rows[0].find_all('th')]
                            # Named table_row so it does not overwrite the
                            # outer iterrows() variable `row`.
                            for table_row in table_rows[1:]:
                                cells = [td.get_text(strip=True) for td in table_row.find_all('td')]
                                if len(headers) == len(cells):
                                    table_data.append(dict(zip(headers, cells)))

                        detailed_tip_dict[current_heading].append({'table_data': table_data})

            # Per heading: join the paragraphs into one string, keep tables as a list.
            formatted_detailed_tip_dict = {}
            for heading, content_list in detailed_tip_dict.items():
                paragraph_content = [item for item in content_list if not isinstance(item, dict) or 'table_data' not in item]
                table_content = [item for item in content_list if isinstance(item, dict) and 'table_data' in item]

                heading_data = {}
                if paragraph_content:
                    heading_data['paragraphs'] = "\n".join(paragraph_content)
                if table_content:
                    heading_data['tables'] = table_content

                formatted_detailed_tip_dict[heading] = heading_data

            detailed_tip_text = formatted_detailed_tip_dict

        except requests.exceptions.RequestException as e:
            print(f"Error fetching {sub_tip_url}: {e}")
            sub_tip_name = fallback_name
            detailed_tip_text = f"Error fetching data: {e}"
        except Exception as e:
            # Best effort: record the failure and carry on with the next page.
            print(f"An unexpected error occurred while processing {sub_tip_url}: {e}")
            sub_tip_name = fallback_name
            detailed_tip_text = f"An unexpected error occurred: {e}"

        all_tips_data.append({
            'Tip Name': tip_name,
            'Sub-tip Name': sub_tip_name,
            'Sub-tip URL': sub_tip_url,
            'Detailed Tip Text': detailed_tip_text
        })

In [42]:
# One row per scraped sub-tip page. Note that this rebinds tips_df, which
# until now held the category overview read from 'tips_data.csv'.
tips_df = pd.DataFrame(data=all_tips_data)
tips_df.head(n=5)

Unnamed: 0,Tip Name,Sub-tip Name,Sub-tip URL,Detailed Tip Text
0,Last of Us 2 Gameplay Guides,Last of Us 2 Gameplay Guides - Combat Guide an...,https://game8.co/games/Last-of-Us-2/archives/2...,"{'Combat Guide and Tips': {}, 'Environmental A..."
1,Last of Us 2 Gameplay Guides,Last of Us 2 Gameplay Guides - What Are Player...,https://game8.co/games/Last-of-Us-2/archives/2...,{'What Are Player Upgrades?': {'paragraphs': '...
2,Last of Us 2 Gameplay Guides,Last of Us 2 Gameplay Guides - What is Listen ...,https://game8.co/games/Last-of-Us-2/archives/2...,{'What is Listen Mode?': {'paragraphs': 'Liste...
3,Last of Us 2 Gameplay Guides,Last of Us 2 Gameplay Guides - How to Stealth ...,https://game8.co/games/Last-of-Us-2/archives/2...,{'How to Stealth Kill': {'paragraphs': 'Stealt...
4,Last of Us 2 Gameplay Guides,Last of Us 2 Gameplay Guides - Workbench Custo...,https://game8.co/games/Last-of-Us-2/archives/2...,{'Workbench Customization': {'paragraphs': 'In...


In [43]:
# Persist the detailed tips table without the DataFrame index column.
tips_df.to_csv(path_or_buf='detailed_tips_data.csv', index=False)
print("Detailed tips data saved to 'detailed_tips_data.csv'")

Detailed tips data saved to 'detailed_tips_data.csv'


## Characters

In [44]:
character = 'https://game8.co/games/Last-of-Us-2/archives/290477' #character page
# Without a timeout, requests waits indefinitely if the server never answers.
response = requests.get(character, timeout=30)
# Stop here on a 4xx/5xx answer instead of parsing an error page further down.
response.raise_for_status()
response

<Response [200]>

In [None]:
# Parse the character page HTML with Python's built-in parser.
character_soup = BeautifulSoup(markup=response.text, features='html.parser')

In [None]:
# Alt texts of the character icon images wanted from the character page.
target_names = [
    'Ellie Icon.png', 'Dina Icon.png', 'Joel Icon.png',
    'Jesse Icon.png', 'Tommy Icon.png', 'Maria Icon.png',
    'Seth Icon.png', 'Abby Icon.png', 'Owen Icon.png',
    'Mel Icon.png', 'Nora Icon.png', 'Manny Icon.png',
    'Jordan Icon.png', 'Isaac Icon.png', 'Alice Icon.png',
    'Jerry Icon.png', 'Whitney Icon.png', 'Nick Icon.png',
    'Lev Icon.png', 'Yara Icon.png', 'Emily Icon.png',
]

filtered_images = get_filtered_image_links(character_soup, target_names)

# Map character name -> icon URL. Only the first target found in an image's
# alt text is used for that image.
character_images = {}
for image in filtered_images:
    matched_icon = next(
        (name for name in target_names if name.lower() in image['alt']),
        None,
    )
    if matched_icon is None:
        continue
    # 'Ellie Icon.png' -> 'Ellie'
    character_images[matched_icon.replace(' Icon.png', '').strip()] = image['url']

print(character_images)

In [None]:
# Character name -> URL of the character's main (portrait) image.
# Keys must equal the names derived from character_profile_URLs
# ('<Name>_page' with the '_page' suffix removed), because the scraping loop
# looks images up by that name. The key for Lev was previously 'Lev_', which
# never matched, so Lev always received the 'No Main Image Available' placeholder.
character_main_image = {
    'Ellie': 'https://img.game8.co/3253674/d94845ae35742fb7aeab666c545ce17c.jpeg/show',
    'Joel': 'https://img.game8.co/3253664/103b20839a9d395c4391a3465867cdcb.jpeg/show',
    'Dina': 'https://img.game8.co/3253702/457134b322ef240fbf357b4f3e2022bb.jpeg/show',
    'Abby': 'https://img.game8.co/3253680/e37a8d8fc6ff287dbf28926e004aebd3.jpeg/show',
    'Yara': 'https://img.game8.co/3253753/a53e019404ad3d253ca442e87e2b5270.jpeg/show',
    'Nora': 'https://img.game8.co/3253701/527bfd2d36d8b19d5dd586499b79b98f.jpeg/show',
    'Lev': 'https://img.game8.co/3253754/95629e336e9e7bc1a7f9ffd871a12089.jpeg/show',
    'Emily': 'https://img.game8.co/3253750/b64de7fa1386b449733d20ec2343fc69.jpeg/show',
    'Jesse': 'https://img.game8.co/3253666/cd8a49d1e146c09662f466f1417b8063.jpeg/show',
    'Tommy': 'https://img.game8.co/3253667/5f6bdd9e98558a4b3a8d18fd39c8d45a.jpeg/show',
    'Seth': 'https://img.game8.co/3253700/442701af05e3748b9ccedc8a793a0000.jpeg/show',
    'Mel': 'https://img.game8.co/3253673/43e4ae0186bd6083dc0b0f1bda3d6fae.jpeg/show',
    'Owen': 'https://img.game8.co/3253668/37cb30d83159273a09bcd4c3eac43dc1.jpeg/show',
    'Manny': 'https://img.game8.co/3253714/59d1b2cada49306c655dc50b5690e78e.jpeg/show',
    'Jordan': 'https://img.game8.co/3253748/db9e05b070614c194c9d8e910f158883.jpeg/show',
    'Nick': 'https://img.game8.co/3253751/0cbbe465209bd878cc3c4fe7b134b961.jpeg/show',
    'Alice': 'https://img.game8.co/3253752/40349185b76d1d125192c59788f9e7a9.jpeg/show',
    'Maria': 'https://img.game8.co/3253871/34d5cb007beec80ed621a67594bcd565.jpeg/show',
    'Jerry': 'https://img.game8.co/3253888/433549ea350fab01b217e903199f165d.jpeg/show',
    'Whitney': 'https://img.game8.co/3253876/211eb1e5683a5d9ae17872049440fc81.jpeg/show',
    'Isaac': 'https://img.game8.co/3253872/66e409e5ea1a6c1649ac10146a013d70.jpeg/show',
}

In [None]:
# Profile page of every character, keyed as '<Name>_page'.
# All pages live under the same archive path and differ only in the numeric id.
# NOTE(review): Dina's id (290477) is the same address the notebook fetches as
# the character overview page — confirm that it really is Dina's profile.
_PROFILE_URL_TEMPLATE = 'https://game8.co/games/Last-of-Us-2/archives/{}'
_PROFILE_ARCHIVE_IDS = [
    ('Ellie', 290451),
    ('Joel', 290454),
    ('Dina', 290477),
    ('Abby', 290497),
    ('Yara', 290493),
    ('Nora', 290491),
    ('Lev', 290495),
    ('Emily', 290496),
    ('Jesse', 290486),
    ('Tommy', 290458),
    ('Seth', 291199),
    ('Mel', 291200),
    ('Owen', 291205),
    ('Manny', 291206),
    ('Jordan', 291209),
    ('Nick', 291210),
    ('Alice', 291211),
    ('Maria', 291219),
    ('Jerry', 291220),
    ('Whitney', 291221),
    ('Isaac', 291222),
]

character_profile_URLs = {
    f'{name}_page': _PROFILE_URL_TEMPLATE.format(archive_id)
    for name, archive_id in _PROFILE_ARCHIVE_IDS
}

In [None]:
# Visit every character profile page and collect its content grouped by heading.
# Each entry of character_details_list is one character: name, icon URL, main
# image URL, profile URL, and a dict {heading: {'paragraphs': str, 'tables': [...]}}
# (or an error message when the page could not be processed).
character_details_list = []

for character_name_with_suffix, profile_url in character_profile_URLs.items():
    # 'Ellie_page' -> 'Ellie'. Computed before the request so the success row
    # and the error rows use the same name.
    character_name = character_name_with_suffix.replace('_page', '').strip()

    try:
        # Without a timeout a stalled connection would block the whole run.
        response = requests.get(profile_url, timeout=30)
        response.raise_for_status() # Raise an exception for bad status codes

        profile_soup = BeautifulSoup(response.text, 'html.parser')

        # --- Extract character information ---
        # Walk headings, paragraphs and tables in document order and file each
        # paragraph/table under the most recent heading.
        character_info_dict = {}
        current_heading = None

        content_elements = profile_soup.select('h2.a-header--2, h3.a-header--3, p.a-paragraph, table.a-table')

        for element in content_elements:
            if element.name in ['h2', 'h3']:
                current_heading = element.get_text(strip=True)
                # setdefault keeps content already collected when the same
                # heading text appears more than once on a page.
                character_info_dict.setdefault(current_heading, [])
            elif element.name == 'p' and 'a-paragraph' in element.get('class', []):
                if current_heading:
                    character_info_dict[current_heading].append(element.get_text(strip=True))
            elif element.name == 'table' and 'a-table' in element.get('class', []):
                if current_heading:
                    table_data = []
                    table_rows = element.find_all('tr')
                    if table_rows:
                        # First row supplies the column names; later rows are
                        # kept only when their cell count matches.
                        headers = [th.get_text(strip=True) for th in table_rows[0].find_all('th')]
                        for table_row in table_rows[1:]:
                            cells = [td.get_text(strip=True) for td in table_row.find_all('td')]
                            if len(headers) == len(cells):
                                table_data.append(dict(zip(headers, cells)))
                    # Only record tables from which at least one row was extracted.
                    if table_data:
                        character_info_dict[current_heading].append({'table_data': table_data})

        # --- Format the extracted information ---
        # Per heading: join the paragraphs into one string, keep tables as a
        # list, and leave out headings that ended up with no content.
        formatted_character_info = {}
        for heading, content_list in character_info_dict.items():
            paragraph_content = [item for item in content_list if not isinstance(item, dict) or 'table_data' not in item]
            table_content = [item for item in content_list if isinstance(item, dict) and 'table_data' in item]

            heading_data = {}
            if paragraph_content:
                heading_data['paragraphs'] = "\n".join(paragraph_content)
            if table_content:
                heading_data['tables'] = table_content

            if heading_data:
                formatted_character_info[heading] = heading_data

        character_info = formatted_character_info

    except requests.exceptions.RequestException as e:
        print(f"Error fetching {profile_url}: {e}")
        character_info = f"Error fetching data: {e}"
    except Exception as e:
        # Best effort: record the failure and carry on with the next character.
        print(f"An unexpected error occurred while processing {profile_url}: {e}")
        character_info = f"An unexpected error occurred: {e}"

    # One row per character, whether scraping succeeded or failed; the image
    # URLs come from the dictionaries built in the earlier cells.
    character_details_list.append({
        'Character Name': character_name,
        'Character Icon URL': character_images.get(character_name, 'No Icon Image Available'),
        'Character Main Image URL': character_main_image.get(character_name, 'No Main Image Available'),
        'Profile URL': profile_url,
        'Character Info': character_info
    })

# character_details_list now holds one entry per character; the next cell
# converts it into a DataFrame and saves it to CSV.
print(f"Finished scraping details for {len(character_details_list)} characters.")

In [None]:
# Target file for the character table.
csv_file = 'character_data.csv'

# Build the table (one row per character) and preview the first rows.
character_df = pd.DataFrame(data=character_details_list)
display(character_df.head(n=5))

# Write the table to disk without the DataFrame index column.
character_df.to_csv(path_or_buf=csv_file, index=False)

print(f"Character details saved to '{csv_file}'")

## Safe Codes

## Trophy

## Weapon Upgrades

## Enemies