## Importing libraries

In [65]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
import re
import csv

pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

In [58]:
site = 'https://game8.co/games/Last-of-Us-2/archives/290290' # walkthrough page
response = requests.get(site)
response

<Response [200]>

In [59]:
soup = BeautifulSoup(response.text, 'html.parser')

## Gets the chapter images

In [60]:
def get_filtered_image_links(soup, target_names):
    images = []
    img_tags = soup.find_all('img')
    
    for img in img_tags:
        img_url = img.get('data-src') or img.get('src')
        
        if img_url:
            if img_url.startswith('//'):
                img_url = 'https:' + img_url
            elif img_url.startswith('/'):
                img_url = 'https://game8.co' + img_url

            alt_text = img.get('alt', '').lower()
            
            if any(target_name.lower() in alt_text for target_name in target_names):
                images.append({
                    'url': img_url,
                    'alt': alt_text
                })
    
    return images

In [91]:
find_names = [
    'Jackson', 'Seattle Day 1', 'Seattle Day 2', 'Seattle Day 3', 'The Park', 
    'The Farm', 'Santa Barbara'
]

filtered_images = get_filtered_image_links(soup, find_names)

chapter_images = {}
for image in filtered_images:
    for chapter in find_names:
        if chapter.lower() in image['alt']:
            chapter_images[chapter] = image['url']

print(chapter_images)

{'Jackson': 'https://img.game8.co/3252676/016d74f47a69207695a8bd98faa9df04.jpeg/show', 'Seattle Day 1': 'https://img.game8.co/3252681/208e3bd72b0c4ce49f7c2ee308853d6a.jpeg/show', 'Seattle Day 2': 'https://img.game8.co/3252682/541861fadb8610c141286de7d2f98d0c.jpeg/show', 'Seattle Day 3': 'https://img.game8.co/3255016/3fb74e357dc25e5e5368f6cfb994bd54.jpeg/show', 'The Park': 'https://img.game8.co/3252680/68d10ee83ce0e2bad1b133e3aaef1019.jpeg/show', 'The Farm': 'https://img.game8.co/3252775/c195c6ace9970eb25b97e3e7dc47d8a8.png/show', 'Santa Barbara': 'https://img.game8.co/3252774/2a7f34405d54ee3b0e1ebfdecb2831d5.png/show'}


## Get the chapter data and combine it with the images data to get a final CSV

In [92]:
chapters = soup.find_all('a', class_='list_contents')
chapter_names = [chapter.get_text(strip=True) for chapter in chapters]

base_url = "https://game8.co"
sub_chapters_dict = {}

chapters_header = soup.find_all('h3', class_='a-header--3')

for chapter in chapters_header:
    chapter_title = chapter.get_text(strip=True)
    table = chapter.find_next('table', class_='a-table')
    sub_chapters = table.find_all('td', class_='center')
    
    sub_chapters_list = []
    
    for sub_chapter in sub_chapters:
        link = sub_chapter.find('a', class_='a-link')
        if link:
            sub_chapters_list.append({
                'text': link.get_text(strip=True),
                'url': base_url + link['href']
            })
    
    sub_chapters_dict[chapter_title] = sub_chapters_list

In [93]:
csv_data = []

for chapter_key in sub_chapters_dict.keys():
    chapter_name = chapter_key.split(':')[-1].strip()

    row = {
        'Chapter Name': chapter_key,  
        'Chapter Image': chapter_images.get(chapter_name, 'No Image Available'),  
        'Sub-chapters': ', '.join([sub_chap['text'] for sub_chap in sub_chapters_dict.get(chapter_key, [])]),
        'Sub-chapter URLs': ', '.join([sub_chap['url'] for sub_chap in sub_chapters_dict.get(chapter_key, [])]),
    }
    csv_data.append(row)


csv_file = 'chapter_data.csv'

with open(csv_file, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(file, fieldnames=['Chapter Name', 'Chapter Image', 'Sub-chapters', 'Sub-chapter URLs'])
    writer.writeheader()
    writer.writerows(csv_data)

print(f"CSV file '{csv_file}' created successfully!")

CSV file 'chapter_data.csv' created successfully!


## Chapter walkthrough

In [94]:
chapters_df = pd.read_csv('chapter_data.csv')
display(chapters_df.head())

Unnamed: 0,Chapter Name,Chapter Image,Sub-chapters,Sub-chapter URLs
0,Chapter 1: Jackson,https://img.game8.co/3252676/016d74f47a6920769...,"Prologue, Waking Up, The Overlook, Patrol, The...",https://game8.co/games/Last-of-Us-2/archives/2...
1,Chapter 2: Seattle Day 1,https://img.game8.co/3252681/208e3bd72b0c4ce49...,"The Gate, Downtown, Eastbrook Elementary, Capi...",https://game8.co/games/Last-of-Us-2/archives/2...
2,Chapter 3: Seattle Day 2,https://img.game8.co/3252682/541861fadb8610c14...,"Hillcrest, Finding Strings, The Seraphites, St...",https://game8.co/games/Last-of-Us-2/archives/2...
3,Chapter 4: Seattle Day 3,https://img.game8.co/3255016/3fb74e357dc25e5e5...,"Road to the Aquarium, The Flooded City, Infilt...",https://game8.co/games/Last-of-Us-2/archives/2...
4,Chapter 5: The Park,https://img.game8.co/3252680/68d10ee83ce0e2bad...,Tracking Lesson,https://game8.co/games/Last-of-Us-2/archives/2...


In [99]:
all_walkthrough_data = []

for index, row in chapters_df.iterrows():
    chapter_name = row['Chapter Name']
    sub_chapter_urls_string = row['Sub-chapter URLs']
    sub_chapter_urls_list = sub_chapter_urls_string.split(', ')

print(f"Chapter: {chapter_name}")
print(f"Sub-chapter URLs list: {sub_chapter_urls_list}")

Chapter: Chapter 11: The Farm
Sub-chapter URLs list: ['https://game8.co/games/Last-of-Us-2/archives/290846']


In [100]:
all_walkthrough_data = []

for index, row in chapters_df.iterrows():
    chapter_name = row['Chapter Name']
    sub_chapter_urls_string = row['Sub-chapter URLs']

    sub_chapter_urls_list = sub_chapter_urls_string.split(', ')
    sub_chapter_names_string = row['Sub-chapters']
    sub_chapter_names_list = sub_chapter_names_string.split(', ')


    for i, sub_chapter_url in enumerate(sub_chapter_urls_list):
        try:
            response = requests.get(sub_chapter_url)
            response.raise_for_status()  

            chapter_soup = BeautifulSoup(response.text, 'html.parser')


            sub_chapter_tag = chapter_soup.find('h2', class_='a-header--2')
            sub_chapter_name = sub_chapter_tag.get_text(strip=True) if sub_chapter_tag else sub_chapter_names_list[i].strip()


            spans = chapter_soup.find_all('span', style="font-size:120%;")
            extracted_text = [span.get_text(strip=True) for span in spans]

            formatted_text = []
            if extracted_text:
                first_item = extracted_text[0].rstrip('.')
                formatted_text.append(first_item[0].upper() + first_item[1:].lower())

                for item in extracted_text[1:]:
                    formatted_text.append(item.lower())

            # Group formatted text into lines of two
            grouped_text = []
            for j in range(0, len(formatted_text), 2):
                line_items = formatted_text[j:j+2]
                if len(line_items) > 1:
                    grouped_text.append(", and ".join(line_items))
                elif line_items:
                    grouped_text.append(line_items[0])

            all_walkthrough_data.append({
                'Chapter Name': chapter_name,
                'Sub-chapter Name': sub_chapter_name,
                'Sub-chapter URL': sub_chapter_url,
                'Walkthrough Text': "\n".join(grouped_text)
            })

        except requests.exceptions.RequestException as e:
            print(f"Error fetching {sub_chapter_url}: {e}")
            all_walkthrough_data.append({
                'Chapter Name': chapter_name,
                'Sub-chapter Name': sub_chapter_names_list[i].strip() if i < len(sub_chapter_names_list) else 'N/A',
                'Sub-chapter URL': sub_chapter_url,
                'Walkthrough Text': f"Error fetching data: {e}"
            })
        except Exception as e:
            print(f"An unexpected error occurred while processing {sub_chapter_url}: {e}")
            all_walkthrough_data.append({
                'Chapter Name': chapter_name,
                'Sub-chapter Name': sub_chapter_names_list[i].strip() if i < len(sub_chapter_names_list) else 'N/A',
                'Sub-chapter URL': sub_chapter_url,
                'Walkthrough Text': f"An unexpected error occurred: {e}"
            })


In [101]:
walkthrough_df = pd.DataFrame(all_walkthrough_data)
display(walkthrough_df.head())

Unnamed: 0,Chapter Name,Sub-chapter Name,Sub-chapter URL,Walkthrough Text
0,Chapter 1: Jackson,Jackson - Prologue Walkthrough,https://game8.co/games/Last-of-Us-2/archives/2...,"Follow tommy all the way to town, and play the..."
1,Chapter 1: Jackson,Jackson - Waking Up Walkthrough,https://game8.co/games/Last-of-Us-2/archives/2...,"Follow jesse, and follow jesse and maria.\nhav..."
2,Chapter 1: Jackson,Jackson - The Overlook Walkthrough,https://game8.co/games/Last-of-Us-2/archives/2...,"Follow owen, and after you and owen split up, ..."
3,Chapter 1: Jackson,Jackson - Patrol Walkthrough,https://game8.co/games/Last-of-Us-2/archives/2...,"Follow dina, and after getting off your horse,..."
4,Chapter 1: Jackson,Jackson: The Horde Walkthrough,https://game8.co/games/Last-of-Us-2/archives/2...,"Run away from the horde of infected, and follo..."


In [102]:
walkthrough_df.to_csv('walkthrough_data_with_names.csv', index=False)
print("Walkthrough data with sub-chapter names saved to 'walkthrough_data_with_names.csv'")

Walkthrough data with sub-chapter names saved to 'walkthrough_data_with_names.csv'


## Tips and Tricks

## Characters

## Safe Codes

## Trohpy

## Weapon Upgrades

## Enemies