In [18]:
import os
import requests
import pandas as pd
from urllib.parse import urlparse
import time
import json
import ast # Import ast for literal_eval



In [19]:
def _filename_from_url(image_url, content_type, fallback_stem):
    """Pick a local file name for an image fetched from ``image_url``.

    The name is the last component of the URL path, with a trailing
    ``/show`` segment removed first. When that component is empty or has no
    dot in it, the name becomes ``fallback_stem`` plus an extension guessed
    from ``content_type`` (``.jpg`` when nothing matches).
    """
    url_path = urlparse(image_url).path
    if url_path.endswith('/show'):
        url_path = url_path[:-len('/show')]
    candidate = os.path.basename(url_path)
    if candidate and '.' in candidate:
        return candidate

    # Checked in this order; the first marker found in the header value wins.
    for marker, ext in (('jpeg', '.jpg'), ('jpg', '.jpg'),
                        ('png', '.png'), ('webp', '.webp')):
        if marker in content_type:
            return f"{fallback_stem}{ext}"
    return f"{fallback_stem}.jpg"


def download_images_from_csv(csv_file_path, image_url_column, subfolder,
                             delay=0.5, timeout=30):
    """Download every ``.../<name>.<ext>/show`` image listed in a CSV column.

    Files are written to ``Images/<subfolder>`` (relative to the current
    working directory) as ``<row index>_<file name>``. Failures for a single
    URL are printed and the loop moves on to the next row.

    Args:
        csv_file_path: Path of the CSV file to read with ``pd.read_csv``.
        image_url_column: Name of the column holding the image URLs.
        subfolder: Directory name created under ``Images``.
        delay: Seconds to sleep after each successful download; a falsy
            value disables the pause.
        timeout: Timeout in seconds passed to ``requests.get``.

    Returns:
        None. A missing CSV file or a missing column is reported with a
        printed message and an early return.
    """
    try:
        df = pd.read_csv(csv_file_path)
    except FileNotFoundError:
        print(f"Error: CSV file not found at {csv_file_path}")
        return

    # Without this check a misspelled column name would silently download nothing.
    if image_url_column not in df.columns:
        print(f"Error: column '{image_url_column}' not found in {csv_file_path}")
        return

    image_dir = os.path.join('Images', subfolder)
    os.makedirs(image_dir, exist_ok=True)

    print(f"Downloading images from '{csv_file_path}' to '{image_dir}'...")

    # Browser-like headers sent with every request.
    # NOTE(review): 'br' is advertised in Accept-Encoding; decoding Brotli
    # responses needs an optional package - confirm it is installed.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept': 'image/webp,image/apng,image/*,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.9',
        'Accept-Encoding': 'gzip, deflate, br',
        'DNT': '1',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
    }
    wanted_suffixes = ('.jpeg/show', '.png/show', '.jpg/show', '.webp/show')

    for index, image_url in df[image_url_column].items():
        # Empty cells arrive as NaN (a float), so one type check covers both cases.
        if not isinstance(image_url, str):
            continue
        if not image_url.lower().endswith(wanted_suffixes):
            continue

        try:
            # The context manager closes the streamed response on every path,
            # so the connection is released even when the transfer fails.
            with requests.get(image_url, stream=True, headers=headers,
                              timeout=timeout) as response:
                response.raise_for_status()

                original_filename = _filename_from_url(
                    image_url,
                    response.headers.get('content-type', ''),
                    f"image_{index}",
                )
                filename = f"{index}_{original_filename}"
                image_path = os.path.join(image_dir, filename)

                try:
                    with open(image_path, 'wb') as img_file:
                        for chunk in response.iter_content(chunk_size=8192):
                            if chunk:
                                img_file.write(chunk)
                except Exception:
                    # Do not leave a truncated image behind after a failed transfer.
                    if os.path.exists(image_path):
                        os.remove(image_path)
                    raise

            print(f"Downloaded: {filename}")

            if delay:
                time.sleep(delay)

        except requests.exceptions.RequestException as e:
            print(f"Error downloading {image_url}: {e}")
        except Exception as e:
            print(f"An unexpected error occurred while processing {image_url}: {e}")

    print(f"Finished downloading images from '{csv_file_path}'.")


#download_images_from_csv('chapter_data.csv', 'Chapter Image', 'Chapter_images')
#download_images_from_csv('character_data.csv', 'Character Icon URL', 'Character_icon_images')
#download_images_from_csv('character_data.csv', 'Character Main Image URL', 'Character_images')
#download_images_from_csv('enemy_data.csv', 'enemy image url', 'enemy_images')
#download_images_from_csv('tips_data.csv', 'Tip Image', 'tips_images')
#download_images_from_csv('trophy_data.csv', 'Image URL', 'trophy_images')
#download_images_from_csv('weapons_data.csv', 'Weapon Icon URL', 'weapon_images')

In [20]:
# Load the per-enemy "How to Kill" guide table from the working directory.
how_to_kill_df = pd.read_csv('how_to_kill_enemy.csv')

# Walk the rows once, reading the two columns of interest; nothing is kept
# beyond the values of the last row.
for index, row in how_to_kill_df.iterrows():
    enemy_name, how_to_kill_guide_str = row['Enemy name'], row['How to Kill Guide']


In [21]:
# Define the base directory for saving images
base_image_dir = os.path.join('Images', 'How_to_kill_enemy_images')
os.makedirs(base_image_dir, exist_ok=True)

# Headers to mimic a real browser and avoid being blocked
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
    'Accept': 'image/webp,image/apng,image/*,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.9',
    'Accept-Encoding': 'gzip, deflate, br',
    'DNT': '1',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1',
}

for index, row in how_to_kill_df.iterrows():
    enemy_name = row['Enemy name']
    how_to_kill_guide_str = row['How to Kill Guide']

    # Skip rows with no usable guide. Empty CSV cells are read as float NaN,
    # which is truthy and has no .startswith(), so the type is checked first.
    if (not isinstance(how_to_kill_guide_str, str)
            or not how_to_kill_guide_str
            or how_to_kill_guide_str.startswith('Error')):
        print(f"Skipping image extraction for '{enemy_name}' due to missing or error guide data.")
        continue

    # Sanitize the enemy name once per row for use in file names; str() keeps
    # a missing (NaN) name from failing after the image was already requested.
    safe_enemy_name = "".join(
        c for c in str(enemy_name) if c.isalnum() or c in (' ', '-', '_')
    ).replace(' ', '_')

    try:
        # Turn the stored string back into a dictionary. The lossless parsers
        # run first; rewriting single quotes to double quotes can alter
        # apostrophes inside the text, so it is only the last resort.
        try:
            how_to_kill_guide_dict = json.loads(how_to_kill_guide_str)
        except json.JSONDecodeError:
            try:
                how_to_kill_guide_dict = ast.literal_eval(how_to_kill_guide_str)
            except (ValueError, SyntaxError):
                how_to_kill_guide_dict = json.loads(how_to_kill_guide_str.replace("'", "\""))

        # Collect image URLs: the dictionary maps headings to lists of content
        # dictionaries, and image entries carry type == 'image' and a 'url'.
        image_urls = [
            item['url']
            for content_list in how_to_kill_guide_dict.values()
            if isinstance(content_list, list)
            for item in content_list
            if isinstance(item, dict) and item.get('type') == 'image' and 'url' in item
        ]

        # Download the extracted images
        for i, image_url in enumerate(image_urls):
            # Skip anything that is not an http(s) URL string
            if not isinstance(image_url, str) or not image_url.startswith('http'):
                continue

            try:
                # The context manager closes the streamed response on every path
                with requests.get(image_url, stream=True, headers=headers, timeout=30) as response:
                    response.raise_for_status()  # Raise an exception for bad status codes

                    # Extract filename from URL, dropping a trailing '/show'
                    url_path = urlparse(image_url).path
                    if url_path.endswith('/show'):
                        url_path = url_path[:-5]
                    original_filename = os.path.basename(url_path)

                    # No usable name in the URL: build one from the Content-Type header
                    if not original_filename or '.' not in original_filename:
                        content_type = response.headers.get('content-type', '')
                        if 'jpeg' in content_type or 'jpg' in content_type:
                            ext = '.jpg'
                        elif 'png' in content_type:
                            ext = '.png'
                        elif 'webp' in content_type:
                            ext = '.webp'
                        else:
                            ext = '.jpg'
                        original_filename = f"image_{index}_{i}{ext}"  # Use index and image count for unique name

                    # Unique filename: enemy name, row index, image index, original name
                    filename = f"{safe_enemy_name}_{index}_{i}_{original_filename}"
                    image_path = os.path.join(base_image_dir, filename)

                    with open(image_path, 'wb') as img_file:
                        for chunk in response.iter_content(chunk_size=8192):
                            if chunk:
                                img_file.write(chunk)

                print(f"Downloaded: {filename}")

                # Small delay
                time.sleep(0.5)

            except requests.exceptions.RequestException as e:
                print(f"Error downloading {image_url}: {e}")
            except Exception as e:
                print(f"An unexpected error occurred while processing {image_url}: {e}")

    except (ValueError, SyntaxError) as e:
        # json.JSONDecodeError is a ValueError, so both parsers are covered here
        print(f"Error parsing guide data for '{enemy_name}': {e}")
    except Exception as e:
        print(f"An unexpected error occurred while processing guide for '{enemy_name}': {e}")

print("Finished processing How to Kill guides and downloading images.")

Downloaded: Runners_0_0_e6cb795c9598aab56dcfac299e254ea7.jpeg
Downloaded: Runners_0_1_b28115a05f1fe7505b9c5c84311fc827.jpeg
Downloaded: Runners_0_2_356739c8b19ba518de77382119f06bfb.jpeg
Downloaded: Runners_0_3_1585ee707a412239544f597ff23b3365.jpeg
Downloaded: Stalkers_1_0_e72f76b3a6874dec3fcee44426f1ec07.jpeg
Downloaded: Stalkers_1_1_170c4e80530f3793dfcd94c96f417687.jpeg
Downloaded: Stalkers_1_2_645ff0ce0cbffc36c107aa36f557a591.jpeg
Downloaded: Stalkers_1_3_5f2bc1698a44c9ba76e0849fecfad7f0.jpeg
Downloaded: Clickers_2_0_0515a1c04f337ba05458b43745fa6c1d.jpeg
Downloaded: Clickers_2_1_e614eab058d2b82e5e5337332a3b1a9f.jpeg
Downloaded: Bloaters_3_0_1bafae84a1dd4796f556f1ea53751ecf.jpeg
Downloaded: Bloaters_3_1_39e8a8575d63662fa6d7df1f50ff8481.jpeg
Downloaded: Bloaters_3_2_9faf6b02140ab7aeae766f53126f5c2a.jpeg
Downloaded: Bloaters_3_3_eaed3156f84ed3f51ffa3cc918933442.jpeg
Downloaded: Bloaters_3_4_d8d2266ebe6c4425ddea998b0eb53990.jpeg
Downloaded: Shamblers_4_0_bedcc8bb79b5be7c4714ab8c1a811895.

In [22]:
# Load the safe-code table from the working directory.
safe_codes_df = pd.read_csv('safe_codes_.csv')

# Walk the rows once, reading the two columns of interest; nothing is kept
# beyond the values of the last row.
for index, row in safe_codes_df.iterrows():
    location, steps_to_safe_str = row['Location'], row['Steps to Safe']

    # Uncomment to peek at the raw data while exploring:
    # print(f"Processing steps for location: {location}")
    # print(f"Steps data (as string): {steps_to_safe_str[:100]}...") # first 100 characters only

In [23]:

# Define the base directory for saving images for safe codes
base_image_dir_safecodes = os.path.join('Images', 'safe_code_images')
os.makedirs(base_image_dir_safecodes, exist_ok=True)

# Headers to mimic a real browser and avoid being blocked
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
    'Accept': 'image/webp,image/apng,image/*,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.9',
    'Accept-Encoding': 'gzip, deflate, br',
    'DNT': '1',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1',
}

for index, row in safe_codes_df.iterrows():
    location = row['Location']
    steps_to_safe_str = row['Steps to Safe']

    # Skip rows with no usable steps. Empty CSV cells are read as float NaN,
    # which is truthy and has no .startswith(), so the type is checked first.
    if (not isinstance(steps_to_safe_str, str)
            or not steps_to_safe_str
            or steps_to_safe_str.startswith('Error')):
        print(f"Skipping image extraction for '{location}' due to missing or error steps data.")
        continue

    # Sanitize the location once per row for use in file names; str() keeps a
    # missing (NaN) location from failing after the image was already requested.
    safe_location_name = "".join(
        c for c in str(location) if c.isalnum() or c in (' ', '-', '_')
    ).replace(' ', '_')

    try:
        # Convert the string representation of the list back to a list
        # Use ast.literal_eval as the string might contain single quotes and other non-JSON elements
        steps_to_safe_list = ast.literal_eval(steps_to_safe_str)

        # Collect image URLs: each step is a dictionary whose 'images' key
        # holds a list of URLs; anything with another shape is ignored.
        image_urls = []
        if isinstance(steps_to_safe_list, list):
            for step in steps_to_safe_list:
                if isinstance(step, dict) and isinstance(step.get('images'), list):
                    image_urls.extend(url for url in step['images'] if isinstance(url, str))

        # Download the extracted images
        for i, image_url in enumerate(image_urls):
            if not image_url.startswith('http'):
                continue  # Skip if the image URL is missing or not a valid URL

            # Filter URLs based on the specified endings
            if not image_url.lower().endswith(('.jpeg/show', '.png/show', '.jpg/show', '.webp/show')):
                continue

            try:
                # The context manager closes the streamed response on every path
                with requests.get(image_url, stream=True, headers=headers, timeout=30) as response:
                    response.raise_for_status()  # Raise an exception for bad status codes

                    # Extract filename from URL path before '/show'
                    url_path = urlparse(image_url).path
                    if url_path.endswith('/show'):
                        url_path = url_path[:-5]  # Remove '/show'
                    original_filename = os.path.basename(url_path)

                    # If we still don't have a proper filename, create one
                    if not original_filename or '.' not in original_filename:
                        # Try to get extension from Content-Type header
                        content_type = response.headers.get('content-type', '')
                        if 'jpeg' in content_type or 'jpg' in content_type:
                            ext = '.jpg'
                        elif 'png' in content_type:
                            ext = '.png'
                        elif 'webp' in content_type:
                            ext = '.webp'
                        else:
                            ext = '.jpg'  # Default fallback
                        original_filename = f"image_{index}_{i}{ext}"  # Use index and image count for unique name

                    # Unique filename: location, row index, image index, original name
                    filename = f"{safe_location_name}_{index}_{i}_{original_filename}"
                    image_path = os.path.join(base_image_dir_safecodes, filename)

                    with open(image_path, 'wb') as img_file:
                        for chunk in response.iter_content(chunk_size=8192):
                            if chunk:
                                img_file.write(chunk)

                print(f"Downloaded: {filename}")

                # Small delay to be respectful to the server
                time.sleep(0.5)

            except requests.exceptions.RequestException as e:
                print(f"Error downloading {image_url}: {e}")
            except Exception as e:
                print(f"An unexpected error occurred while processing {image_url}: {e}")

    except (SyntaxError, ValueError) as e:
        print(f"Error parsing steps data for '{location}': {e}")
    except Exception as e:
        print(f"An unexpected error occurred while processing steps for '{location}': {e}")

print("Finished processing Safe Codes steps and downloading images.")

Downloaded: Super_Market_0_0_777d5e815510bdb8ca530173b7bce0ff.jpeg
Downloaded: Super_Market_0_1_21f8d389280d2d8bfd8d366cb0bc1bac.jpeg
Downloaded: Super_Market_0_2_62e5595200f35bce426616a031c577b0.jpeg
Downloaded: Super_Market_0_3_942637bc499d497b9100e95461cc79d3.jpeg
Downloaded: Super_Market_0_4_5b2a0e0fb18f20f5f71d5dac93929239.jpeg
Downloaded: Bank_Vault_1_0_dc0025195cee38b70794bd866ac0973b.png
Downloaded: Bank_Vault_1_1_ad64fa5820403221a293842d13de2028.png
Downloaded: Bank_Vault_1_2_d4f58b5e0df23d35e94d33ca87894755.jpeg
Downloaded: Bank_Vault_1_3_0a24a4f672ee401df75c95d85538b826.jpeg
Downloaded: Bank_Vault_1_4_76ae20ee9d55654f4227a28ffc666e59.jpeg
Downloaded: Courthouse_2_0_c3909513bc64950314afaf5d81ba3fc7.jpeg
Downloaded: Courthouse_2_1_eb6b7162afb277a1ca9f7518be914a28.jpeg
Downloaded: West_Gate_2_3_0_81b1016048b9a78e4ddac70c216b681e.jpeg
Downloaded: West_Gate_2_3_1_7af8302743e8a020a4dd4350f608856c.jpeg
Downloaded: Thrift_Store_4_0_574b03d5bd5fd37798729f17feb47928.jpeg
Downloaded: T