In [12]:
import requests
from bs4 import BeautifulSoup

def scrape_dialogue_and_descriptions(url):
    # Send a GET request to the webpage
    response = requests.get(url)
    
    # Parse the page content with BeautifulSoup
    soup = BeautifulSoup(response.content, 'html.parser')
    
    # Container to store all sections in their original order
    combined_content = []
    
    # Find all dialogue, description, and srmsg-container sections in the page, preserving order
    all_sections = soup.find_all(['div'], class_=['dialogue', 'srw-description-content', 'srmsg-container'])
    
    if not all_sections:
        print("No dialogue or description sections found.")
        return
    
    # Loop through each section (either dialogue, description, or srmsg-container)
    for section in all_sections:
        # Handle srmsg-container sections first (separate logic for message groups)
        if 'srmsg-container' in section.get('class', []):
            dialogues = []
            message_groups = section.find_all('div', class_='srmsg-group')
            for group in message_groups:
                # Extract author and messages
                author_tag = group.find('div', class_='srmsg-author')
                author = author_tag.get_text(strip=True) if author_tag else 'Unknown Author'
                for message_tag in group.find_all('div', class_='srmsg-content'):
                    dialogues.append(f"{author}: {message_tag.get_text(strip=True)}")
            
            # Handle player choices if any
            choices = section.find_all('div', class_='srmsg-choice')
            for choice in choices:
                choice_text = choice.find('div', class_='srmsg-choice-content').get_text(strip=True)
                dialogues.append(f"(Choice: {choice_text})")
                choice_results = choice.find_all('div', class_='srmsg-group-player')
                for result in choice_results:
                    result_author_tag = result.find('div', class_='srmsg-author')
                    result_author = result_author_tag.get_text(strip=True) if result_author_tag else 'Unknown Author'
                    result_message = result.find('div', class_='srmsg-content').get_text(strip=True)
                    dialogues.append(f"{result_author}: {result_message}")
            
            combined_content.append(('srmsg-container', dialogues))
        
        # Handle regular dialogue sections (separate from srmsg-container)
        elif 'dialogue' in section.get('class', []):
            dialogues = []
            for dl in section.find_all('dl'):
                for tag in dl.find_all(['dt', 'dd']):
                    dialogues.append(tag.get_text(strip=True))
            combined_content.append(('dialogue', dialogues))
        
        # Handle description sections
        elif 'srw-description-content' in section.get('class', []):
            description = section.get_text(strip=True)
            combined_content.append(('description', description))
    
    return combined_content

# Example usage
url = "https://honkai-star-rail.fandom.com/wiki/In_the_Dangerous_Muddy_Swamp"
content = scrape_dialogue_and_descriptions(url)

# Print the interwoven dialogues and descriptions
if content:
    for item_type, item_content in content:
        if item_type == 'dialogue':
            print("\n--- Dialogue Section ---")
            for dialogue in item_content:
                print(dialogue)
        elif item_type == 'srmsg-container':
            print("\n--- srmsg Dialogue Section ---")
            for dialogue in item_content:
                print(dialogue)
        elif item_type == 'description':
            print("\n--- Description Section ---")
            print(item_content)



--- Description Section ---
Cocoliawas defeated and the influence of theStellaronweakened with her demise.Bronyahad fabricated a white lie to let the people ofBelobogaccept the passing of theirSupreme Guardianin battle.Seelethen returned to theOverworldwith Bronya, who was utterly exhausted. At the same time, someone also needs to bring the news of the victory to theUnderworld.

--- Dialogue Section ---
(Approach theclinic)
▶March 7th:Look! There's a big crowd around the clinic.
▶March 7th:Do you think they felt the effects of the battle just now...? Let's go ask around.
(Idle quotes)
Male Civilian:What was that shaking just now?
Male Civilian:I thought the whole building was about to collapse...
Female Civilian:The ground just kept on shaking...
Female Civilian:Was it theFragmentum?
(Idle quotes)
Girl:Was it an earthquake?
Man:No, it couldn't have been an earthquake...
Girl:What about an avalanche?
Man:Eh, not an avalanche either...
Girl:Then could it be the end of the world?
Man:May

In [13]:
import time
def get_mission_links(main_url):
    # Send a GET request to the main Trailblaze Missions page
    response = requests.get(main_url)
    soup = BeautifulSoup(response.content, 'html.parser')
    
    # Find the section containing the list of missions
    content_div = soup.find('div', {'class': 'mw-parser-output'})
    
    # Initialize a set to store unique mission links
    mission_links = set()
    
    # Iterate through all 'a' tags within the content division
    for a_tag in content_div.find_all('a', href=True):
        href = a_tag['href']
        # Filter for links that lead to mission pages
        if href.startswith('/wiki/') and 'Trailblaze_Mission' not in href:
            full_url = 'https://honkai-star-rail.fandom.com' + href
            mission_links.add(full_url)
    missions_sorted = sorted(mission_links, key=lambda x: x[41])
    return list(missions_sorted)

def scrape_all_missions():
    main_url = 'https://honkai-star-rail.fandom.com/wiki/Trailblaze_Mission'
    mission_links = get_mission_links(main_url)
    
    for url in mission_links:
        print(f"Scraping {url}...")
        #content = scrape_dialogue_and_descriptions(url)
        # Process the content as needed
        #time.sleep(1)  # Be respectful to the server by adding a delay between requests

# Ensure you have your scrape_dialogue_and_descriptions function defined here

scrape_all_missions()

Scraping https://honkai-star-rail.fandom.com/wiki/A_Grand_Cool_Adventure...
Scraping https://honkai-star-rail.fandom.com/wiki/A_Sunset_Rendezvous...
Scraping https://honkai-star-rail.fandom.com/wiki/A_Moment_of_Peace...
Scraping https://honkai-star-rail.fandom.com/wiki/A_Misfortune_of_Survivors...
Scraping https://honkai-star-rail.fandom.com/wiki/Aeon...
Scraping https://honkai-star-rail.fandom.com/wiki/A_Cleansing_of_Gold...
Scraping https://honkai-star-rail.fandom.com/wiki/A_Walk_Among_the_Tombstones...
Scraping https://honkai-star-rail.fandom.com/wiki/Amphoreus...
Scraping https://honkai-star-rail.fandom.com/wiki/All_About_Eve...
Scraping https://honkai-star-rail.fandom.com/wiki/Adventure_Mission...
Scraping https://honkai-star-rail.fandom.com/wiki/Astral_Express_Crew...
Scraping https://honkai-star-rail.fandom.com/wiki/Achievement...
Scraping https://honkai-star-rail.fandom.com/wiki/A_New_Venture_on_the_Eighth_Dawn...
Scraping https://honkai-star-rail.fandom.com/wiki/Astral_Express