In [333]:
import pandas as pd
import numpy as np
import os


In [334]:
all_areas = set()
def process_textfile(file_path: str) -> pd.DataFrame:
    # Create a dictionary to store the data
    data = {"Area": [], "Type": [], "Description": []}

    # Read the text file
    with open(file_path, "r") as f:
        lines = [line.strip() for line in f]
        
    areas = []

    # Iterate through the lines
    for line in lines:
        if line.startswith('In '):
            temp = line[3:].split(": ", 1)
            # Get the Area before the ':'
            area = temp[0].strip()
            # print(f"area: {area}")
            areas.append(area)
            
            # Get Description:
            detail = temp[1].strip()
            # print(f"test description parsing: {detail}")
            data['Area'].append(area)
            data['Type'].append(os.path.splitext(os.path.basename(file_path))[0])
            data['Description'].append(detail)
        elif line.startswith('Excluded'):
            break
        else:
            continue
    # print(f"all areas: {areas}")
    all_areas.update(areas)
    print(f"all areas: {all_areas}")

    # Create a pandas DataFrame from the data dictionary
    df = pd.DataFrame(data)

    # Print the DataFrame
    df.head()
    
    return df

In [335]:
# List all files in the src folder
all_files = os.listdir()

# Should only have ONE source (src) folder!!!
src_dir = [folder for folder in all_files if folder.startswith('src_')]
if not src_dir:
    raise Exception('No Source Directory found!')
if len(src_dir) > 1:
    raise Exception('Only 1 Source Directory allowed, formatted as "src_..."')
print(src_dir[0])

text_files = [f"{src_dir[0]}/{file}" for file in os.listdir(src_dir[0]) if file.endswith('.txt')]
print(text_files)

src_BP_April_Rando_Any%
['src_BP_April_Rando_Any%/Major bosses.txt', 'src_BP_April_Rando_Any%/Merchant shops.txt', 'src_BP_April_Rando_Any%/Shadow Realm Blessing pickups.txt', 'src_BP_April_Rando_Any%/Vanilla major key item locations.txt']


In [336]:
# Process the text files and combine the resulting DataFrames
dataframes = [process_textfile(file_path) for file_path in text_files]
combined_df = pd.concat(dataframes, ignore_index=True)

# print(combined_df)
combined_df.head()

all areas: {'Academy of Raya Lucaria', 'Stormhill', 'Weeping Peninsula', 'Castle Ensis', 'Mountaintops of the Giants', 'Ainsel River', 'Nokron, Eternal City', 'Stone Platform', 'Caelid', 'Ashen Leyndell', 'Jagged Peak', 'Enir-Ilim', 'Ancient Ruins of Rauh', 'Siofra River', 'Leyndell', 'Scadutree Base', 'Finger Birthing Grounds', 'Stormveil Castle', 'Volcano Manor', 'Specimen Storehouse', 'Farum Azula', 'Mohgwyn', 'Ainsel River Downstream', 'Haligtree', 'Liurnia', "Midra's Manse", 'Scaduview', 'Altus Plateau', 'Shadow Keep', 'Flame Peak', 'Stone Coffin Fissure', 'Subterranean Shunning-Grounds', 'Ruin-Strewn Precipice', 'Belurat'}
all areas: {'Academy of Raya Lucaria', 'Stormhill', 'Weeping Peninsula', 'Castle Ensis', 'Mountaintops of the Giants', 'Ainsel River', 'Nokron, Eternal City', 'Stone Platform', 'Capital Outskirts', 'Caelid', 'Ashen Leyndell', 'Jagged Peak', 'Enir-Ilim', 'Ancient Ruins of Rauh', 'Siofra River', 'Leyndell', 'Scadutree Base', 'Finger Birthing Grounds', 'Stormveil 

Unnamed: 0,Area,Type,Description
0,Stormhill,Major bosses,Dropped by Margit the Fell Omen or Morgott the...
1,Stormveil Castle,Major bosses,Dropped by Godrick the Grafted. Replaces Godri...
2,Weeping Peninsula,Major bosses,Dropped by Leonine Misbegotten south of Castle...
3,Liurnia,Major bosses,Dropped by Royal Knight Loretta. Replaces Lore...
4,Academy of Raya Lucaria,Major bosses,Dropped by Red Wolf of Radagon. Replaces Memor...


In [337]:
combined_df.to_excel("output/combined/combined_data.xlsx", index=False, engine='openpyxl')

In [338]:
# Sort the combined_df DataFrame by the "Area" column
sorted_df = combined_df.sort_values(by="Area").reset_index(drop=True)

# Print the sorted DataFrame
sorted_df.head()

Unnamed: 0,Area,Type,Description
0,Abyssal Woods,Shadow Realm Blessing pickups,In front of the statue pedestal in Abandoned C...
1,Abyssal Woods,Shadow Realm Blessing pickups,On a corpse in front of a tree in the ravine n...
2,Academy of Raya Lucaria,Vanilla major key item locations,In a chest unlocked by Discarded Palace Key in...
3,Academy of Raya Lucaria,Vanilla major key item locations,On a robed corpse in the Raya Lucaria rooftop ...
4,Academy of Raya Lucaria,Merchant shops,Sold by Isolated Merchant southeast of Main Ac...


In [339]:
sorted_df.to_excel("output/combined/sorted_data.xlsx", index=False, engine='openpyxl')

In [340]:
area_keywords = {
    'Limgrave': ['Limgrave', 'Stormhill', 'Knowledge', 'Chapel of Anticipation', 'Stranded Graveyard', 'Beastman', 'Godrick', 'Stormveil', 'Margit', 'Weeping Peninsula', 'Leonine Misbegotten'], 
    'Liurnia of the Lakes': ['Liurnia', 'Academy of Raya Lucaria', 'Makar', 'Bellum', 'Carian', 'Moonlight', 'Red Wolf of Radagon', 'Royal Knight', 'Ruin-Strewn', 'Adula', 'Alecto', 'Moonlight Altar'], 
    'Caelid': ['Redmane', 'Radahn', 'Caelid', 'Dragonbarrow'], 
    'Altus Plateau': ['Altus Plateau', 'Altus Tunnel', 'Capital Outskirts', 'Shaded Castle'], 
    'Mt. Gelmir': ['Mt. Gelmir', 'Volcano Manor', 'Rykard'], 
    'Leyndell, Royal Capital': ['Royal Capital', 'Leyndell', 'Subterranean Shunning-Grounds', 'Divine Tower of East Altus', 'Divine Tower of West Altus', 'the Omen'],
    'Mountaintops of the Giants': ['Mountaintops', 'Niall', 'Sol', 'Fire Giant', 'Flame Peak', 'Forbidden'],
    'Consecrated Snowfield': ['Consecrated Snowfield'],
    "Miquella's Haligtree": ['Haligtree', 'Malenia'],
    'Crumbling Farum Azula': ['Farum Azula', 'Dragon Temple', 'Godskin Duo', 'Maliketh'],
    'Leyndell, Ashen Capital': ['Ashen'],
    'Underground': ['Underground', 'Siofra', 'Ancestor Spirit', 'Nokron', 'Regal', 'Valiant Gargoyles', 'Mimic Tear', 'Lake of Rot', 'Astel, Naturalborn', 'Deeproot Depths', 'Ainsel', 'Nokstella', 'Dragonkin'],
    'Mohgwyn Dynasty Mausoleum': ['Lord of Blood', 'Mohgwyn'],
    'Roundtable': ['Roundtable'],
    'Gravesite Plain': ['Gravesite Plain', 'Belurat', 'Castle Ensis', "Dragon's Pit", 'Western Nameless', 'Dancing Lion', 'Ellac'],
    'Scadu Altus': ['Scadu Altus', 'Shadow Keep', 'Specimen Storehouse', 'Scaduview', 'Scadutree Base', 'Fort of Reprimand', 'Northern Nameless', 'Birthing', 'Rauh Base'],
    'Hinterlands': ['Hinterlands', 'Manus Metyr'],
    'Cerulean Coast': ['Cerulean Coast', 'Southern Nameless', 'Stone Coffin Fissure'],
    'Jagged Peak': ['Jagged Peak', 'Lamenter'],
    'Abyssal Woods': ['Abyssal Woods', 'Eastern Nameless', 'Darklight', 'Midra' ],
    'Ancient Ruins of Rauh': ['Ancient Ruins of Rauh', 'Bud'],
    'Enir-Ilim': ['Enir-Ilim', 'Consort'],
    'Stone Platform': ['Stone Platform']
}

output_dir = 'output/'

In [341]:
for key, keywords in area_keywords.items():
    # Iterate over the unique values in the "Area" column and check if they contain any keywords
    areas_to_keep = [area for area in sorted_df["Area"].unique() if any(keyword in area for keyword in keywords)]

    area_keywords[key] = areas_to_keep
    
# print(area_keywords)

for key, areas_to_keep in area_keywords.items():    
    # Filter the sorted_df DataFrame based on the areas_to_keep list
    filtered_df = sorted_df[sorted_df["Area"].isin(areas_to_keep)]
    # print(filtered_df.head())
    
    # print(key)
    filtered_df.to_excel(f"{output_dir}{key}.xlsx", index=False, engine='openpyxl')

    # Drop the filtered_df entries from the original sorted_df
    sorted_df = sorted_df.drop(filtered_df.index)

# Print the remaining DataFrame
print(f"\nShould be empty:")
print(sorted_df)



Should be empty:
Empty DataFrame
Columns: [Area, Type, Description]
Index: []


In [342]:
# Combine into one spreadsheet but separate sheets:
combined_output = f"ALL_Combined"
merged_output = f'ALL_MERGED'

# output_files = [f"{output_dir}{file}" for file in os.listdir(output_dir) if file.endswith('.xlsx') and combined_output not in file and merged_output not in file]
# print(f"All output files: {output_files}")

combined_output = f"{output_dir}{combined_output}.xlsx"
merged_output = f'{output_dir}{merged_output}.xlsx'


with pd.ExcelWriter(combined_output , mode='a', if_sheet_exists='replace') as writer:
    # for file in output_files:    
    for key, _ in area_keywords.items():                     
        # if file.endswith('.xls') or file.endswith('.xlsx'):
        file = f"{output_dir}{key}.xlsx"
        cur_sheet = os.path.splitext(os.path.basename(file))[0]
        print(f"Current sheet is: {cur_sheet}")
        df = pd.read_excel(file)
        df.to_excel(writer, sheet_name=cur_sheet)

Current sheet is: Limgrave
Current sheet is: Liurnia of the Lakes
Current sheet is: Caelid
Current sheet is: Altus Plateau
Current sheet is: Mt. Gelmir
Current sheet is: Leyndell, Royal Capital
Current sheet is: Mountaintops of the Giants
Current sheet is: Consecrated Snowfield
Current sheet is: Miquella's Haligtree
Current sheet is: Crumbling Farum Azula
Current sheet is: Leyndell, Ashen Capital
Current sheet is: Underground
Current sheet is: Mohgwyn Dynasty Mausoleum
Current sheet is: Roundtable
Current sheet is: Gravesite Plain
Current sheet is: Scadu Altus
Current sheet is: Hinterlands
Current sheet is: Cerulean Coast
Current sheet is: Jagged Peak
Current sheet is: Abyssal Woods
Current sheet is: Ancient Ruins of Rauh
Current sheet is: Enir-Ilim
Current sheet is: Stone Platform


In [343]:
# Combine into one SHEET on a single spreadsheet
df = pd.read_excel(f"{combined_output}", sheet_name=None, index_col=None)
print(f"{df}")
cdf = pd.concat(df.values(), ignore_index=True)

print(cdf)

# with pd.ExcelWriter(dir_output , mode='a', if_sheet_exists='replace') as writer:
    
#     cdf.to_excel(writer, sheet_name=f"ALL_TOGETHER")
# openpyxl, xlsxwriter
cdf.to_excel(merged_output, engine='openpyxl', sheet_name='Merged', header=True, index=False)

{'Sheet1': Empty DataFrame
Columns: []
Index: [], 'Limgrave':     Unnamed: 0                       Area                              Type  \
0            0                   Limgrave  Vanilla major key item locations   
1            1                   Limgrave                    Merchant shops   
2            2                   Limgrave                    Merchant shops   
3            3                   Limgrave                    Merchant shops   
4            4                   Limgrave                    Merchant shops   
5            5  Limgrave - Murkwater Cave                    Merchant shops   
6            6                  Stormhill                      Major bosses   
7            7           Stormveil Castle  Vanilla major key item locations   
8            8           Stormveil Castle                      Major bosses   
9            9          Weeping Peninsula                    Merchant shops   
10          10          Weeping Peninsula                      Major 