In [15]:
import re
import csv

def clean_unspoiled_data(input_filename, output_filename):
    """Extract node rows from a wiki-table dump and save them as a CSV.

    Only lines of the form
        |Time || {{item icon|Item Name}} || Slot || [[Location]] || (x..,y..) ...
    are considered, i.e. lines starting with '|' that split into at least five
    '||'-separated cells. Rows mentioning "questlink" anywhere, or whose cleaned
    item name contains "cluster" (both case-insensitive), are dropped.

    Args:
        input_filename: Path of the UTF-8 text file holding the raw table.
        output_filename: Path of the CSV to create (overwritten if present).
            Columns: Time, Item Name, Location, Coordinates.
    """
    skip_line_re = re.compile(r'questlink', re.IGNORECASE)
    skip_item_re = re.compile(r'cluster', re.IGNORECASE)
    template_re = re.compile(r'\{\{.*?\|([^\}]+)\}\}')
    item_noise_re = re.compile(r'[{}]|\(Item\)|Collectable')
    link_brackets_re = re.compile(r'\[\[|\]\]')

    # Pull the whole file in and cut it into lines.
    with open(input_filename, 'r', encoding='utf-8') as source:
        raw_lines = source.read().split('\n')

    # Each record is [Time, Item Name, Location, Coordinates].
    records = []
    for raw_line in raw_lines:
        stripped = raw_line.strip()
        if not stripped.startswith('|'):
            continue

        cells = [cell.strip() for cell in stripped.split('||')]
        if len(cells) < 5:
            continue

        # A quest link anywhere in the row disqualifies it.
        if skip_line_re.search(' '.join(cells)):
            continue

        # {{item icon|Broad Beans}} => Broad Beans, then drop leftover braces
        # and the "(Item)" / "Collectable" markers.
        item_name = item_noise_re.sub('', template_re.sub(r'\1', cells[1])).strip()
        if skip_item_re.search(item_name):
            continue

        records.append([
            cells[0].lstrip('|').strip(),                # time, minus the leading '|'
            item_name,
            link_brackets_re.sub('', cells[3]).strip(),  # [[Il Mheg]] => Il Mheg
            cells[4],                                    # coordinates, kept verbatim
        ])

    # Header first, then every surviving record.
    with open(output_filename, 'w', encoding='utf-8', newline='') as sink:
        csv.writer(sink).writerows(
            [["Time", "Item Name", "Location", "Coordinates"], *records]
        )

if __name__ == "__main__":
    # Raw table dump expected in the current directory, and the CSV to produce from it.
    input_file, output_file = "unspoiled_nodes", "cleaned_nodes.csv"
    clean_unspoiled_data(input_file, output_file)
    print(f"Processed '{input_file}' and wrote results to '{output_file}'.")


Processed 'unspoiled_nodes' and wrote results to 'cleaned_nodes.csv'.


In [16]:
import pandas as pd
import json
import re

# Load the JSON file with item IDs.
# The code below treats it as {item_id: {language_code: name, ...}, ...}.
with open("item_ids.json", "r", encoding="utf-8") as f:
    item_json = json.load(f)

# Build a mapping: lower-case English item name -> item ID.
# If two IDs share the same English name, the one appearing later in the file wins.
item_mapping = {}
for item_id, names in item_json.items():
    # `or ""` covers an explicit JSON null as well as a missing "en" key;
    # .get()'s default alone only handles the missing-key case.
    en_name = (names.get("en") or "").strip().lower()
    if not en_name:
        # Unnamed entries would all collapse onto the "" key and could hand an
        # arbitrary ID to a row whose cleaned name is empty, so leave them out.
        continue
    item_mapping[en_name] = item_id

# Read in the cleaned nodes CSV.
nodes_df = pd.read_csv("cleaned_nodes.csv")

# Function to clean item names for matching:
# Remove occurrences of "(Rare)" (case-insensitive), then lower-case and strip.
def clean_item_name(name):
    """Normalise an item name for dictionary lookup.

    Every "(Rare)" tag (any letter case), together with the whitespace directly
    in front of it, is removed; the result is stripped and lower-cased.

    Args:
        name: Item name as a string.

    Returns:
        The normalised, lower-case name.
    """
    without_rare_tag = re.sub(r'\s*\(rare\)', '', name, flags=re.IGNORECASE)
    trimmed = without_rare_tag.strip()
    return trimmed.lower()

# Add the lookup key for every row: the item name with any "(Rare)" tag removed,
# stripped and lower-cased (see clean_item_name).
nodes_df["Item Name Clean"] = nodes_df["Item Name"].apply(clean_item_name)

# Function to look up the item ID using the cleaned item name.
def get_item_id(row):
    """Return the item ID for one row, or None when the name is unknown.

    Args:
        row: Mapping-like row providing "Item Name Clean" (the lookup key) and
            "Item Name" (used only in the error message).

    Returns:
        The value stored in the module-level `item_mapping` for the cleaned
        name, or None after printing an error line if there is no entry.
    """
    name = row["Item Name Clean"]
    if name not in item_mapping:
        # Report the miss but keep going so the remaining rows are still processed.
        print(f"Error: No ID found for item '{row['Item Name']}' (cleaned as '{name}').")
        return None
    return item_mapping[name]

# Look up an ID for every row (None where the name is unknown).
nodes_df["ID"] = nodes_df.apply(get_item_id, axis=1)

# Output layout: ID first, then the original columns; the helper
# "Item Name Clean" column is left out.
output_columns = ["ID", "Time", "Item Name", "Location", "Coordinates"]
final_df = nodes_df[output_columns]

# Persist the merged table without the index column.
final_df.to_csv("final_nodes_with_ids.csv", index=False)
print("Merged CSV written to 'final_nodes_with_ids.csv'.")


Merged CSV written to 'final_nodes_with_ids.csv'.


In [20]:
import pandas as pd
from datetime import datetime, timedelta
import time

# -----------------------
# 1. Compute current Eorzean time in 24-hour format
# -----------------------
MS_PER_MINUTE = 1000 * 60
MS_PER_HOUR = MS_PER_MINUTE * 60

# Real-world Unix time in milliseconds, scaled onto the Eorzean clock.
# The factor equals 1440 / 70 (presumably one 24-hour Eorzean day per 70 real minutes).
local_epoch = int(time.time() * 1000)
epoch = local_epoch * 20.571428571428573
hours_24 = int((epoch / MS_PER_HOUR) % 24)
minutes = int((epoch / MS_PER_MINUTE) % 60)

# Format as 24-hour time (e.g. "13:59")
et_time_str = f"{hours_24:02d}:{minutes:02d}"
print("Current Eorzean time (raw):", et_time_str)

# -----------------------
# 2. Round up to the next hour if minutes > 0 (using 24-hour format)
# -----------------------
# Same value that strptime(et_time_str, "%H:%M") produces: the date defaults to 1900-01-01.
current_time = datetime(1900, 1, 1, hours_24, minutes)
threshold_time = current_time
if current_time.minute > 0:
    # From 23:xx this rolls over to 00:00 on 1900-01-02.
    threshold_time = current_time.replace(minute=0, second=0, microsecond=0) + timedelta(hours=1)

print("Threshold Eorzean time (rounded):", threshold_time.strftime("%H:%M"))

# -----------------------
# 3. Load and process final_nodes_with_ids.csv
# -----------------------
# Columns used further down: "ID", "Time", "Item Name", "Location", "Coordinates".
df = pd.read_csv("final_nodes_with_ids.csv")

# Helper function to robustly parse the "Time" column.
def parse_time(time_str):
    """Parse a "Time" cell into a datetime (date part is strptime's default, 1900-01-01).

    Accepted spellings:
      * "H:MM AM/PM" - the literal "AM/PM" is dropped and the rest is read as
        24-hour "%H:%M", so "2:00 AM/PM" gives 02:00 and "12:00 AM/PM" gives 12:00.
        Only this single time is returned for such entries.
      * "H:MM AM" / "H:MM PM" - read on the 12-hour clock.
      * "HH:MM" - read as 24-hour time.

    Args:
        time_str: The time text; surrounding whitespace is ignored.

    Returns:
        datetime carrying the parsed hour and minute.

    Raises:
        ValueError: If the text matches none of the formats above.
    """
    text = time_str.strip()

    if "AM/PM" in text:
        return datetime.strptime(text.replace("AM/PM", "").strip(), "%H:%M")

    # A 12-hour value parses straight to its 24-hour datetime.
    has_meridiem = "AM" in text or "PM" in text
    time_format = "%I:%M %p" if has_meridiem else "%H:%M"
    return datetime.strptime(text, time_format)

# Create a new column with parsed times.
# Each value is a datetime on strptime's default date (1900-01-01), so only the
# time of day carries information at this point.
df["Parsed Time"] = df["Time"].apply(parse_time)

# -----------------------
# 4. Adjust times for wrapping past midnight and filter for the next 24 hours
# -----------------------
def effective_time(row_time, threshold):
    """Shift a time that falls before the threshold onto the following day.

    Args:
        row_time: datetime of the event.
        threshold: datetime marking the start of the window.

    Returns:
        `row_time` plus one day when it is strictly earlier than `threshold`;
        otherwise `row_time` unchanged.
    """
    if row_time < threshold:
        return row_time + timedelta(days=1)
    return row_time

# First occurrence at or after the threshold, based on the single parsed time.
first_pass = df["Parsed Time"].apply(lambda t: effective_time(t, threshold_time))

# A "Time" such as "2:00 AM/PM" lists two spawns per day, 12 hours apart, but the
# parsed column holds only one of them. first_pass lies in
# [threshold, threshold + 24h), so the other spawn inside that window is either
# 12 hours earlier (when that is still >= threshold) or 12 hours later. Use the
# earlier one where it exists so these rows are sorted by their next spawn.
half_day = timedelta(hours=12)
twice_daily = df["Time"].astype(str).str.contains("AM/PM", regex=False)
other_spawn = first_pass - half_day
use_other = twice_daily & (other_spawn >= threshold_time)
df["Effective Time"] = first_pass.mask(use_other, other_spawn)

# Define the end of the 24-hour window.
end_time = threshold_time + timedelta(days=1)

# Filter for events whose effective time is in the next 24 hours.
df_filtered = df[(df["Effective Time"] >= threshold_time) & (df["Effective Time"] < end_time)]

# Sort by the effective time.
df_sorted = df_filtered.sort_values(by="Effective Time")

print("\nItems scheduled in the next 24 hours:")
print(df_sorted[["ID", "Time", "Item Name", "Location", "Coordinates"]])
df_sorted.to_csv("final_nodes_with_ids_sorted.csv", index=False)


Current Eorzean time (raw): 23:14
Threshold Eorzean time (rounded): 00:00

Items scheduled in the next 24 hours:
        ID        Time           Item Name                    Location  \
42    5350     1:00 AM     Silkworm Cocoon                 East Shroud   
105   9519     1:00 AM              Pumice           Eastern La Noscea   
104   5121     1:00 AM       Darksteel Ore  Coerthas Central Highlands   
17   32999  2:00 AM/PM  Rarefied Larch Sap                The Ruby Sea   
16   32997  2:00 AM/PM  Rarefied Larch Log                The Ruby Sea   
..     ...         ...                 ...                         ...   
120   5146     6:00 PM            Raw Ruby             Lower La Noscea   
121   5151     6:00 PM        Raw Sapphire             Lower La Noscea   
70    5546     9:00 PM       Trillium Bulb                 East Shroud   
69    6209     9:00 PM           Kidragora                 East Shroud   
122   5158     9:00 PM         Astral Rock  Coerthas Central Highlands   