In [135]:
import os
import sys
from IPython import get_ipython
from pathlib import Path
import json
import pandas as pd

In [136]:
def get_script_directory():
    """
    Return the directory containing the running notebook or script, as a string.

    Resolution order:
    1. VS Code Jupyter: the ``__vsc_ipynb_file__`` variable, looked up both on
       the shell object and in the kernel's user namespace.
    2. A running Jupyter server whose session list contains this kernel.
    3. The current working directory (also used when any step above fails).

    Outside a Jupyter kernel the parent directory of ``__file__`` is returned;
    if ``__file__`` is not defined the current working directory is used.
    """
    # If running in Jupyter
    if 'ipykernel' in sys.modules:
        try:
            shell = get_ipython()

            # 1. VS Code exposes the notebook path as a variable in the kernel's
            #    user namespace (the notebook globals), so check there as well
            #    as on the shell object itself.
            vsc_file = getattr(shell, '__vsc_ipynb_file__', None)
            if vsc_file is None:
                user_ns = getattr(shell, 'user_ns', None) or {}
                vsc_file = user_ns.get('__vsc_ipynb_file__')
            if vsc_file:
                return str(Path(vsc_file).parent)

            # 2. Ask the running Jupyter server(s) which notebook owns this kernel.
            #    The import fails where the classic notebook package is not
            #    available; that ImportError falls through to the cwd fallback.
            from notebook.notebookapp import list_running_servers
            servers = list_running_servers()
            if servers:
                import requests
                from urllib.parse import urljoin
                connection_file = shell.config['IPKernelApp']['connection_file']
                kernel_id = Path(connection_file).stem.replace('kernel-', '')
                for server in servers:
                    response = requests.get(
                        urljoin(server['url'], 'api/sessions'),
                        params={'token': server.get('token', '')},
                        timeout=5,  # never hang the notebook on an unresponsive server
                    )
                    for session in response.json():
                        if session['kernel']['id'] == kernel_id:
                            notebook_parent = Path(session['notebook']['path']).parent
                            return str(Path(server['notebook_dir']) / notebook_parent)

            # 3. Fallback to current working directory
            return str(Path.cwd())
        except Exception:
            # Best-effort lookup: any failure degrades to the working directory,
            # but KeyboardInterrupt/SystemExit are no longer swallowed.
            return str(Path.cwd())

    # If running as a Python script
    try:
        return str(Path(__file__).parent.resolve())
    except NameError:
        # __file__ is undefined in an interactive interpreter or `python -c`
        return str(Path.cwd())

In [137]:
def load_json(json_path):
    """
    Read a JSON file and return the decoded Python object.

    The file is decoded as UTF-8 regardless of the platform's default
    encoding. A leading byte-order mark is tolerated.

    Args:
        json_path: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file's contents.
    """
    # 'utf-8-sig' reads plain UTF-8 and also strips a BOM if one is present
    with open(json_path, 'r', encoding='utf-8-sig') as file:
        return json.load(file)

In [138]:
def construct_url_dict(gid_dict, url_template):
    """
    Build a CSV-export URL for every sheet of one spreadsheet.

    Args:
        gid_dict: Mapping of sheet name -> gid value.
        url_template: URL text containing the edit-link placeholder.

    Returns:
        dict: sheet name -> url_template with the placeholder swapped for
        the CSV export query of that sheet's gid. A template without the
        placeholder is returned unchanged for every sheet.
    """
    placeholder = "edit?gid=gid_value#gid=gid_value"
    return {
        sheet_name: url_template.replace(placeholder, f"export?format=csv&gid={sheet_gid}")
        for sheet_name, sheet_gid in gid_dict.items()
    }

In [139]:
def construct_master_url_dict(sheets_dict):
    """
    Build the per-sheet export URLs for every spreadsheet.

    Args:
        sheets_dict: Mapping of spreadsheet name -> dict holding a 'sheets'
            mapping and a 'link_template' string.

    Returns:
        dict: spreadsheet name -> result of construct_url_dict for that entry.
    """
    return {
        name: construct_url_dict(entry['sheets'], entry['link_template'])
        for name, entry in sheets_dict.items()
    }

In [140]:
def row_first_value(row):
    """Return the row's first cell as a stripped string, or None if it is missing."""
    leading_cell = row.iloc[0]
    if pd.notna(leading_cell):
        return str(leading_cell).strip()
    return None

def first_column_indecies(df):
    """Return the first column of df as a Series, leaving out its first row."""
    rows_after_first = slice(1, None)
    return df.iloc[rows_after_first, 0]

In [None]:
# def initial_content_dict_from_url(url):    
#     df=pd.read_csv(url)
#     for col in df.select_dtypes(include=['float64']):
#         if (df[col].dropna().apply(float.is_integer).all()):
#             df[col] = df[col].astype('Int64')
#     initial_content_dict = {
#         row.iloc[0]: {
#             df.columns[i+1]: val 
#             for i, val in enumerate(row.iloc[1:])  # Skip first column
#             if pd.notna(val)
#         }
#         for _, row in df.iterrows()
#         if pd.notna(row.iloc[0])  # First column as key
#     }
#     return initial_content_dict

In [241]:
def _strip_duplicate_suffix(column, known_columns):
    """Map a pandas-renamed duplicate header ('Keeper.1') back to its base ('Keeper')."""
    head, dot, tail = column.rpartition('.')
    # Only treat '.N' as a duplicate marker when the base header really exists;
    # headers that merely contain a dot (e.g. 'Vol. 1') are kept intact.
    if dot and tail.isdigit() and head in known_columns:
        return head
    return column


def _next_free_key(entry, base_col):
    """Return base_col, or the first 'base_col_N' (N >= 2) not already a key of entry."""
    if base_col not in entry:
        return base_col
    number = 2
    while f"{base_col}_{number}" in entry:
        number += 1
    return f"{base_col}_{number}"


def initial_content_dict_from_url(url):
    """
    Read a CSV and index its rows by the value in the first column.

    Rows that share a first-column value are merged into one entry. When a
    column name is already used in that entry (a repeated row, or a header
    pandas renamed to 'Name.1'), the value is stored under 'Name_2',
    'Name_3', ... so nothing is overwritten.

    Args:
        url: Anything ``pd.read_csv`` accepts (URL, path or file-like object).

    Returns:
        dict: first-column value -> {column name: value}, holding only the
        non-missing cells. Rows whose first cell is missing are skipped.
    """
    df = pd.read_csv(url)

    # Convert float columns to Int64 where every present value is whole
    for col in df.select_dtypes(include=['float64']):
        if df[col].dropna().apply(float.is_integer).all():
            df[col] = df[col].astype('Int64')

    # Resolve each data column's base name once instead of once per row
    known_columns = set(df.columns)
    base_names = [_strip_duplicate_suffix(col, known_columns) for col in df.columns[1:]]

    content_dict = {}
    for _, row in df.iterrows():
        index_value = row.iloc[0]
        if pd.isna(index_value):
            continue  # no key to file this row under

        entry = content_dict.setdefault(index_value, {})
        for base_col, val in zip(base_names, row.iloc[1:]):
            if pd.notna(val):
                entry[_next_free_key(entry, base_col)] = val

    return content_dict

In [242]:
def check_first_row_and_column_duplicates(df, detailed=False):
    """
    Report repeated values in the first column and repeated column headers.

    Args:
        df (pd.DataFrame): Frame to inspect.
        detailed (bool): When True, report occurrence counts instead of flags.

    Returns:
        dict with three keys:
            'column_duplicates': bool, or (detailed) a Series of counts for
                each repeated first-column value, highest first.
            'header_duplicates': False when no header repeats; otherwise
                True, or (detailed) a Series of counts per repeated header.
            'exact_header_duplicates': list of the repeated header names.
    """
    # --- first-column values -------------------------------------------
    leading = df.iloc[:, 0]
    repeated_values = leading[leading.duplicated(keep=False)]
    if detailed:
        column_report = repeated_values.value_counts().sort_values(ascending=False)
    else:
        column_report = not repeated_values.empty

    # --- column headers --------------------------------------------------
    occurrences = pd.Series(df.columns).value_counts()
    repeated_headers = occurrences[occurrences > 1]
    if repeated_headers.empty:
        header_report = False
        header_names = []
    else:
        header_names = repeated_headers.index.tolist()
        if detailed:
            header_report = repeated_headers.sort_values(ascending=False)
        else:
            header_report = True

    return {
        'column_duplicates': column_report,
        'header_duplicates': header_report,
        'exact_header_duplicates': header_names,
    }

In [243]:
# Load the configuration and parse every configured sheet.
# (Kept as top-level statements rather than a main() so the results stay
# available to the cells below.)
script_path = get_script_directory()

# Join with pathlib so the paths work on every OS, not only with Windows '\\'
config_path = str(Path(script_path) / 'config.json')
sheets_path = str(Path(script_path) / 'sheets.json')

config_dict = load_json(config_path)
sheets_dict = load_json(sheets_path)

master_url_dict = construct_master_url_dict(sheets_dict)

# game -> sheet -> parsed content of that sheet
game_content_dict = {}
for game in config_dict['games']:
    game_content_dict[game] = {}
    for sheet, url in master_url_dict[game].items():
        game_content_dict[game][sheet] = initial_content_dict_from_url(url)

In [244]:
# Spot-check the parsed entry stored under one first-column key of one sheet
game_content_dict['Book of Hours']['Crafting Recipe']['Anbary & Lapidary']

{'Aspect': 'Sky',
 'Prentice': 'Awakened Feather',
 'Scholar Ingredient': 'Liquid',
 'Scholar': 'Xanthotic Essence',
 'Keeper Ingredient': 'Ichor Vitreous',
 'Keeper': 'Ashartine',
 'Aspect_2': 'Forge',
 'Prentice_2': 'Bitterblack Salts',
 'Scholar Ingredient_2': 'Liquid',
 'Scholar_2': 'Iotic Essence',
 'Keeper Ingredient_2': 'Iotic Essence',
 'Keeper_2': 'Orpiment Exutant',
 'Keeper Ingredient_3': 'Xanthotic Essence',
 'Keeper_3': 'Iotic Essence',
 'Encaustum Terminale': 'Encaustum Terminale',
 'Aspect_3': 'Lantern',
 'Keeper Ingredient_4': 'Xanthotic Essence',
 'Keeper_4': 'Uzult',
 'Encaustum Terminale_2': 'Encaustum Terminale'}

In [245]:
# Display every parsed entry of the 'Crafting Recipe' sheet (large output)
game_content_dict['Book of Hours']['Crafting Recipe']

{'Coil & Chasm': {'Aspect': 'Scale',
  'Prentice': "Bisclavret's Knot",
  'Scholar Ingredient': 'Memory',
  'Scholar': 'Old Moment',
  'Keeper Ingredient': 'Old Moment',
  'Keeper': 'Earthquake Name',
  'Aspect_2': 'Nectar',
  'Prentice_2': "Thirza's Cordials",
  'Scholar Ingredient_2': 'Wood',
  'Scholar_2': 'Pyrus Auricalcinus',
  'Keeper Ingredient_2': 'Pyrus Auricalcinus',
  'Keeper_2': 'Sacrament Malachite'},
 'Inks of Revelation': {'Aspect': 'Lantern',
  'Prentice': 'Refulgin',
  'Scholar Ingredient': 'Glass',
  'Scholar': 'Ichor Vitreous',
  'Keeper Ingredient': 'Xanthotic Essence',
  'Keeper': 'Uzult',
  'Encaustum Terminale': 'Encaustum Terminale',
  'Aspect_2': 'Moth',
  'Prentice_2': 'Moth-Gold',
  'Scholar Ingredient_2': 'Liquid',
  'Scholar_2': 'Asimel',
  'Aspect_3': 'Nectar',
  'Prentice_3': 'Yewgall Ink',
  'Aspect_4': 'Sky',
  'Prentice_4': 'Stargall Ink',
  'Aspect_5': 'Rose',
  'Keeper Ingredient_2': 'Ichor Auroral',
  'Keeper_2': 'Porphyrine',
  'Encaustum Terminale

In [246]:
import requests
import pandas as pd

def check_index_duplicates(df, detailed=False):
    """
    Look for repeated values in the first column of df.

    Args:
        df: pandas DataFrame to inspect.
        detailed: When True, return occurrence counts instead of a flag.

    Returns:
        bool: whether any first-column value repeats (detailed=False).
        Series: count per repeated value, highest first (detailed=True).
    """
    keys = df.iloc[:, 0]
    repeated = keys[keys.duplicated(keep=False)]

    if not detailed:
        return not repeated.empty
    return repeated.value_counts().sort_values(ascending=False)

# Report repeated first-column keys for every configured sheet
for game in config_dict['games']:
    for sheet, url in master_url_dict[game].items():
        try:
            # Read the sheet's CSV; header names are not checked here
            df = pd.read_csv(url)

            # Occurrence count per repeated key; empty when all keys are unique
            duplicates = check_index_duplicates(df, detailed=True)
            if duplicates.empty:
                continue

            print(f'\n[INDICES] Duplicates found in {game} - {sheet}:')
            print(duplicates.to_string())

            # Headers are printed alongside the report for reference
            print("\nCurrent headers:", df.columns.tolist())

        except Exception as e:
            # Report the failure and carry on with the remaining sheets
            print(f"\nError processing {game} - {sheet}: {str(e)}")


[INDICES] Duplicates found in Book of Hours - Crafting Recipe:
Skill for Recipe
Inks of Revelation                 5
Inks of Containment                5
Inks of Power                      5
Furs & Feathers                    3
Drums & Dances                     3
Tridesma Hiera                     3
Transformations & Liberations      3
Glassblowing & Vesselcrafting      3
Glaziery & Lightsmithing           3
Purifications & Exaltations        3
Ouranoscory                        3
Weaving & Knotworking              3
Auroral Contemplations             3
Anbary & Lapidary                  3
Coil & Chasm                       2
Putrefactions & Calcinations       2
Insects & Nectars                  2
Herbs & Infusions                  2
Path & Pilgrim                     2
Pearl & Tide                       2
Sand Stories                       2
Disciplines of the Scar            2
Hills & Hollow                     2
Surgeries & Exsanguinations        2
Preliminal Meter               

In [247]:
# Re-read one sheet and list its headers as pandas parsed them; the printed
# list shows the second 'Keeper Ingredient' and 'Keeper' headers with a '.1' suffix.
df = pd.read_csv(master_url_dict['Book of Hours']['Crafting Recipe'])
print("Actual headers:", df.columns.tolist())

Actual headers: ['Skill for Recipe', 'Aspect', 'Prentice', 'Scholar Ingredient', 'Scholar', 'Keeper Ingredient', 'Keeper', 'Keeper Ingredient.1', 'Keeper.1', 'Encaustum Terminale']


In [248]:
# Display the full nested result: game -> sheet -> entry (very large output)
game_content_dict
    # Leftover debug prints, kept disabled:
    # print(game)
    # print(master_url_dict[game])

{'Cultist Simulator': {'Expeditions': {'Cater & Hero Limited': {'Secret Histories Level': 2,
    'Aspects (tags)': 'In the Capital: A secret place in the teeming capital city where I live.',
    'Card Text': "It's been abandoned since the explosion, then both the owners died. The machines stand silent; the building is a collapsing shell. Mr Cater was known to display peculiar trinkents in his office. Perhaps some yet remain.",
    'Beginning text': 'The remaining factory walls could topple at any moment. Blackened timbers creak in pain. We must equip ourselves properly and move with dreadful care.',
    'End text': "Cater's Office: Two floorboards lift away to reveal a hollow beneath Cater's desk! The rats have made free with the biscuits he stores there against the peckishness which haunts the enterpreneur, but they could not penetrate this iron lock-box. A moment with a crowbar will open it.",
    'Obstacle 1': 'Treacherous Ground (Forge)',
    'Loot 1': 'Bronze Spintria',
    'Loot 

In [249]:
# Parse the 'Memories' sheet on its own and display the resulting dict
boh_memory_url=master_url_dict['Book of Hours']['Memories']
initial_content_dict_from_url(boh_memory_url)

{'A Stolen Secret': {'Knock': 1, 'Moon': 2},
 'Ascendant Harmony': {'Lantern': 2,
  'Sky': 4,
  'Evolution': 'Birdsong',
  'Special': 'Sound',
  'Persistent': 'Y',
  'Crafted': 'Y'},
 'Beguiling Melody': {'Grail': 2, 'Sky': 2, 'Special': 'Sound'},
 'Bittersweet Certainty': {'Lantern': 1, 'Winter': 2, 'Crafted': 'Y'},
 'Cheerful Ditty': {'Heart': 2, 'Sky': 1, 'Special': 'Sound', 'Crafted': 'Y'},
 'Confounding Parable': {'Moon': 2, 'Rose': 2, 'Sky': 2, 'Crafted': 'Y'},
 'Contradiction': {'Edge': 2, 'Moon': 1},
 'Curious Hunch': {'Heart': 3,
  'Knock': 4,
  'Lantern': 3,
  'Moth': 3,
  'Evolution': 'Nyctodromy',
  'Special': 'Omen',
  'Persistent': 'Y',
  'Crafted': 'Y'},
 'Didumos': {'Knock': 3,
  'Lantern': 3,
  'Sky': 6,
  'Winter': 3,
  'Evolution': 'Horomachistry/Hushery',
  'Persistent': 'Y',
  'Crafted': 'Y'},
 'Earth-Sign': {'Nectar': 2,
  'Scale': '2',
  'Evolution': 'Skolekosophy',
  'Special': 'Omen',
  'Persistent': 'Y'},
 'Earthquake Name': {'Forge': 3,
  'Scale': '6',
  'Evo

In [250]:
# # Create output directory relative to script location
# script_dir = Path(get_script_directory())
# output_dir = script_dir.parent / "Obsidian/markdown_files"
# output_dir.mkdir(exist_ok=True)

# def row_value_pairs(row):
#     """Convert a pandas row to Obsidian-friendly markdown format"""
#     content = []
    
#     # Get the first column's value (regardless of other columns)
#     first_col_name = row.index[0]  # Name of the first column
    
#     # Add other columns as key-value pairs (skip the first column)
#     # content.append(f"- **Type**: {type}")
#     for col, val in row.items():
#         if col == first_col_name:  # Skip the first column (already used as heading)
#             content.append(f"- **Type**: {col}")
#         if pd.notna(val):
#             clean_val = str(val).strip().replace('\r\n', '\n').replace('\n', '<br>')
#             content.append(f"- **{col}**: {clean_val}")
    
#     return "\n".join(content)

# def md_files_from_df(df):
#     # Write each row to a markdown file
#     for index, row in df.iterrows():
#         # Create safe filename (remove special chars)
#         safe_filename = row_first_value(row).replace(':', ' -') + '.md'
#         filepath = output_dir / safe_filename
#         # print(f'filepath: {filepath}')
        
#         try:
#             with open(filepath, 'w', encoding='utf-8') as f:
#                 f.write(row_value_pairs(row))
#             print(f"✓ Created: {filepath.relative_to(output_dir)}")
#         except Exception as e:
#             print(f"✗ Error writing {filepath.name}: {str(e)}")