In [1]:
import pandas as pd
import re
import duckdb
import itertools
import os

file_path = 'exported_text.txt'

# Function to parse a single hexagram
def parse_hexagram(hexagram):
    lines = hexagram.strip().split('\n')
    
    try:
        # Extract hexagram number and name
        number, name = lines[0].split('. ')
        number = int(number)

        # Extract top and bottom trigrams
        top_trigram = lines[1].split(', ')[-1]
        bottom_trigram = lines[2].split(', ')[-1]

        # Extract the judgment
        judgment_lines = []
        traits_index = None
        for i, line in enumerate(lines[3:], start=3):
            if line.startswith('Les traits'):
                traits_index = i
                break
            judgment_lines.append(line)
        judgment = ' '.join(judgment_lines).replace('\n', ' ')

        # Extract traits
        traits_lines = lines[traits_index+1:] if traits_index else []
        traits_text = '\n'.join(traits_lines)
        trait_pattern = re.compile(r'((Six|Neuf) [^\n]+\n(.*?)(?=\n(Six|Neuf) |$))', re.DOTALL)
        traits = trait_pattern.findall(traits_text)

        def clean_text(text):
            return text.replace('\n', ' ').strip()

        traits_cleaned = [clean_text(trait[0]) for trait in traits]

        # Create dictionary for parsed hexagram
        hexagram_dict = {
            'Number': number,
            'Name': name,
            'Top Trigram': top_trigram,
            'Bottom Trigram': bottom_trigram,
            'Judgment': judgment
        }
        for i in range(6):
            hexagram_dict[f'Trait {i+1}'] = traits_cleaned[i] if i < len(traits_cleaned) else ''
        
        return hexagram_dict
    except Exception as e:
        # Log the error and the problematic hexagram
        print(f"Error parsing hexagram: {lines[0]}")
        print(f"Error: {e}")
        return None

# Function to extract hexagrams from the full text
def extract_hexagrams(text):
    pattern = re.compile(r'(\d+\.\s[\s\S]+?)(?=\d+\.\s|$)')
    return pattern.findall(text)

# Read full text from file
with open(file_path, 'r', encoding='utf-8') as file:
    full_text = file.read()

# Extract hexagrams from the full text
hexagrams = extract_hexagrams(full_text)

# Parse all extracted hexagrams
parsed_hexagrams = [parse_hexagram(hexagram) for hexagram in hexagrams if parse_hexagram(hexagram)]

# Create DataFrame from parsed hexagrams
df = pd.DataFrame(parsed_hexagrams)

# Save DataFrame to CSV
csv_file = "tabular_data.csv"
df.to_csv(csv_file, index=False)

# Function to load data into DuckDB
def load_data_to_db(csv_file, db_file):
    df = pd.read_csv(csv_file)
    con = duckdb.connect(db_file)
    
    con.execute("""
        CREATE TABLE IF NOT EXISTS iching (
            Number INT,
            Name VARCHAR,
            Top_Trigram VARCHAR,
            Bottom_Trigram VARCHAR,
            Judgment TEXT,
            Trait_1 TEXT,
            Trait_2 TEXT,
            Trait_3 TEXT,
            Trait_4 TEXT,
            Trait_5 TEXT,
            Trait_6 TEXT
        )
    """)
    
    con.register('df', df)
    con.execute("INSERT INTO iching SELECT * FROM df")
    con.close()

# Database path
db_path = '/Users/Roger/Documents/code/iching/data/iching.db'

# Remove existing database file if exists
if os.path.exists(db_path):
    os.remove(db_path)

# Load data to DuckDB
load_data_to_db(csv_file, db_path)

# Create hexagram mapping table in DuckDB
con = duckdb.connect(db_path)
con.execute("""
CREATE TABLE IF NOT EXISTS hexagram_mapping (
    combination TEXT PRIMARY KEY,
    hexagram_id INTEGER
);
""")
con.close()

# Hexagram mapping details
trigram_mapping = {
    "111": "K'ien",
    "000": "K'ouen",
    "010": "Kan",
    "100": "Tchen",
    "001": "Touei",
    "101": "Li",
    "011": "Sun",
    "110": "Ken"
}

hexagram_table = [
    [1, 34, 5, 26, 11, 9, 14, 43],
    [25, 51, 3, 27, 24, 42, 21, 17],
    [6, 40, 29, 4, 7, 59, 64, 47],
    [33, 62, 39, 52, 15, 53, 56, 31],
    [12, 16, 8, 23, 2, 20, 35, 45],
    [44, 32, 48, 18, 46, 57, 50, 28],
    [13, 55, 63, 22, 36, 37, 30, 49],
    [10, 54, 60, 41, 19, 61, 38, 58]
]

# Function to convert lines to trigrams
def lines_to_trigrams(lines):
    return [''.join(['1' if line in (7, 9) else '0' for line in lines[i:i+3]]) for i in range(0, 6, 3)]

# Insert data into hexagram_mapping table
con = duckdb.connect(db_path)
combinations = list(itertools.product([6, 7, 8, 9], repeat=6))
mapping_data = []

for comb in combinations:
    trigrams = lines_to_trigrams(comb)
    top_trigram = trigram_mapping[trigrams[0]]
    bottom_trigram = trigram_mapping[trigrams[1]]

    top_index = list(trigram_mapping.values()).index(top_trigram)
    bottom_index = list(trigram_mapping.values()).index(bottom_trigram)

    hexagram_id = hexagram_table[top_index][bottom_index]
    combination_str = ''.join(map(str, comb))
    mapping_data.append((combination_str, hexagram_id))

con.executemany("INSERT INTO hexagram_mapping (combination, hexagram_id) VALUES (?, ?)", mapping_data)
con.close()


In [2]:
import pandas as pd
import re
import duckdb
import itertools
import os

In [6]:
db_path = '/Users/Roger/Documents/code/iching/data/iching.db'


con = duckdb.connect(database=db_path)

# Charger la table de mapping
mapping_table = con.execute("SELECT * FROM hexagram_mapping").fetchdf()

# Afficher la table
mapping_table  # Affiche les premières lignes pour vérifier

# # Afficher toute la table
# import ace_tools as tools; tools.display_dataframe_to_user(name="Hexagram Mapping Table", dataframe=mapping_table)


Unnamed: 0,combination,hexagram_id
0,666666,51
1,666667,24
2,666668,51
3,666669,24
4,666676,3
...,...,...
4091,999989,9
4092,999996,43
4093,999997,1
4094,999998,43


: 