## Creating Labelled Combinaiton Table

In [31]:
import pandas as pd
import json

In [37]:
nameless_rules = pd.read_csv('data\\nonlabelled_combinations.csv', header=None)
print(nameless_rules)

with open('data\\elements_ordered.json', 'r') as file:
        ordered_elements = json.load(file)

ordered_elements = [s.replace('.jpg', '') for s in ordered_elements]
print('Ordered List of element names: {}\n#of Elements: {}'.format(ordered_elements, len(ordered_elements)))


      0   1    2    3
0     3   0    0   12
1     2   0    0   14
2     1   0    0   15
3    12  15    0   16
4    16   0    0   17
..   ..  ..  ...  ...
173  32  35  226  316
174  32  35  227  317
175  32  35  228  318
176  32  35  229  319
177  32  35  230  320

[178 rows x 4 columns]
Ordered List of element names: ['Big_Tree', 'Tree', 'Stone', 'Red_Berry', 'Blue_Berry', 'Antler', 'Small_Stone', 'Stone_Tool1', 'Bough', 'Stick', 'Bark', 'Fiber', 'Twine', 'Axe', 'Log', 'Log_W_Bark', 'Half_Log', 'Quarter_Log', 'Antler_Refined', 'Stone_Tool2', 'Stone_Tool3', 'Stone_Tool4', 'Stone_Tool5', 'Big_Bough', 'Big_Bough_Refined', 'Big_Axe', 'Small_Stick', 'Brush', 'Container', 'Red_Paint', 'Blue_Paint', 'Big_Log', 'Big_Log_W_Bark', 'S_Log_Rd', 'S_Log_Sq', 'S_Log_Tr', 'R_S_Log_Rd', 'R_S_Log_Sq', 'R_S_Log_Tr', 'B_S_Log_Rd', 'B_S_Log_Sq', 'B_S_Log_Tr', 'S_Log_Rd_R', 'S_Log_Sq_R', 'S_Log_Tr_R', 'S_Log_Rd_B', 'S_Log_Sq_B', 'S_Log_Tr_B', 'R_S_Log_Rd_R', 'R_S_Log_Sq_R', 'R_S_Log_Tr_R', 'B_S_Log_Rd_R', '

In [38]:
mapping = {i+1: s for i, s in enumerate(ordered_elements)}      

# Apply the mapping to the DataFrame
nameless_rules_replaced = nameless_rules.applymap(lambda x: mapping.get(x, x))      # Does not change all the element names.
nameless_rules_replaced.to_csv('data\\labelled_combinations.csv', index=False)

## Element Image Label Translations

In [40]:
pip install Pillow imagehash

Note: you may need to restart the kernel to use updated packages.


In [47]:
import os
import shutil
from PIL import Image
import imagehash

def get_image_hash(image_path):
    try:
        return imagehash.average_hash(Image.open(image_path))
    except Exception as e:
        print(f"Error processing {image_path}: {str(e)}")
        return None

def append_image_names(source_folder, target_folder, output_folder):
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    source_images = [f for f in os.listdir(source_folder) if f.lower().endswith('.jpg')]
    target_images = [f for f in os.listdir(target_folder) if f.lower().endswith('.jpg')]

    print(f"Source images: {len(source_images)}")
    print(f"Target images: {len(target_images)}")

    source_hashes = {get_image_hash(os.path.join(source_folder, img)): img for img in source_images if get_image_hash(os.path.join(source_folder, img)) is not None}
    target_hashes = {get_image_hash(os.path.join(target_folder, img)): img for img in target_images if get_image_hash(os.path.join(target_folder, img)) is not None}

    print(f"Processed source images: {len(source_hashes)}")
    print(f"Processed target images: {len(target_hashes)}")

    matched_count = 0
    for hash_value, target_name in target_hashes.items():
        if hash_value in source_hashes:
            source_name = source_hashes[hash_value]
            new_name = f"{os.path.splitext(target_name)[0]}_{os.path.splitext(source_name)[0]}.jpg"
            old_path = os.path.join(target_folder, target_name)
            new_path = os.path.join(output_folder, new_name)
            shutil.copy2(old_path, new_path)
            matched_count += 1
            print(f"Copied and renamed: {target_name} -> {new_name}")
        else:
            print(f"No match found for target image: {target_name}")

    print(f"\nTotal matches: {matched_count}")
    print(f"Unmatched source images: {len(source_images) - matched_count}")
    print(f"Unmatched target images: {len(target_images) - matched_count}")

# Usage
source_folder = 'data\\Image'
target_folder = 'data\\images-semantic'
output_folder = 'data\\append_semantics_images'
append_image_names(source_folder, target_folder, output_folder)

Source images: 149
Target images: 192
Processed source images: 94
Processed target images: 90
Copied and renamed: 1.jpg -> 1_Big_Tree.jpg
Copied and renamed: 20.jpg -> 20_S_Log_Tr.jpg
Copied and renamed: 115.jpg -> 115_S_Log_Tr_R.jpg
Copied and renamed: 139.jpg -> 139_R_S_Log_Tr_R.jpg
Copied and renamed: 11.jpg -> 11_Small_Stone.jpg
Copied and renamed: 12.jpg -> 12_Stone_Tool1.jpg
Copied and renamed: 13.jpg -> 13_Bough.jpg
Copied and renamed: 14.jpg -> 14_Stick.jpg
Copied and renamed: 15.jpg -> 15_Bark.jpg
Copied and renamed: 16.jpg -> 16_Fiber.jpg
Copied and renamed: 17.jpg -> 17_Twine.jpg
Copied and renamed: 18.jpg -> 18_Axe.jpg
Copied and renamed: 2.jpg -> 2_Tree.jpg
Copied and renamed: 201.jpg -> 201_BG_Log_Top_Rd.jpg
Copied and renamed: 202.jpg -> 202_BG_Log_Top_Sq.jpg
Copied and renamed: 203.jpg -> 203_BG_Log_Top_Tr.jpg
Copied and renamed: 205.jpg -> 205_BG_Log_Top_Sq_Bt_Rd.jpg
Copied and renamed: 206.jpg -> 206_BG_Log_Top_Tr_Bt_Rd.jpg
Copied and renamed: 207.jpg -> 207_BG_Log_To

# Same as above, different method.

In [7]:
import os
import csv
from PIL import Image
import imagehash
from tqdm import tqdm

def create_image_mapping(semantic_folder, string_folder, output_csv):
    # Dictionary to store image hashes and their corresponding filenames
    semantic_images = {}
    string_images = {}

    # Calculate hashes for images in the semantic folder
    print("Processing images-semantic folder...")
    for filename in tqdm(os.listdir(semantic_folder)):
        if filename.endswith('.jpg'):
            img_path = os.path.join(semantic_folder, filename)
            img_hash = imagehash.average_hash(Image.open(img_path))
            semantic_images[str(img_hash)] = filename[:-4]  # Remove .jpg

    # Calculate hashes for images in the string folder
    print("Processing Image folder...")
    for filename in tqdm(os.listdir(string_folder)):
        if filename.endswith('.jpg'):
            img_path = os.path.join(string_folder, filename)
            img_hash = imagehash.average_hash(Image.open(img_path))
            string_images[str(img_hash)] = filename[:-4]  # Remove .jpg

    # Create a list of mappings
    mappings = []
    for img_hash, semantic_name in semantic_images.items():
        if img_hash in string_images:
            mappings.append((int(semantic_name), string_images[img_hash]))

    # Sort mappings based on the numeric name
    mappings.sort(key=lambda x: x[0])

    # Create the CSV file
    print("Creating CSV file...")
    with open(output_csv, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['Numeric Name', 'String Name'])
        for numeric_name, string_name in mappings:
            writer.writerow([numeric_name, string_name])

    print(f"CSV file '{output_csv}' created successfully.")

def test_accuracy(semantic_folder, string_folder, csv_file):
    print("Testing accuracy...")
    with open(csv_file, 'r') as csvfile:
        reader = csv.reader(csvfile)
        next(reader)  # Skip header row
        
        total = 0
        correct = 0
        for row in tqdm(reader):
            numeric_name, string_name = row
            numeric_path = os.path.join(semantic_folder, f"{numeric_name}.jpg")
            string_path = os.path.join(string_folder, f"{string_name}.jpg")
            
            if os.path.exists(numeric_path) and os.path.exists(string_path):
                numeric_hash = imagehash.average_hash(Image.open(numeric_path))
                string_hash = imagehash.average_hash(Image.open(string_path))
                
                if numeric_hash == string_hash:
                    correct += 1
                total += 1

    accuracy = (correct / total) * 100 if total > 0 else 0
    print(f"Accuracy: {accuracy:.2f}%")
    return accuracy == 100.0

# Usage
semantic_folder = r"C:\Users\chand\Downloads\emp_totem_translated\totem_empowerment\data\images-semantic - Copy"
string_folder = r"C:\Users\chand\Downloads\emp_totem_translated\totem_empowerment\data\Image - Copy"
output_csv = "image_mapping.csv"

create_image_mapping(semantic_folder, string_folder, output_csv)
is_accurate = test_accuracy(semantic_folder, string_folder, output_csv)

if is_accurate:
    print("The correspondence is 100% accurate.")
else:
    print("The correspondence is not 100% accurate.")

Processing images-semantic folder...


100%|██████████| 144/144 [00:00<00:00, 1524.94it/s]


Processing Image folder...


100%|██████████| 144/144 [00:00<00:00, 1390.98it/s]


Creating CSV file...
CSV file 'image_mapping.csv' created successfully.
Testing accuracy...


90it [00:00, 775.11it/s]

Accuracy: 100.00%
The correspondence is 100% accurate.





# Rules to alchemy format

In [20]:
import csv
import json

def csv_to_json(input_csv, output_json):
    # Dictionary to store the combinations and results
    combinations = {}

    # Read the CSV file
    with open(input_csv, 'r') as csvfile:
        reader = csv.reader(csvfile)
        next(reader, None)  # Skip the header row if it exists

        for row in reader:
            # Filter out zeros and empty strings
            elements = [elem.strip() for elem in row[:3] if elem.strip() and elem.strip() != '0']
            result = row[3].strip()

            # Skip if result is zero or empty
            if not result or result == '0':
                continue

            # Create the combination key
            key = ', '.join(elements)

            # Add the combination to the dictionary
            if key in combinations:
                if result not in combinations[key]:
                    combinations[key].append(result)
            else:
                combinations[key] = [result]

    # Write the dictionary to a JSON file
    with open(output_json, 'w') as jsonfile:
        json.dump(combinations, jsonfile, indent=2)

    print(f"Conversion complete. JSON file saved as {output_json}")

# Usage
input_csv = 'data\\nonlabelled_combinations - Copy.csv'  # Replace with your input CSV file name
output_json = 'data\\elements.json'  # Replace with your desired output JSON file name

csv_to_json(input_csv, output_json)

Conversion complete. JSON file saved as data\elements.json


In [21]:
import json
import pandas as pd

def read_json_file(file_path):
    """Read a JSON file and return its contents."""
    with open(file_path, 'r') as file:
        return json.load(file)

def display_json_structure(data, indent=0):
    """Recursively display the structure of a JSON object."""
    if isinstance(data, dict):
        for key, value in data.items():
            print('  ' * indent + str(key))
            display_json_structure(value, indent + 1)
    elif isinstance(data, list):
        print('  ' * indent + f'List with {len(data)} items')
        if data:
            display_json_structure(data[0], indent + 1)
    else:
        print('  ' * indent + f'Value: {type(data).__name__}')

def json_to_dataframe(data):
    """Convert JSON data to a pandas DataFrame if possible."""
    return pd.json_normalize(data)

def analyze_json(file_path):
    """Read and analyze a JSON file."""
    # Read the JSON file
    data = read_json_file(file_path)
    
    print("1. JSON Structure:")
    display_json_structure(data)
    
    print("\n2. JSON Content (first 5 items if list):")
    if isinstance(data, list):
        print(json.dumps(data[:5], indent=2))
    else:
        print(json.dumps(data, indent=2))
    
    print("\n3. Data Types:")
    if isinstance(data, dict):
        for key, value in data.items():
            print(f"{key}: {type(value)}")
    elif isinstance(data, list) and data:
        for key, value in data[0].items():
            print(f"{key}: {type(value)}")
    
    print("\n4. Converting to DataFrame:")
    try:
        df = json_to_dataframe(data)
        print(df.head())
        print("\nDataFrame Info:")
        print(df.info())
    except Exception as e:
        print(f"Could not convert to DataFrame: {e}")

# Usage
file_path = 'data\\elements.json'
analyze_json(file_path)

1. JSON Structure:
Stone
  List with 1 items
    Value: str
Tree
  List with 1 items
    Value: str
Big_Tree
  List with 1 items
    Value: str
Stone_Tool1, Bark
  List with 1 items
    Value: str
Fiber
  List with 1 items
    Value: str
Stone_Tool1, Stick, Twine
  List with 1 items
    Value: str
Tree, Axe
  List with 1 items
    Value: str
Stone_Tool1, Log
  List with 1 items
    Value: str
Axe, Log
  List with 1 items
    Value: str
Stone, Antler
  List with 1 items
    Value: str
Stone_Tool1, Antler_Refined
  List with 1 items
    Value: str
Antler_Refined, Stone_Tool2
  List with 1 items
    Value: str
Stick, Stone_Tool4
  List with 1 items
    Value: str
Fiber, Twine, Small_Stick
  List with 1 items
    Value: str
Quarter_Log, Stone_Tool2
  List with 1 items
    Value: str
Red_Berry, Antler_Refined, Container
  List with 1 items
    Value: str
Blue_Berry, Antler_Refined, Container
  List with 1 items
    Value: str
Big_Tree, Axe
  List with 1 items
    Value: str
Stone_Tool1, Big

In [22]:
with open(file_path, 'r') as file:
    elements = json.load(file)

elements

{'Stone': ['Stone_Tool1'],
 'Tree': ['Stick'],
 'Big_Tree': ['Bark'],
 'Stone_Tool1, Bark': ['Fiber'],
 'Fiber': ['Twine'],
 'Stone_Tool1, Stick, Twine': ['Axe'],
 'Tree, Axe': ['Log'],
 'Stone_Tool1, Log': ['Log_W_Bark'],
 'Axe, Log': ['Quarter_Log'],
 'Stone, Antler': ['Antler_Refined'],
 'Stone_Tool1, Antler_Refined': ['Stone_Tool2'],
 'Antler_Refined, Stone_Tool2': ['Stone_Tool4'],
 'Stick, Stone_Tool4': ['Small_Stick'],
 'Fiber, Twine, Small_Stick': ['Brush'],
 'Quarter_Log, Stone_Tool2': ['Container'],
 'Red_Berry, Antler_Refined, Container': ['Red_Paint'],
 'Blue_Berry, Antler_Refined, Container': ['Blue_Paint'],
 'Big_Tree, Axe': ['Big_Log'],
 'Stone_Tool1, Big_Log': ['Big_Log_W_Bark'],
 'Log_W_Bark, Stone_Tool2': ['S_Log_Rd'],
 'Stone_Tool1, Log_W_Bark': ['S_Log_Sq'],
 'Log_W_Bark, Stone_Tool4': ['S_Log_Tr'],
 'Red_Paint, S_Log_Rd': ['R_S_Log_Rd'],
 'Red_Paint, S_Log_Sq': ['R_S_Log_Sq'],
 'Red_Paint, S_Log_Tr': ['R_S_Log_Tr'],
 'Blue_Paint, S_Log_Rd': ['B_S_Log_Rd'],
 'Blue_Pa