In [None]:
!pip install spacy
!spacy download en_core_web_sm

In [2]:
import pandas as pd
import spacy
import re
from typing import Dict, List, Tuple
import numpy as np

try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    print("Please install the spaCy English model first:")
    print("python -m spacy download en_core_web_sm")
    nlp = None

# Load the character properties CSV
def load_properties(file_path):
    """Load the character properties from CSV file"""
    try:
        df = pd.read_csv(file_path)
        print(f"Loaded {len(df)} properties from CSV")
        return df
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found.")
        return None

# Load the properties
properties_df = load_properties("data/New-Text-Document.csv")

if properties_df is not None:
    print("\nFirst 10 properties:")
    print(properties_df.head(10))
    
    print(f"\nTotal properties: {len(properties_df)}")
    print(f"Column name: {properties_df.columns[0]}")

Loaded 2020 properties from CSV

First 10 properties:
                 male_properties
0                          Basis
1                     L1_African
2                       L1_Anime
3                       L1_Asian
4                   L1_Caucasian
5                       L1_Dwarf
6                         L1_Elf
7                       L1_Latin
8  L2__Abdomen_Mass-Tone_max-max
9  L2__Abdomen_Mass-Tone_max-min

Total properties: 2020
Column name: male_properties


In [3]:
# Property Analysis and Categorization

def categorize_properties(properties_list):
    """Categorize properties into different body parts and features"""
    categories = {
        'ethnicity': [],
        'body': [],
        'face': [],
        'eyes': [],
        'nose': [],
        'mouth': [],
        'ears': [],
        'cheeks': [],
        'chin': [],
        'jaw': [],
        'forehead': [],
        'eyebrows': [],
        'fantasy': [],
        'arms': [],
        'legs': [],
        'hands': [],
        'feet': [],
        'neck': [],
        'shoulders': [],
        'pelvis': [],
        'torso': [],
        'stomach': [],
        'waist': [],
        'expression': [],
        'other': []
    }
    
    for prop in properties_list:
        prop_lower = prop.lower()
        
        # Ethnicity detection
        if any(eth in prop_lower for eth in ['african', 'asian', 'caucasian', 'elf', 'dwarf', 'latin']):
            categories['ethnicity'].append(prop)
        # Body parts
        elif 'body' in prop_lower:
            categories['body'].append(prop)
        elif 'eye' in prop_lower:
            categories['eyes'].append(prop)
        elif 'nose' in prop_lower:
            categories['nose'].append(prop)
        elif 'mouth' in prop_lower:
            categories['mouth'].append(prop)
        elif 'ear' in prop_lower:
            categories['ears'].append(prop)
        elif 'cheek' in prop_lower:
            categories['cheeks'].append(prop)
        elif 'chin' in prop_lower:
            categories['chin'].append(prop)
        elif 'jaw' in prop_lower:
            categories['jaw'].append(prop)
        elif 'forehead' in prop_lower:
            categories['forehead'].append(prop)
        elif 'eyebrow' in prop_lower:
            categories['eyebrows'].append(prop)
        elif 'fantasy' in prop_lower:
            categories['fantasy'].append(prop)
        elif 'arm' in prop_lower:
            categories['arms'].append(prop)
        elif 'leg' in prop_lower:
            categories['legs'].append(prop)
        elif 'hand' in prop_lower:
            categories['hands'].append(prop)
        elif 'foot' in prop_lower or 'feet' in prop_lower:
            categories['feet'].append(prop)
        elif 'neck' in prop_lower:
            categories['neck'].append(prop)
        elif 'shoulder' in prop_lower:
            categories['shoulders'].append(prop)
        elif 'pelvis' in prop_lower:
            categories['pelvis'].append(prop)
        elif 'torso' in prop_lower:
            categories['torso'].append(prop)
        elif 'stomach' in prop_lower:
            categories['stomach'].append(prop)
        elif 'waist' in prop_lower:
            categories['waist'].append(prop)
        elif any(expr in prop_lower for expr in ['smile', 'brow', 'pupil', 'tongue', 'nostril']):
            categories['expression'].append(prop)
        else:
            categories['other'].append(prop)
    
    return categories

# Categorize our properties
if properties_df is not None:
    property_list = properties_df[properties_df.columns[0]].tolist()
    categories = categorize_properties(property_list)
    
    print("Property Categories Summary:")
    for category, props in categories.items():
        print(f"{category}: {len(props)} properties")

Property Categories Summary:
ethnicity: 1500 properties
body: 3 properties
face: 0 properties
eyes: 72 properties
nose: 53 properties
mouth: 61 properties
ears: 32 properties
cheeks: 16 properties
chin: 10 properties
jaw: 14 properties
forehead: 10 properties
eyebrows: 0 properties
fantasy: 5 properties
arms: 14 properties
legs: 38 properties
hands: 18 properties
feet: 12 properties
neck: 25 properties
shoulders: 16 properties
pelvis: 20 properties
torso: 26 properties
stomach: 4 properties
waist: 2 properties
expression: 22 properties
other: 47 properties


In [4]:
# Enhanced NLP Feature Mapping

class CharacterPropertyMapper:
    def __init__(self, properties_df):
        self.properties = properties_df[properties_df.columns[0]].tolist()
        self.categories = categorize_properties(self.properties)
        self.nlp = nlp
        
        # Enhanced feature mapping dictionary
        self.feature_keywords = {
            'big_eyes': ['big eyes', 'large eyes', 'wide eyes', 'expressive eyes'],
            'small_eyes': ['small eyes', 'narrow eyes', 'squinty eyes'],
            'sharp_nose': ['sharp nose', 'pointed nose', 'angular nose'],
            'wide_nose': ['wide nose', 'broad nose', 'flat nose'],
            'long_nose': ['long nose', 'prominent nose'],
            'short_nose': ['short nose', 'small nose'],
            'full_lips': ['full lips', 'plump lips', 'big lips', 'luscious lips'],
            'thin_lips': ['thin lips', 'small lips', 'narrow lips'],
            'strong_jaw': ['strong jaw', 'defined jaw', 'angular jaw', 'square jaw'],
            'soft_jaw': ['soft jaw', 'round jaw', 'gentle jaw'],
            'wide_jaw': ['wide jaw', 'broad jaw'],
            'narrow_jaw': ['narrow jaw', 'thin jaw'],
            'prominent_chin': ['strong chin', 'prominent chin', 'defined chin'],
            'weak_chin': ['weak chin', 'receding chin', 'small chin'],
            'high_cheekbones': ['high cheekbones', 'prominent cheeks', 'defined cheeks'],
            'round_face': ['round face', 'full face', 'chubby face'],
            'angular_face': ['angular face', 'sharp face', 'defined face'],
            'muscular_body': ['muscular', 'athletic', 'toned', 'fit'],
            'slim_body': ['slim', 'thin', 'lean', 'slender'],
            'large_body': ['large', 'big', 'heavy', 'stocky'],
            'tall': ['tall', 'height'],
            'short': ['short', 'small stature'],
            'young': ['young', 'youthful', 'boyish', 'girlish'],
            'old': ['old', 'aged', 'elderly', 'wrinkled'],
            'intelligent': ['intelligent', 'smart', 'wise', 'clever'],
            'strong': ['strong', 'powerful', 'robust', 'sturdy'],
            'gentle': ['gentle', 'kind', 'soft', 'friendly']
        }
        
        # Property mapping with intensity
        self.property_mapping = {
            'big_eyes': ['L2__Eyes_Size_max', 'L2__Eyes_IrisSize_max'],
            'small_eyes': ['L2__Eyes_Size_min', 'L2__Eyes_IrisSize_min'],
            'sharp_nose': ['L2_Caucasian_Nose_TipSize_min', 'L2_Caucasian_Nose_BridgeSizeX_min'],
            'wide_nose': ['L2_Caucasian_Nose_BaseSizeX_max', 'L2_Caucasian_Nose_BridgeSizeX_max'],
            'long_nose': ['L2_Caucasian_Nose_SizeY_max'],
            'short_nose': ['L2_Caucasian_Nose_SizeY_min'],
            'full_lips': ['L2_Caucasian_Mouth_UpperlipVolume_max', 'L2_Caucasian_Mouth_LowerlipVolume_max'],
            'thin_lips': ['L2_Caucasian_Mouth_UpperlipVolume_min', 'L2_Caucasian_Mouth_LowerlipVolume_min'],
            'strong_jaw': ['L2_Caucasian_Jaw_Angle_min', 'L2_Caucasian_Jaw_Prominence_max'],
            'soft_jaw': ['L2_Caucasian_Jaw_Angle_max', 'L2_Caucasian_Jaw_Prominence_min'],
            'wide_jaw': ['L2_Caucasian_Jaw_ScaleX_max'],
            'narrow_jaw': ['L2_Caucasian_Jaw_ScaleX_min'],
            'prominent_chin': ['L2_Caucasian_Chin_Prominence_max', 'L2_Caucasian_Chin_SizeZ_max'],
            'weak_chin': ['L2_Caucasian_Chin_Prominence_min', 'L2_Caucasian_Chin_SizeZ_min'],
            'high_cheekbones': ['L2_Caucasian_Cheeks_Zygom_max', 'L2_Caucasian_Cheeks_ZygomPosZ_max'],
            'round_face': ['L2_Caucasian_Face_Ellipsoid_max'],
            'angular_face': ['L2_Caucasian_Face_Triangle_max'],
            'muscular_body': ['L2__Body_Size_max', 'L2__Arms_UpperarmMass-UpperarmTone_max-max'],
            'slim_body': ['L2__Body_Size_min', 'L2__Arms_UpperarmMass-UpperarmTone_min-min'],
            'large_body': ['L2__Body_Size_max', 'L2__Chest_Girth_max'],
            'tall': ['L2__Body_Size_max'],
            'short': ['L2__Body_Size_min'],
            'intelligent': ['L2_Caucasian_Forehead_SizeZ_max'],  # Higher forehead
            'strong': ['L2__Neck_Mass-Tone_max-max', 'L2__Shoulders_Mass-Tone_max-max'],
            'gentle': ['L2_Caucasian_Eyes_Size_max']  # Softer, larger eyes
        }
        
        # Intensity modifiers
        self.intensity_modifiers = {
            'slightly': 0.3, 'somewhat': 0.4, 'moderately': 0.6,
            'very': 0.8, 'extremely': 0.9, 'incredibly': 1.0,
            'quite': 0.7, 'rather': 0.6, 'fairly': 0.5
        }
        
        # Default value for features without intensity modifiers
        self.default_intensity = 0.7

    def analyze_prompt(self, prompt):
        """Analyze the prompt and extract features with spaCy"""
        if self.nlp is None:
            return self._simple_analysis(prompt)
        
        doc = self.nlp(prompt.lower())
        features = {}
        
        # Extract adjectives and nouns that might describe features
        for token in doc:
            if token.pos_ in ['ADJ', 'NOUN']:
                # Check if this word matches any feature keywords
                for feature, keywords in self.feature_keywords.items():
                    for keyword in keywords:
                        if token.text in keyword.split():
                            # Check for intensity modifiers
                            intensity = self.default_intensity
                            for child in token.children:
                                if child.pos_ == 'ADV' and child.text in self.intensity_modifiers:
                                    intensity = self.intensity_modifiers[child.text]
                            
                            features[feature] = intensity
        
        # Also check for multi-word phrases
        for feature, keywords in self.feature_keywords.items():
            for keyword in keywords:
                if keyword in prompt.lower() and feature not in features:
                    features[feature] = self.default_intensity
        
        return features

    def _simple_analysis(self, prompt):
        """Simple analysis without spaCy"""
        prompt_lower = prompt.lower()
        features = {}
        
        for feature, keywords in self.feature_keywords.items():
            for keyword in keywords:
                if keyword in prompt_lower:
                    # Check for intensity words before the keyword
                    intensity = self.default_intensity
                    for modifier, mod_intensity in self.intensity_modifiers.items():
                        if f"{modifier} {keyword}" in prompt_lower:
                            intensity = mod_intensity
                            break
                    
                    features[feature] = intensity
        
        return features

    def map_to_properties(self, prompt):
        """Map the analyzed features to specific character properties"""
        features = self.analyze_prompt(prompt)
        property_values = {}
        
        print(f"Detected features: {features}")
        
        for feature, intensity in features.items():
            if feature in self.property_mapping:
                properties = self.property_mapping[feature]
                for prop in properties:
                    # Check if property exists in our dataset
                    if prop in self.properties:
                        property_values[prop] = intensity
                    else:
                        # Try to find similar properties
                        similar_props = [p for p in self.properties if prop in p]
                        for similar_prop in similar_props:
                            property_values[similar_prop] = intensity
        
        return property_values

# Initialize the mapper
mapper = CharacterPropertyMapper(properties_df)

In [5]:
# Test the mapping system with example prompts

def test_prompt_mapping(prompt):
    """Test the mapping for a given prompt"""
    print(f"\n{'='*60}")
    print(f"PROMPT: '{prompt}'")
    print(f"{'='*60}")
    
    # Get the property mappings
    property_values = mapper.map_to_properties(prompt)
    
    if property_values:
        print(f"\nMapped {len(property_values)} properties:")
        print("-" * 40)
        
        # Group by category for better readability
        categorized_output = {}
        for prop, value in property_values.items():
            for category, props_list in mapper.categories.items():
                if prop in props_list:
                    if category not in categorized_output:
                        categorized_output[category] = []
                    categorized_output[category].append((prop, value))
                    break
        
        # Display by category
        for category, props in categorized_output.items():
            if props:
                print(f"\n{category.upper()}:")
                for prop, value in props:
                    print(f"  {prop}: {value:.2f}")
    else:
        print("No properties mapped for this prompt.")
    
    return property_values

# Test with various prompts
test_prompts = [
    "Create a man with big eyes and a strong jaw",
    "Generate a character with a sharp nose and thin lips",
    "Make a muscular athletic person with high cheekbones",
    "Create an intelligent looking person with a prominent chin",
    "Generate a gentle face with full lips and soft jaw",
    "Make a short person with a round face",
    "Create someone with very large eyes and extremely full lips"
]

print("CHARACTER PROPERTY MAPPING DEMONSTRATION")
print("=" * 60)

results = []
for prompt in test_prompts:
    result = test_prompt_mapping(prompt)
    results.append((prompt, result))

CHARACTER PROPERTY MAPPING DEMONSTRATION

PROMPT: 'Create a man with big eyes and a strong jaw'
Detected features: {'big_eyes': 0.7, 'full_lips': 0.7, 'large_body': 0.7, 'small_eyes': 0.7, 'strong_jaw': 0.7, 'prominent_chin': 0.7, 'strong': 0.7, 'soft_jaw': 0.7, 'wide_jaw': 0.7, 'narrow_jaw': 0.7}

Mapped 16 properties:
----------------------------------------

EYES:
  L2__Eyes_IrisSize_max: 0.70
  L2__Eyes_IrisSize_min: 0.70

ETHNICITY:
  L2_Caucasian_Mouth_UpperlipVolume_max: 0.70
  L2_Caucasian_Mouth_LowerlipVolume_max: 0.70
  L2_Caucasian_Jaw_Angle_min: 0.70
  L2_Caucasian_Jaw_Prominence_max: 0.70
  L2_Caucasian_Chin_Prominence_max: 0.70
  L2_Caucasian_Chin_SizeZ_max: 0.70
  L2_Caucasian_Jaw_Angle_max: 0.70
  L2_Caucasian_Jaw_Prominence_min: 0.70
  L2_Caucasian_Jaw_ScaleX_max: 0.70
  L2_Caucasian_Jaw_ScaleX_min: 0.70

BODY:
  L2__Body_Size_max: 0.70

OTHER:
  L2__Chest_Girth_max: 0.70

NECK:
  L2__Neck_Mass-Tone_max-max: 0.70

SHOULDERS:
  L2__Shoulders_Mass-Tone_max-max: 0.70

PRO

In [6]:
# Interactive testing cell

def interactive_test():
    """Allow interactive testing of prompts"""
    print("Interactive Character Property Mapping")
    print("Type 'quit' to exit\n")
    
    while True:
        user_prompt = input("\nEnter your character description: ")
        
        if user_prompt.lower() in ['quit', 'exit', 'q']:
            break
            
        if user_prompt.strip():
            test_prompt_mapping(user_prompt)
        else:
            print("Please enter a valid description.")

# Uncomment the line below to run interactive testing
interactive_test()

Interactive Character Property Mapping
Type 'quit' to exit


PROMPT: 'rvrkevbn'
Detected features: {}
No properties mapped for this prompt.
