In [None]:
!pip install spacy
!spacy download en_core_web_sm

In [2]:
import pandas as pd
import spacy
import re
from typing import Dict, List, Tuple, Any
import numpy as np
import openai
import requests
import json

In [3]:
def load_properties(file_path):
    """Load the character properties from CSV file"""
    try:
        df = pd.read_csv(file_path)
        print(f"Loaded {len(df)} properties from CSV")
        return df
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found.")
        return None

def categorize_properties(properties_list):
    """Categorize properties into different body parts and features"""
    categories = {
        'ethnicity': [],
        'body': [],
        'face': [],
        'eyes': [],
        'nose': [],
        'mouth': [],
        'ears': [],
        'cheeks': [],
        'chin': [],
        'jaw': [],
        'forehead': [],
        'eyebrows': [],
        'fantasy': [],
        'arms': [],
        'legs': [],
        'hands': [],
        'feet': [],
        'neck': [],
        'shoulders': [],
        'pelvis': [],
        'torso': [],
        'stomach': [],
        'waist': [],
        'expression': [],
        'other': []
    }
    
    for prop in properties_list:
        prop_lower = prop.lower()
        
        # Ethnicity detection
        if any(eth in prop_lower for eth in ['african', 'asian', 'caucasian', 'elf', 'dwarf', 'latin']):
            categories['ethnicity'].append(prop)
        # Body parts
        elif 'body' in prop_lower:
            categories['body'].append(prop)
        elif 'eye' in prop_lower:
            categories['eyes'].append(prop)
        elif 'nose' in prop_lower:
            categories['nose'].append(prop)
        elif 'mouth' in prop_lower:
            categories['mouth'].append(prop)
        elif 'ear' in prop_lower:
            categories['ears'].append(prop)
        elif 'cheek' in prop_lower:
            categories['cheeks'].append(prop)
        elif 'chin' in prop_lower:
            categories['chin'].append(prop)
        elif 'jaw' in prop_lower:
            categories['jaw'].append(prop)
        elif 'forehead' in prop_lower:
            categories['forehead'].append(prop)
        elif 'eyebrow' in prop_lower:
            categories['eyebrows'].append(prop)
        elif 'fantasy' in prop_lower:
            categories['fantasy'].append(prop)
        elif 'arm' in prop_lower:
            categories['arms'].append(prop)
        elif 'leg' in prop_lower:
            categories['legs'].append(prop)
        elif 'hand' in prop_lower:
            categories['hands'].append(prop)
        elif 'foot' in prop_lower or 'feet' in prop_lower:
            categories['feet'].append(prop)
        elif 'neck' in prop_lower:
            categories['neck'].append(prop)
        elif 'shoulder' in prop_lower:
            categories['shoulders'].append(prop)
        elif 'pelvis' in prop_lower:
            categories['pelvis'].append(prop)
        elif 'torso' in prop_lower:
            categories['torso'].append(prop)
        elif 'stomach' in prop_lower:
            categories['stomach'].append(prop)
        elif 'waist' in prop_lower:
            categories['waist'].append(prop)
        elif any(expr in prop_lower for expr in ['smile', 'brow', 'pupil', 'tongue', 'nostril']):
            categories['expression'].append(prop)
        else:
            categories['other'].append(prop)
    
    return categories

# Load spaCy model
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    print("Please install the spaCy English model first:")
    print("python -m spacy download en_core_web_sm")
    nlp = None

# Load the properties
properties_df = load_properties("data/New-Text-Document.csv")

if properties_df is not None:
    print("\nFirst 10 properties:")
    print(properties_df.head(10))
    
    print(f"\nTotal properties: {len(properties_df)}")
    print(f"Column name: {properties_df.columns[0]}")
    
    # Categorize properties
    property_list = properties_df[properties_df.columns[0]].tolist()
    categories = categorize_properties(property_list)
    
    print("\nProperty Categories Summary:")
    for category, props in categories.items():
        print(f"{category}: {len(props)} properties")

Loaded 2020 properties from CSV

First 10 properties:
                 male_properties
0                          Basis
1                     L1_African
2                       L1_Anime
3                       L1_Asian
4                   L1_Caucasian
5                       L1_Dwarf
6                         L1_Elf
7                       L1_Latin
8  L2__Abdomen_Mass-Tone_max-max
9  L2__Abdomen_Mass-Tone_max-min

Total properties: 2020
Column name: male_properties

Property Categories Summary:
ethnicity: 1500 properties
body: 3 properties
face: 0 properties
eyes: 72 properties
nose: 53 properties
mouth: 61 properties
ears: 32 properties
cheeks: 16 properties
chin: 10 properties
jaw: 14 properties
forehead: 10 properties
eyebrows: 0 properties
fantasy: 5 properties
arms: 14 properties
legs: 38 properties
hands: 18 properties
feet: 12 properties
neck: 25 properties
shoulders: 16 properties
pelvis: 20 properties
torso: 26 properties
stomach: 4 properties
waist: 2 properties
expression: 2

In [4]:
class CulturalContextAnalyzer:
    def __init__(self):
        # Geographic and cultural mappings
        self.regional_features = {
            # Asian regions
            'east asia': ['china', 'japan', 'korea', 'taiwan', 'mongolia'],
            'south asia': ['india', 'pakistan', 'bangladesh', 'sri lanka', 'nepal'],
            'southeast asia': ['thailand', 'vietnam', 'indonesia', 'malaysia', 'philippines'],
            'central asia': ['kazakhstan', 'uzbekistan', 'turkmenistan'],
            
            # European regions
            'northern europe': ['scandinavia', 'sweden', 'norway', 'denmark', 'finland'],
            'southern europe': ['italy', 'spain', 'greece', 'portugal'],
            'eastern europe': ['russia', 'poland', 'ukraine', 'czech', 'hungary'],
            'western europe': ['france', 'germany', 'uk', 'britain', 'netherlands'],
            
            # African regions
            'north africa': ['egypt', 'morocco', 'algeria', 'tunisia'],
            'sub-saharan': ['nigeria', 'kenya', 'ghana', 'south africa', 'ethiopia'],
            'west africa': ['senegal', 'ivory coast', 'mali'],
            'east africa': ['tanzania', 'uganda', 'rwanda'],
            
            # Americas
            'north america': ['usa', 'canada', 'mexico'],
            'latin america': ['brazil', 'argentina', 'colombia', 'peru', 'chile'],
            'caribbean': ['jamaica', 'cuba', 'haiti', 'dominican'],
            
            # Middle East
            'middle east': ['arab', 'saudi', 'uae', 'iran', 'turkey', 'israel']
        }
        
        # Cultural stereotypes and typical features (for indirect inference)
        self.cultural_stereotypes = {
            'east asia': ['epicanthic fold', 'monolid', 'straight black hair', 'pale skin', 'small nose'],
            'south asia': ['olive skin', 'dark hair', 'brown eyes', 'prominent nose', 'thick eyebrows'],
            'northern europe': ['fair skin', 'blonde hair', 'blue eyes', 'tall', 'angular features'],
            'southern europe': ['olive skin', 'dark hair', 'brown eyes', 'roman nose', 'expressive features'],
            'africa': ['dark skin', 'broad nose', 'full lips', 'curly hair', 'strong jawline'],
            'middle east': ['olive skin', 'dark hair', 'prominent nose', 'thick eyebrows', 'beard growth']
        }
        
        # Lifestyle to physical trait mappings
        self.lifestyle_mappings = {
            'foody': ['L2__Stomach_LocalFat_max', 'L2__Abdomen_Mass-Tone_max-max', 'L2__Body_Size_max'],
            'athletic': ['L2__Body_Size_min', 'L2__Arms_UpperarmMass-UpperarmTone_max-max', 'L2__Shoulders_Mass-Tone_max-max'],
            'sedentary': ['L2__Stomach_LocalFat_max', 'L2__Body_Size_max', 'L2__Abdomen_Mass-Tone_max-max'],
            'outdoorsy': ['L2_Caucasian_Skin_Freckles_max', 'L2__Body_Size_min', 'L2__Arms_UpperarmMass-UpperarmTone_max-max'],
            'intellectual': ['L2_Caucasian_Forehead_SizeZ_max', 'L2_Caucasian_Eyes_Size_min'],
            'manual laborer': ['L2__Arms_UpperarmMass-UpperarmTone_max-max', 'L2__Hands_Mass-Tone_max-max', 'L2__Shoulders_Mass-Tone_max-max'],
            'wealthy': ['L2__Body_Size_min', 'L2_Caucasian_Skin_Complexion_max'],
            'rural': ['L2_Caucasian_Skin_Freckles_max', 'L2__Body_Size_max', 'L2__Hands_Mass-Tone_max-max'],
            'urban': ['L2__Body_Size_min', 'L2_Caucasian_Skin_Complexion_min']
        }
        
        # Profession to feature mappings
        self.profession_mappings = {
            'soldier': ['L2__Body_Size_min', 'L2__Arms_UpperarmMass-UpperarmTone_max-max', 'L2__Shoulders_Mass-Tone_max-max', 'short hair'],
            'farmer': ['L2__Body_Size_max', 'L2_Caucasian_Skin_Freckles_max', 'L2__Hands_Mass-Tone_max-max'],
            'teacher': ['L2_Caucasian_Forehead_SizeZ_max', 'L2_Caucasian_Eyes_Size_max'],
            'model': ['L2__Body_Size_min', 'L2_Caucasian_Face_Symmetry_max', 'L2_Caucasian_Skin_Complexion_max'],
            'chef': ['L2__Stomach_LocalFat_max', 'L2__Body_Size_max'],
            'athlete': ['L2__Body_Size_min', 'L2__Arms_UpperarmMass-UpperarmTone_max-max', 'L2__Legs_UpperlegsMass-UpperlegsTone_max-max']
        }
        
        # Personality to physical trait mappings
        self.personality_mappings = {
            'friendly': ['L2_Caucasian_Mouth_Smile_max', 'L2_Caucasian_Eyes_Size_max', 'L2_Caucasian_Cheeks_Zygom_max'],
            'serious': ['L2_Caucasian_Eyebrows_Angle_min', 'L2_Caucasian_Mouth_SizeX_min', 'L2_Caucasian_Forehead_SizeZ_max'],
            'energetic': ['L2_Caucasian_Eyes_Size_max', 'L2_Caucasian_Mouth_Smile_max', 'L2__Body_Size_min'],
            'calm': ['L2_Caucasian_Eyes_Size_min', 'L2_Caucasian_Face_Ellipsoid_max', 'L2_Caucasian_Mouth_SizeX_min'],
            'intelligent': ['L2_Caucasian_Forehead_SizeZ_max', 'L2_Caucasian_Eyes_Size_min'],
            'strong willed': ['L2_Caucasian_Jaw_Prominence_max', 'L2_Caucasian_Chin_Prominence_max', 'L2_Caucasian_Eyebrows_Angle_min']
        }

    def analyze_geographic_context(self, text):
        """Extract geographic and cultural context from text"""
        text_lower = text.lower()
        detected_regions = []
        cultural_features = {}
        
        # Detect mentioned regions
        for region, countries in self.regional_features.items():
            for country in countries:
                if country in text_lower:
                    detected_regions.append(region)
                    # Add cultural stereotypes for detected region
                    if region in self.cultural_stereotypes:
                        for feature in self.cultural_stereotypes[region]:
                            cultural_features[feature] = 0.7
                    break
        
        return detected_regions, cultural_features

    def analyze_lifestyle_context(self, text):
        """Extract lifestyle information and map to physical traits"""
        text_lower = text.lower()
        lifestyle_traits = {}
        
        for lifestyle, properties in self.lifestyle_mappings.items():
            if lifestyle in text_lower:
                for prop in properties:
                    lifestyle_traits[prop] = 0.7
        
        return lifestyle_traits

    def analyze_profession_context(self, text):
        """Extract profession information and map to physical traits"""
        text_lower = text.lower()
        profession_traits = {}
        
        for profession, properties in self.profession_mappings.items():
            if profession in text_lower:
                for prop in properties:
                    profession_traits[prop] = 0.7
        
        return profession_traits

    def analyze_personality_context(self, text):
        """Extract personality information and map to physical traits"""
        text_lower = text.lower()
        personality_traits = {}
        
        for personality, properties in self.personality_mappings.items():
            if personality in text_lower:
                for prop in properties:
                    personality_traits[prop] = 0.6
        
        return personality_traits

    def analyze_complex_prompt(self, text):
        """Comprehensive analysis of complex prompts"""
        # Geographic analysis
        regions, cultural_features = self.analyze_geographic_context(text)
        
        # Lifestyle analysis
        lifestyle_traits = self.analyze_lifestyle_context(text)
        
        # Profession analysis
        profession_traits = self.analyze_profession_context(text)
        
        # Personality analysis
        personality_traits = self.analyze_personality_context(text)
        
        # Combine all indirect features
        indirect_features = {}
        indirect_features.update(cultural_features)
        indirect_features.update(lifestyle_traits)
        indirect_features.update(profession_traits)
        indirect_features.update(personality_traits)
        
        analysis_result = {
            'detected_regions': regions,
            'cultural_features': cultural_features,
            'lifestyle_traits': lifestyle_traits,
            'profession_traits': profession_traits,
            'personality_traits': personality_traits,
            'all_indirect_features': indirect_features
        }
        
        return analysis_result


In [5]:

class CharacterPropertyMapper:
    def __init__(self, properties_df):
        self.properties = properties_df[properties_df.columns[0]].tolist()
        self.categories = categorize_properties(self.properties)
        self.nlp = nlp
        
        # Enhanced feature mapping dictionary
        self.feature_keywords = {
            'big_eyes': ['big eyes', 'large eyes', 'wide eyes', 'expressive eyes'],
            'small_eyes': ['small eyes', 'narrow eyes', 'squinty eyes'],
            'sharp_nose': ['sharp nose', 'pointed nose', 'angular nose'],
            'wide_nose': ['wide nose', 'broad nose', 'flat nose'],
            'long_nose': ['long nose', 'prominent nose'],
            'short_nose': ['short nose', 'small nose'],
            'full_lips': ['full lips', 'plump lips', 'big lips', 'luscious lips'],
            'thin_lips': ['thin lips', 'small lips', 'narrow lips'],
            'strong_jaw': ['strong jaw', 'defined jaw', 'angular jaw', 'square jaw'],
            'soft_jaw': ['soft jaw', 'round jaw', 'gentle jaw'],
            'wide_jaw': ['wide jaw', 'broad jaw'],
            'narrow_jaw': ['narrow jaw', 'thin jaw'],
            'prominent_chin': ['strong chin', 'prominent chin', 'defined chin'],
            'weak_chin': ['weak chin', 'receding chin', 'small chin'],
            'high_cheekbones': ['high cheekbones', 'prominent cheeks', 'defined cheeks'],
            'round_face': ['round face', 'full face', 'chubby face'],
            'angular_face': ['angular face', 'sharp face', 'defined face'],
            'muscular_body': ['muscular', 'athletic', 'toned', 'fit'],
            'slim_body': ['slim', 'thin', 'lean', 'slender'],
            'large_body': ['large', 'big', 'heavy', 'stocky'],
            'tall': ['tall', 'height'],
            'short': ['short', 'small stature'],
            'young': ['young', 'youthful', 'boyish', 'girlish'],
            'old': ['old', 'aged', 'elderly', 'wrinkled'],
            'intelligent': ['intelligent', 'smart', 'wise', 'clever'],
            'strong': ['strong', 'powerful', 'robust', 'sturdy'],
            'gentle': ['gentle', 'kind', 'soft', 'friendly']
        }
        
        # Property mapping with intensity
        self.property_mapping = {
            'big_eyes': ['L2__Eyes_Size_max', 'L2__Eyes_IrisSize_max'],
            'small_eyes': ['L2__Eyes_Size_min', 'L2__Eyes_IrisSize_min'],
            'sharp_nose': ['L2_Caucasian_Nose_TipSize_min', 'L2_Caucasian_Nose_BridgeSizeX_min'],
            'wide_nose': ['L2_Caucasian_Nose_BaseSizeX_max', 'L2_Caucasian_Nose_BridgeSizeX_max'],
            'long_nose': ['L2_Caucasian_Nose_SizeY_max'],
            'short_nose': ['L2_Caucasian_Nose_SizeY_min'],
            'full_lips': ['L2_Caucasian_Mouth_UpperlipVolume_max', 'L2_Caucasian_Mouth_LowerlipVolume_max'],
            'thin_lips': ['L2_Caucasian_Mouth_UpperlipVolume_min', 'L2_Caucasian_Mouth_LowerlipVolume_min'],
            'strong_jaw': ['L2_Caucasian_Jaw_Angle_min', 'L2_Caucasian_Jaw_Prominence_max'],
            'soft_jaw': ['L2_Caucasian_Jaw_Angle_max', 'L2_Caucasian_Jaw_Prominence_min'],
            'wide_jaw': ['L2_Caucasian_Jaw_ScaleX_max'],
            'narrow_jaw': ['L2_Caucasian_Jaw_ScaleX_min'],
            'prominent_chin': ['L2_Caucasian_Chin_Prominence_max', 'L2_Caucasian_Chin_SizeZ_max'],
            'weak_chin': ['L2_Caucasian_Chin_Prominence_min', 'L2_Caucasian_Chin_SizeZ_min'],
            'high_cheekbones': ['L2_Caucasian_Cheeks_Zygom_max', 'L2_Caucasian_Cheeks_ZygomPosZ_max'],
            'round_face': ['L2_Caucasian_Face_Ellipsoid_max'],
            'angular_face': ['L2_Caucasian_Face_Triangle_max'],
            'muscular_body': ['L2__Body_Size_max', 'L2__Arms_UpperarmMass-UpperarmTone_max-max'],
            'slim_body': ['L2__Body_Size_min', 'L2__Arms_UpperarmMass-UpperarmTone_min-min'],
            'large_body': ['L2__Body_Size_max', 'L2__Chest_Girth_max'],
            'tall': ['L2__Body_Size_max'],
            'short': ['L2__Body_Size_min'],
            'intelligent': ['L2_Caucasian_Forehead_SizeZ_max'],
            'strong': ['L2__Neck_Mass-Tone_max-max', 'L2__Shoulders_Mass-Tone_max-max'],
            'gentle': ['L2_Caucasian_Eyes_Size_max']
        }
        
        # Intensity modifiers
        self.intensity_modifiers = {
            'slightly': 0.3, 'somewhat': 0.4, 'moderately': 0.6,
            'very': 0.8, 'extremely': 0.9, 'incredibly': 1.0,
            'quite': 0.7, 'rather': 0.6, 'fairly': 0.5
        }
        
        # Default value for features without intensity modifiers
        self.default_intensity = 0.7

    def analyze_prompt(self, prompt):
        """Analyze the prompt and extract features with spaCy"""
        if self.nlp is None:
            return self._simple_analysis(prompt)
        
        doc = self.nlp(prompt.lower())
        features = {}
        
        # Extract adjectives and nouns that might describe features
        for token in doc:
            if token.pos_ in ['ADJ', 'NOUN']:
                # Check if this word matches any feature keywords
                for feature, keywords in self.feature_keywords.items():
                    for keyword in keywords:
                        if token.text in keyword.split():
                            # Check for intensity modifiers
                            intensity = self.default_intensity
                            for child in token.children:
                                if child.pos_ == 'ADV' and child.text in self.intensity_modifiers:
                                    intensity = self.intensity_modifiers[child.text]
                            
                            features[feature] = intensity
        
        # Also check for multi-word phrases
        for feature, keywords in self.feature_keywords.items():
            for keyword in keywords:
                if keyword in prompt.lower() and feature not in features:
                    features[feature] = self.default_intensity
        
        return features

    def _simple_analysis(self, prompt):
        """Simple analysis without spaCy"""
        prompt_lower = prompt.lower()
        features = {}
        
        for feature, keywords in self.feature_keywords.items():
            for keyword in keywords:
                if keyword in prompt_lower:
                    # Check for intensity words before the keyword
                    intensity = self.default_intensity
                    for modifier, mod_intensity in self.intensity_modifiers.items():
                        if f"{modifier} {keyword}" in prompt_lower:
                            intensity = mod_intensity
                            break
                    
                    features[feature] = intensity
        
        return features

    def map_to_properties(self, prompt):
        """Map the analyzed features to specific character properties"""
        features = self.analyze_prompt(prompt)
        property_values = {}
        
        print(f"Detected features: {features}")
        
        for feature, intensity in features.items():
            if feature in self.property_mapping:
                properties = self.property_mapping[feature]
                for prop in properties:
                    # Check if property exists in our dataset
                    if prop in self.properties:
                        property_values[prop] = intensity
                    else:
                        # Try to find similar properties
                        similar_props = [p for p in self.properties if prop in p]
                        for similar_prop in similar_props:
                            property_values[similar_prop] = intensity
        
        return property_values

# Initialize the base mapper
mapper = CharacterPropertyMapper(properties_df)

In [6]:
class EnhancedCharacterPropertyMapper(CharacterPropertyMapper):
    def __init__(self, properties_df):
        super().__init__(properties_df)
        self.cultural_analyzer = CulturalContextAnalyzer()
        
        # Enhanced keyword mappings for indirect descriptions
        self.indirect_keywords = {
            'food_lover': ['foody', 'loves food', 'eats a lot', 'big eater', 'enjoys food', 'food lover'],
            'athletic': ['athletic', 'sports', 'works out', 'gym goer', 'fit', 'active lifestyle'],
            'sedentary': ['sedentary', 'desk job', 'office worker', 'sits all day', 'inactive'],
            'outdoors': ['outdoors', 'nature lover', 'hiker', 'camper', 'outdoor activities'],
            'intellectual': ['intellectual', 'bookish', 'academic', 'scholar', 'thinker', 'philosopher'],
            'manual_worker': ['manual labor', 'construction', 'farming', 'physical work', 'blue collar'],
            'wealthy': ['wealthy', 'rich', 'affluent', 'upper class', 'well-off'],
            'rural': ['rural', 'countryside', 'village', 'farm', 'agricultural'],
            'urban': ['urban', 'city', 'metropolitan', 'downtown']
        }
        
        # Update property mapping with indirect traits
        self.property_mapping.update({
            'food_lover': ['L2__Stomach_LocalFat_max', 'L2__Abdomen_Mass-Tone_max-max', 'L2__Body_Size_max'],
            'sedentary': ['L2__Stomach_LocalFat_max', 'L2__Body_Size_max', 'L2__Abdomen_Mass-Tone_max-max'],
            'outdoors': ['L2_Caucasian_Skin_Freckles_max', 'L2__Body_Size_min', 'L2__Arms_UpperarmMass-UpperarmTone_max-max'],
            'manual_worker': ['L2__Arms_UpperarmMass-UpperarmTone_max-max', 'L2__Hands_Mass-Tone_max-max', 'L2__Shoulders_Mass-Tone_max-max'],
            'wealthy': ['L2__Body_Size_min', 'L2_Caucasian_Skin_Complexion_max'],
            'rural': ['L2_Caucasian_Skin_Freckles_max', 'L2__Body_Size_max', 'L2__Hands_Mass-Tone_max-max'],
            'urban': ['L2__Body_Size_min', 'L2_Caucasian_Skin_Complexion_min']
        })

    def enhanced_analyze_prompt(self, prompt):
        """Enhanced analysis that includes both direct and indirect feature extraction"""
        # Get direct features from parent class
        direct_features = self.analyze_prompt(prompt)
        
        # Get indirect features from cultural context analysis
        cultural_analysis = self.cultural_analyzer.analyze_complex_prompt(prompt)
        indirect_features = cultural_analysis['all_indirect_features']
        
        # Also check for indirect keywords
        prompt_lower = prompt.lower()
        for feature, keywords in self.indirect_keywords.items():
            for keyword in keywords:
                if keyword in prompt_lower:
                    indirect_features[feature] = 0.7
        
        # Merge features (direct features take precedence if there's overlap)
        all_features = {**indirect_features, **direct_features}
        
        analysis_result = {
            'direct_features': direct_features,
            'cultural_analysis': cultural_analysis,
            'indirect_features': indirect_features,
            'all_features': all_features
        }
        
        return analysis_result

    def enhanced_map_to_properties(self, prompt):
        """Enhanced mapping that includes indirect feature analysis"""
        analysis = self.enhanced_analyze_prompt(prompt)
        property_values = {}
        
        print(f"Direct features: {analysis['direct_features']}")
        print(f"Indirect features: {analysis['indirect_features']}")
        if analysis['cultural_analysis']['detected_regions']:
            print(f"Detected regions: {analysis['cultural_analysis']['detected_regions']}")
        
        # Map all features to properties
        for feature, intensity in analysis['all_features'].items():
            if feature in self.property_mapping:
                properties = self.property_mapping[feature]
                for prop in properties:
                    # Check if property exists in our dataset
                    if prop in self.properties:
                        property_values[prop] = intensity
                    else:
                        # Try to find similar properties
                        similar_props = [p for p in self.properties if prop in p]
                        for similar_prop in similar_props:
                            property_values[similar_prop] = intensity
        
        return property_values, analysis

# Initialize the enhanced mapper
enhanced_mapper = EnhancedCharacterPropertyMapper(properties_df)


In [16]:
class GPTCharacterAnalyzer:
    def __init__(self, api_key=None):
        self.api_key = api_key
        # If no API key, we'll use a fallback approach
        self.use_gpt = api_key is not None
        if self.use_gpt:
            openai.api_key = api_key
    
    def analyze_with_gpt(self, prompt, properties_list):
        """Use GPT to analyze the prompt and map to character properties"""
        if not self.use_gpt:
            return self.fallback_analysis(prompt, properties_list)
        
        system_message = """You are a character analysis expert. Analyze the description and map it to specific character properties.
        Consider both direct physical descriptions and indirect clues (geographic, cultural, lifestyle, profession, personality).
        
        Return a JSON with:
        - "analysis": brief explanation of what physical traits are implied
        - "properties": dictionary of property_name -> confidence_score (0.1 to 1.0)
        - "reasoning": why each property was chosen
        
        Focus on mapping to the available properties."""
        
        try:
            # Updated for new OpenAI API
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": system_message},
                    {"role": "user", "content": f"Prompt: {prompt}\n\nAvailable properties: {properties_list[:100]}"}  # Fixed the escaped characters
                ],
                temperature=0.3,
                max_tokens=1500
            )
            
            result = response.choices[0].message.content
            return self.parse_gpt_response(result)
            
        except Exception as e:
            print(f"GPT API error: {e}")
            return self.fallback_analysis(prompt, properties_list)
    
    def parse_gpt_response(self, response_text):
        """Parse GPT response into structured data"""
        try:
            # Try to extract JSON from response
            if '```json' in response_text:
                json_str = response_text.split('```json')[1].split('```')[0]
            elif '```' in response_text:
                json_str = response_text.split('```')[1]
            else:
                json_str = response_text
            
            return json.loads(json_str)
        except:
            # Fallback: simple keyword matching
            return {"properties": {}, "analysis": "Failed to parse GPT response", "reasoning": "JSON parsing failed"}
    
    def fallback_analysis(self, prompt, properties_list):
        """Fallback analysis when GPT is not available"""
        return {
            "analysis": "Using fallback analysis",
            "properties": {},
            "reasoning": "GPT not available"
        }

class GPTEnhancedCharacterPropertyMapper(CharacterPropertyMapper):
    def __init__(self, properties_df, gpt_api_key=None):
        super().__init__(properties_df)
        self.gpt_analyzer = GPTCharacterAnalyzer(gpt_api_key)
        self.property_descriptions = self.create_property_descriptions()
    
    def create_property_descriptions(self):
        """Create human-readable descriptions of properties for GPT"""
        descriptions = {}
        for prop in self.properties:
            # Convert property names to readable descriptions
            desc = prop.replace('L2__', '').replace('L3_', '').replace('_', ' ').lower()
            descriptions[prop] = desc
        return descriptions
    
    def gpt_enhanced_map(self, prompt):
        """Use GPT for advanced understanding of complex prompts"""
        print("Using GPT for enhanced analysis...")
        gpt_result = self.gpt_analyzer.analyze_with_gpt(prompt, self.properties)
        
        property_values = {}
        
        # Map GPT-identified properties
        if 'properties' in gpt_result and gpt_result['properties']:
            print(f"GPT identified {len(gpt_result['properties'])} properties")
            for prop_name, confidence in gpt_result['properties'].items():
                # Find the closest matching property
                matched_prop = self.find_best_property_match(prop_name)
                if matched_prop:
                    property_values[matched_prop] = float(confidence)
                    print(f"GPT mapped '{prop_name}' -> '{matched_prop}' with confidence {confidence}")
        else:
            print("GPT did not identify any specific properties")
        
        # Also run our rule-based analysis as fallback
        print("Running rule-based analysis as fallback...")
        rule_based_values = super().map_to_properties(prompt)
        
        # Merge results (GPT takes precedence)
        for prop, value in rule_based_values.items():
            if prop not in property_values:
                property_values[prop] = value
        
        print(f"Final mapping: {len(property_values)} properties")
        return property_values, gpt_result
    
    def find_best_property_match(self, description):
        """Find the best matching property for a given description"""
        description_lower = description.lower()
        best_match = None
        best_score = 0
        
        for prop, prop_desc in self.property_descriptions.items():
            score = self.calculate_similarity(description_lower, prop_desc.lower())
            if score > best_score and score > 0.3:  # Threshold
                best_score = score
                best_match = prop
        
        return best_match
    
    def calculate_similarity(self, text1, text2):
        """Simple text similarity calculation"""
        words1 = set(text1.split())
        words2 = set(text2.split())
        
        if not words1 or not words2:
            return 0
        
        intersection = words1.intersection(words2)
        union = words1.union(words2)
        
        return len(intersection) / len(union)

# Use your API key
key = "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"

# Initialize GPT-enhanced mapper
gpt_mapper = GPTEnhancedCharacterPropertyMapper(properties_df, key)

In [17]:
def test_prompt_mapping(prompt):
    """Test the mapping for a given prompt"""
    print(f"\n{'='*60}")
    print(f"PROMPT: '{prompt}'")
    print(f"{'='*60}")
    
    # Get the property mappings
    property_values = mapper.map_to_properties(prompt)
    
    if property_values:
        print(f"\nMapped {len(property_values)} properties:")
        print("-" * 40)
        
        # Group by category for better readability
        categorized_output = {}
        for prop, value in property_values.items():
            for category, props_list in mapper.categories.items():
                if prop in props_list:
                    if category not in categorized_output:
                        categorized_output[category] = []
                    categorized_output[category].append((prop, value))
                    break
        
        # Display by category
        for category, props in categorized_output.items():
            if props:
                print(f"\n{category.upper()}:")
                for prop, value in props:
                    print(f"  {prop}: {value:.2f}")
    else:
        print("No properties mapped for this prompt.")
    
    return property_values

# NEW: Add GPT testing function
def test_gpt_prompt_mapping(prompt):
    """Test GPT-enhanced mapping for a given prompt"""
    print(f"\n{'='*80}")
    print(f"GPT ENHANCED PROMPT: '{prompt}'")
    print(f"{'='*80}")
    
    # Get the GPT-enhanced property mappings
    property_values, gpt_result = gpt_mapper.gpt_enhanced_map(prompt)
    
    if 'analysis' in gpt_result:
        print(f"\nGPT Analysis: {gpt_result['analysis']}")
    if 'reasoning' in gpt_result:
        print(f"GPT Reasoning: {gpt_result['reasoning']}")
    
    if property_values:
        print(f"\nMapped {len(property_values)} properties:")
        print("-" * 40)
        
        # Group by category for better readability
        categorized_output = {}
        for prop, value in property_values.items():
            for category, props_list in gpt_mapper.categories.items():
                if prop in props_list:
                    if category not in categorized_output:
                        categorized_output[category] = []
                    categorized_output[category].append((prop, value))
                    break
        
        # Display by category
        for category, props in categorized_output.items():
            if props:
                print(f"\n{category.upper()}:")
                for prop, value in props:
                    print(f"  {prop}: {value:.2f}")
    else:
        print("No properties mapped for this prompt.")
    
    return property_values, gpt_result

In [18]:
# Cell 9: Basic Prompt Testing (keep existing)
test_prompts = [
    "Create a man with big eyes and a strong jaw",
    "Generate a character with a sharp nose and thin lips",
    "Make a muscular athletic person with high cheekbones",
    "Create an intelligent looking person with a prominent chin",
    "Generate a gentle face with full lips and soft jaw",
    "Make a short person with a round face",
    "Create someone with very large eyes and extremely full lips"
]

print("CHARACTER PROPERTY MAPPING DEMONSTRATION")
print("=" * 60)

results = []
for prompt in test_prompts:
    result = test_prompt_mapping(prompt)
    results.append((prompt, result))

# NEW: GPT Testing Section
print("\n\n" + "="*80)
print("GPT-ENHANCED CHARACTER PROPERTY MAPPING DEMONSTRATION")
print("=" * 80)

gpt_test_prompts = [
    "Create a tall Nordic warrior with blonde hair and blue eyes",
    "Generate an elderly wise Chinese scholar with a long beard",
    "Make a fit Brazilian soccer player with tanned skin",
    "Create a sophisticated French fashion model with delicate features"
]

gpt_results = []
for prompt in gpt_test_prompts:
    result, gpt_analysis = test_gpt_prompt_mapping(prompt)
    gpt_results.append((prompt, result, gpt_analysis))

CHARACTER PROPERTY MAPPING DEMONSTRATION

PROMPT: 'Create a man with big eyes and a strong jaw'
Detected features: {'big_eyes': 0.7, 'full_lips': 0.7, 'large_body': 0.7, 'small_eyes': 0.7, 'strong_jaw': 0.7, 'prominent_chin': 0.7, 'strong': 0.7, 'soft_jaw': 0.7, 'wide_jaw': 0.7, 'narrow_jaw': 0.7}

Mapped 16 properties:
----------------------------------------

EYES:
  L2__Eyes_IrisSize_max: 0.70
  L2__Eyes_IrisSize_min: 0.70

ETHNICITY:
  L2_Caucasian_Mouth_UpperlipVolume_max: 0.70
  L2_Caucasian_Mouth_LowerlipVolume_max: 0.70
  L2_Caucasian_Jaw_Angle_min: 0.70
  L2_Caucasian_Jaw_Prominence_max: 0.70
  L2_Caucasian_Chin_Prominence_max: 0.70
  L2_Caucasian_Chin_SizeZ_max: 0.70
  L2_Caucasian_Jaw_Angle_max: 0.70
  L2_Caucasian_Jaw_Prominence_min: 0.70
  L2_Caucasian_Jaw_ScaleX_max: 0.70
  L2_Caucasian_Jaw_ScaleX_min: 0.70

BODY:
  L2__Body_Size_max: 0.70

OTHER:
  L2__Chest_Girth_max: 0.70

NECK:
  L2__Neck_Mass-Tone_max-max: 0.70

SHOULDERS:
  L2__Shoulders_Mass-Tone_max-max: 0.70

PRO

In [10]:
# Cell 9: Enhanced Testing Function
def test_complex_prompt_mapping(prompt):
    """Test the enhanced mapping for complex, indirect prompts"""
    print(f"\n{'='*80}")
    print(f"COMPLEX PROMPT: '{prompt}'")
    print(f"{'='*80}")
    
    # Get the enhanced property mappings
    property_values, analysis = enhanced_mapper.enhanced_map_to_properties(prompt)
    
    if property_values:
        print(f"\nMapped {len(property_values)} properties:")
        print("-" * 40)
        
        # Group by category for better readability
        categorized_output = {}
        for prop, value in property_values.items():
            for category, props_list in enhanced_mapper.categories.items():
                if prop in props_list:
                    if category not in categorized_output:
                        categorized_output[category] = []
                    categorized_output[category].append((prop, value))
                    break
        
        # Display by category
        for category, props in categorized_output.items():
            if props:
                print(f"\n{category.upper()}:")
                for prop, value in props:
                    print(f"  {prop}: {value:.2f}")
    else:
        print("No properties mapped for this prompt.")
    
    return property_values, analysis

In [11]:
# Cell 10: Complex Prompt Testing
complex_test_prompts = [
    "Soham was a foody who loved eating all kinds of cuisine",
    "Soham lived in north eastern india and had typical features of the region",
    "An intellectual professor from Germany with a thoughtful expression",
    "A manual laborer who worked construction in Brazil for 20 years",
    "Wealthy business executive from New York who never exercises",
    "Outdoor enthusiast from Canada who spends all her time hiking",
    "A farmer from rural China with weathered skin from working in the fields",
    "A soldier from Russia with a strong, disciplined appearance",
    "Sedentary office worker from Japan who sits at a computer all day",
    "A model from France with delicate features and perfect skin",
    "An energetic teacher from California who is always smiling"
]

print("ENHANCED CHARACTER PROPERTY MAPPING WITH CONTEXTUAL ANALYSIS")
print("=" * 80)

complex_results = []
for prompt in complex_test_prompts:
    result, analysis = test_complex_prompt_mapping(prompt)
    complex_results.append((prompt, result, analysis))


ENHANCED CHARACTER PROPERTY MAPPING WITH CONTEXTUAL ANALYSIS

COMPLEX PROMPT: 'Soham was a foody who loved eating all kinds of cuisine'
Direct features: {'gentle': 0.7}
Indirect features: {'L2__Stomach_LocalFat_max': 0.7, 'L2__Abdomen_Mass-Tone_max-max': 0.7, 'L2__Body_Size_max': 0.7, 'food_lover': 0.7}

Mapped 4 properties:
----------------------------------------

STOMACH:
  L2__Stomach_LocalFat_max: 0.70

OTHER:
  L2__Abdomen_Mass-Tone_max-max: 0.70

BODY:
  L2__Body_Size_max: 0.70

ETHNICITY:
  L2_Caucasian_Eyes_Size_max: 0.70

COMPLEX PROMPT: 'Soham lived in north eastern india and had typical features of the region'
Direct features: {}
Indirect features: {'olive skin': 0.7, 'dark hair': 0.7, 'brown eyes': 0.7, 'prominent nose': 0.7, 'thick eyebrows': 0.7}
Detected regions: ['south asia']
No properties mapped for this prompt.

COMPLEX PROMPT: 'An intellectual professor from Germany with a thoughtful expression'
Direct features: {}
Indirect features: {'L2_Caucasian_Forehead_SizeZ_m

In [12]:

# Cell 11: Cultural Analysis Demonstration
def demonstrate_cultural_analysis():
    """Show detailed cultural analysis for understanding the system"""
    
    test_texts = [
        "A person from northern India with typical features",
        "Someone from Scandinavia with light features",
        "A manual worker from rural China",
        "An intellectual from urban Japan"
    ]
    
    print("CULTURAL CONTEXT ANALYSIS DEMONSTRATION")
    print("=" * 60)
    
    for text in test_texts:
        print(f"\nText: '{text}'")
        analysis = enhanced_mapper.cultural_analyzer.analyze_complex_prompt(text)
        
        print(f"Detected regions: {analysis['detected_regions']}")
        print(f"Cultural features: {analysis['cultural_features']}")
        print(f"Lifestyle traits: {analysis['lifestyle_traits']}")
        print(f"Profession traits: {analysis['profession_traits']}")
        print(f"Personality traits: {analysis['personality_traits']}")
        print("-" * 40)

# Run the demonstration
demonstrate_cultural_analysis()



CULTURAL CONTEXT ANALYSIS DEMONSTRATION

Text: 'A person from northern India with typical features'
Detected regions: ['south asia']
Cultural features: {'olive skin': 0.7, 'dark hair': 0.7, 'brown eyes': 0.7, 'prominent nose': 0.7, 'thick eyebrows': 0.7}
Lifestyle traits: {}
Profession traits: {}
Personality traits: {}
----------------------------------------

Text: 'Someone from Scandinavia with light features'
Detected regions: ['northern europe']
Cultural features: {'fair skin': 0.7, 'blonde hair': 0.7, 'blue eyes': 0.7, 'tall': 0.7, 'angular features': 0.7}
Lifestyle traits: {}
Profession traits: {}
Personality traits: {}
----------------------------------------

Text: 'A manual worker from rural China'
Detected regions: ['east asia']
Cultural features: {'epicanthic fold': 0.7, 'monolid': 0.7, 'straight black hair': 0.7, 'pale skin': 0.7, 'small nose': 0.7}
Lifestyle traits: {'L2_Caucasian_Skin_Freckles_max': 0.7, 'L2__Body_Size_max': 0.7, 'L2__Hands_Mass-Tone_max-max': 0.7}
Profes

In [13]:
# Cell 12: Interactive Testing
def interactive_test():
    """Allow interactive testing of prompts"""
    print("Interactive Character Property Mapping")
    print("Type 'quit' to exit\n")
    
    while True:
        user_prompt = input("\nEnter your character description: ")
        
        if user_prompt.lower() in ['quit', 'exit', 'q']:
            break
            
        if user_prompt.strip():
            test_prompt_mapping(user_prompt)
        else:
            print("Please enter a valid description.")

# Uncomment the line below to run interactive testing
interactive_test()


Interactive Character Property Mapping
Type 'quit' to exit


PROMPT: 'foody man living in north india'
Detected features: {}
No properties mapped for this prompt.


In [14]:

# Cell 13: Enhanced Interactive Testing
def enhanced_interactive_test():
    """Allow interactive testing of complex prompts"""
    print("ENHANCED INTERACTIVE CHARACTER PROPERTY MAPPING")
    print("Now with cultural, lifestyle, and contextual analysis!")
    print("Type 'quit' to exit\n")
    
    while True:
        user_prompt = input("\nEnter your complex character description: ")
        
        if user_prompt.lower() in ['quit', 'exit', 'q']:
            break
            
        if user_prompt.strip():
            test_complex_prompt_mapping(user_prompt)
        else:
            print("Please enter a valid description.")

# Uncomment to run enhanced interactive testing
enhanced_interactive_test()



ENHANCED INTERACTIVE CHARACTER PROPERTY MAPPING
Now with cultural, lifestyle, and contextual analysis!
Type 'quit' to exit


COMPLEX PROMPT: 'foody man living in north india'
Direct features: {}
Indirect features: {'olive skin': 0.7, 'dark hair': 0.7, 'brown eyes': 0.7, 'prominent nose': 0.7, 'thick eyebrows': 0.7, 'L2__Stomach_LocalFat_max': 0.7, 'L2__Abdomen_Mass-Tone_max-max': 0.7, 'L2__Body_Size_max': 0.7, 'food_lover': 0.7}
Detected regions: ['south asia']

Mapped 3 properties:
----------------------------------------

STOMACH:
  L2__Stomach_LocalFat_max: 0.70

OTHER:
  L2__Abdomen_Mass-Tone_max-max: 0.70

BODY:
  L2__Body_Size_max: 0.70


In [15]:
# Cell 14: Summary and Analysis
print("\n" + "="*80)
print("SYSTEM SUMMARY")
print("="*80)
print(f"Total properties available: {len(properties_df)}")
print(f"Categories identified: {len(mapper.categories)}")
print("\nCategory distribution:")
for category, props in mapper.categories.items():
    if props:  # Only show categories that have properties
        print(f"  {category}: {len(props)} properties")

print("\nSystem Features:")
print("  ✓ Basic physical feature mapping")
print("  ✓ Cultural context analysis")
print("  ✓ Lifestyle and profession inference")
print("  ✓ Personality trait mapping")
print("  ✓ Intensity-based property assignment")
print("  ✓ Multi-level categorization")

# Cell 15: Example Usage for Integration
def get_character_properties(prompt, use_enhanced=True):
    """
    Main function to get character properties from a text prompt
    
    Args:
        prompt (str): The character description
        use_enhanced (bool): Whether to use enhanced analysis
    
    Returns:
        dict: Property names mapped to intensity values
    """
    if use_enhanced:
        property_values, analysis = enhanced_mapper.enhanced_map_to_properties(prompt)
        return property_values, analysis
    else:
        property_values = mapper.map_to_properties(prompt)
        return property_values, None

# Example usage:
properties, analysis = get_character_properties("A tall muscular man with sharp features")
print(properties)


SYSTEM SUMMARY
Total properties available: 2020
Categories identified: 25

Category distribution:
  ethnicity: 1500 properties
  body: 3 properties
  eyes: 72 properties
  nose: 53 properties
  mouth: 61 properties
  ears: 32 properties
  cheeks: 16 properties
  chin: 10 properties
  jaw: 14 properties
  forehead: 10 properties
  fantasy: 5 properties
  arms: 14 properties
  legs: 38 properties
  hands: 18 properties
  feet: 12 properties
  neck: 25 properties
  shoulders: 16 properties
  pelvis: 20 properties
  torso: 26 properties
  stomach: 4 properties
  waist: 2 properties
  expression: 22 properties
  other: 47 properties

System Features:
  ✓ Basic physical feature mapping
  ✓ Cultural context analysis
  ✓ Lifestyle and profession inference
  ✓ Personality trait mapping
  ✓ Intensity-based property assignment
  ✓ Multi-level categorization
Direct features: {'tall': 0.7, 'muscular_body': 0.7, 'sharp_nose': 0.7, 'angular_face': 0.7}
Indirect features: {}
{'L2__Body_Size_max': 0.7