<a href="https://colab.research.google.com/github/MichaelMW/NameSmithy/blob/main/NameSmithy_Colab_Demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 🔨 NameSmithy - Interactive Baby Name Generator & Evaluator

**GAN-Based Baby Name Generator trained on 140+ years of US naming data**

✨ **Features:**
- 🎯 Generate unique names that "look" popular using ML
- 📊 Evaluate any name with historical context and scoring
- 🧠 Trained on 77K+ names from 1880-2018 US data
- 🚀 Instant results with no setup required

**Instructions:** Run all cells below, then interact with the demo interface!

## 📦 Install Dependencies

In [None]:
# Install the third-party packages this notebook imports (IPython shell escape; -q = quiet).
# NOTE(review): pandas is imported by a later cell but is not listed here — presumably it is
# already present in the runtime or pulled in as a dependency; confirm.
!pip install gradio scikit-learn numpy requests -q
print("✅ Dependencies installed!")

## 📥 Download Models and Data

In [ ]:
import requests
import pickle
import os
from pathlib import Path

# Create models directory
os.makedirs('models', exist_ok=True)

# Download model files from GitHub repository
base_url = "https://github.com/MichaelMW/NameSmithy/raw/main/models"

# (path relative to base_url, human-readable label) for every required artifact
files_to_download = [
    ("judge/gbr.n100.genz.v3", "GBR Model"),
    ("names/genz.avg.tsv", "Names Database"),
    ("badwords/bad.merged.txt", "Bad Words Filter")
]

# Labels of artifacts that could not be fetched or written, so the final
# message reflects what actually happened instead of always claiming success.
failed_downloads = []

print("📥 Downloading NameSmithy models...")
for file_path, description in files_to_download:
    url = f"{base_url}/{file_path}"
    local_path = f"models/{file_path}"

    # Create directory if needed
    os.makedirs(os.path.dirname(local_path), exist_ok=True)

    try:
        print(f"  📄 Downloading {description}...")
        response = requests.get(url, timeout=30)
        response.raise_for_status()

        with open(local_path, 'wb') as f:
            f.write(response.content)
    except (requests.RequestException, OSError) as e:
        # Network/HTTP problems and local write errors are reported per file;
        # the remaining files are still attempted.
        print(f"  ❌ Failed to download {description}: {e}")
        failed_downloads.append(description)
    else:
        print(f"  ✅ {description} downloaded ({len(response.content)} bytes)")

if failed_downloads:
    print(f"\n⚠️ Model download finished with {len(failed_downloads)} failure(s): "
          f"{', '.join(failed_downloads)}")
else:
    print("\n🎉 Model download completed!")

## 🧠 Core NameSmithy Functions

In [None]:
import numpy as np
import pickle
import time
import random
from pathlib import Path

# Global variables (filled in by load_models())
gbr_model = None   # unpickled scoring model; stays None if loading fails
known_names = {}   # (sex_bit, *name_vector) tuple -> rank parsed from the names TSV
bad_words = set()  # lowercased, non-empty lines from the bad-words file

# Character mapping for name vectorization
# The alphabet is a-z plus space. Space sorts first, so it gets index 0 and
# serves both as the padding symbol and as the fallback for unknown characters.
chars = sorted(set('abcdefghijklmnopqrstuvwxyz '))
char_to_int = {c: i for i, c in enumerate(chars)}
int_to_char = dict(enumerate(chars))


def name_to_vec(name, max_length=15):
    """Convert name to a fixed-length integer vector for the ML model.

    The name is lowercased, cut to at most ``max_length`` characters and
    right-padded with spaces, so the result always has exactly
    ``max_length`` entries. Characters outside ``chars`` map to 0.

    Args:
        name: The name to encode.
        max_length: Length of the returned vector.

    Returns:
        A list of ``max_length`` ints (indices into ``chars``).
    """
    # Lowercase before slicing: lowercasing can change the string length for
    # some Unicode characters, and the output width must not depend on that.
    padded = name.lower()[:max_length].ljust(max_length)
    return [char_to_int.get(char, 0) for char in padded]

def format_score(score):
    """Turn a raw score into its display form.

    ``None`` becomes ``"N/A"``, strings are passed through untouched, and
    numbers are scaled to a percentage rounded to one decimal place.
    """
    if score is None:
        return "N/A"
    if isinstance(score, str):
        return score
    percentage = score * 100
    return round(percentage, 1)

def get_quality_tier(score):
    """Map a raw score onto its quality label.

    Each tier is defined by an exclusive upper bound; the first bound the
    score falls below wins, and anything at or above 0.8 is "Excellent".
    """
    tier_bounds = (
        (0, "Inappropriate"),
        (0.2, "Poor"),
        (0.4, "Fair"),
        (0.6, "Good"),
        (0.8, "Very Good"),
    )
    for upper_bound, label in tier_bounds:
        if score < upper_bound:
            return label
    return "Excellent"

print("✅ Core functions defined!")

In [None]:
def load_models():
    """Load all NameSmithy models and data into the module-level globals.

    Populates ``gbr_model`` (set to ``None`` if it cannot be loaded),
    ``known_names`` and ``bad_words`` from the files under ``models/``.
    Every step is best-effort: a failure is printed and the remaining steps
    still run. Returns nothing.
    """
    global gbr_model, known_names, bad_words

    print("🔨 Loading NameSmithy models...")

    # Load GBR model
    # SECURITY NOTE: pickle.load can execute arbitrary code contained in the
    # file. Only load model files obtained from a source you trust.
    try:
        with open('models/judge/gbr.n100.genz.v3', 'rb') as f:
            gbr_model = pickle.load(f)
        print("✅ Loaded GBR scoring model")
    except Exception as e:
        # Unpickling can fail in many ways (missing file, missing library,
        # corrupt data), so this boundary stays deliberately broad.
        print(f"❌ Could not load GBR model: {e}")
        gbr_model = None

    # Load known names database
    try:
        with open('models/names/genz.avg.tsv', 'r', encoding='utf-8') as f:
            for line in f:
                parts = line.split()
                if len(parts) < 3:
                    continue
                try:
                    rank = float(parts[2])
                except ValueError:
                    # Skip a header or malformed row instead of aborting the
                    # whole database load.
                    continue
                sex = 0 if parts[1] == "F" else 1
                # Key layout matches the feature vector used for lookups:
                # sex bit followed by the encoded name.
                known_names[(sex, *name_to_vec(parts[0]))] = rank
        print(f"✅ Loaded {len(known_names)} known names")
    except (OSError, UnicodeError) as e:
        print(f"❌ Could not load names database: {e}")

    # Load bad words filter
    try:
        with open('models/badwords/bad.merged.txt', 'r', encoding='utf-8') as f:
            for line in f:
                word = line.strip().lower()
                if word:
                    bad_words.add(word)
        print(f"✅ Loaded {len(bad_words)} filtered words")
    except (OSError, UnicodeError) as e:
        print(f"❌ Could not load bad words filter: {e}")

    print("🎉 Model loading completed!")

# Load models now
load_models()

## 🎯 Name Generation Logic

In [None]:
def generate_name_simple(gender='F', seed=None):
    """Generate a name using simple pattern-based logic.

    The first letter is drawn from a gender-specific weighted list; each
    following letter tends to alternate between vowels and consonants. The
    name may stop once it has reached a randomly chosen minimum length of
    3-5 letters and never exceeds 12 letters.

    Args:
        gender: 'F' selects the female first-letter table; any other value
            selects the male table.
        seed: Integer seed for reproducible output. When ``None`` a seed is
            derived from the current time.

    Returns:
        The generated name, capitalized.

    Note:
        Reseeds the global ``numpy.random`` and ``random`` generators.
    """
    if seed is None:
        seed = int(time.time() * 1000000) % 999999

    np.random.seed(seed)
    random.seed(seed)

    vowels = 'aeiou'
    consonants = 'bcdfghjklmnpqrstvwxyz'
    max_length = 12

    # First character - gender-specific starting letters
    if gender == 'F':
        first_chars = ['a', 'e', 'i', 'o', 'j', 'm', 's', 'k', 'l', 'c', 'n', 'r', 'b', 'h', 'g', 'v', 'z', 'p']
        weights = [0.12, 0.10, 0.08, 0.06, 0.08, 0.08, 0.07, 0.06, 0.06, 0.05, 0.05, 0.04, 0.04, 0.03, 0.03, 0.02, 0.02, 0.01]
    else:
        first_chars = ['a', 'j', 'm', 'r', 'd', 'c', 'b', 'l', 't', 'n', 's', 'k', 'g', 'h', 'w', 'p', 'v', 'z']
        weights = [0.10, 0.09, 0.08, 0.08, 0.07, 0.07, 0.06, 0.06, 0.06, 0.05, 0.05, 0.04, 0.04, 0.04, 0.03, 0.03, 0.02, 0.02]

    # np.random.choice rejects probabilities that do not sum to 1 (the male
    # table adds up to 0.99), so normalise the weights before drawing.
    probabilities = np.asarray(weights, dtype=float)
    probabilities = probabilities / probabilities.sum()
    name = str(np.random.choice(first_chars, p=probabilities))

    # Generate the remaining characters one by one
    for _ in range(max_length - 1):
        if name[-1] in vowels:
            # After vowel, often consonant
            consonant_prob = 0.6 + np.random.random() * 0.2
            pool = consonants if np.random.random() < consonant_prob else vowels
        else:
            # After consonant, often vowel
            vowel_prob = 0.7 + np.random.random() * 0.2
            pool = vowels if np.random.random() < vowel_prob else consonants
        current_char = np.random.choice(list(pool))

        # End name probability grows with every letter past the minimum
        # length; the candidate letter is discarded when the name ends here.
        min_length = 3 + np.random.randint(0, 3)
        end_prob = 0.2 + (len(name) - min_length) * 0.1
        if len(name) >= min_length and np.random.random() < end_prob:
            break

        name += str(current_char)

    # Clean up the name
    name = name.strip().capitalize()

    # Ensure reasonable length
    if len(name) < 3:
        name += str(np.random.choice(['a', 'e', 'i', 'o']))

    return name if name else "Nora"

def evaluate_name_ml(name, gender='F'):
    """Evaluate a name using the ML model.

    Args:
        name: The name to score.
        gender: 'F' for female; any other value is treated as male.

    Returns:
        A dict with the keys ``name``, ``score`` (display percentage),
        ``raw_score`` (float), ``quality_tier``, ``known_rank`` (rank from
        the names database or "Not found") and ``appropriate`` (bool).
        When no model is loaded, or prediction fails, a neutral placeholder
        score (raw 0.5 / display 50.0, tier "Unknown") is returned.
    """
    # Explicit None check: do not depend on the estimator's truthiness.
    if gbr_model is None:
        return {
            'name': name,
            'score': 50.0,
            # Always present so callers can read it without a KeyError.
            'raw_score': 0.5,
            'quality_tier': 'Unknown',
            'known_rank': 'Model not loaded',
            'appropriate': True
        }

    # Check if inappropriate (exact, case-insensitive match on the whole name)
    name_lower = name.lower()
    is_appropriate = name_lower not in bad_words

    # Create feature vector
    sex_bit = 0 if gender == 'F' else 1
    name_vec = name_to_vec(name)
    features = np.array([[sex_bit] + name_vec])

    # Get ML prediction
    try:
        # Convert the NumPy scalar to a plain float so downstream rounding
        # and JSON output receive a native Python number.
        raw_score = float(gbr_model.predict(features)[0])

        # Apply bad word penalty
        if not is_appropriate:
            raw_score = -1.0

        # Format score
        display_score = format_score(raw_score)
        quality_tier = get_quality_tier(raw_score)

    except Exception as e:
        print(f"Prediction error: {e}")
        raw_score = 0.5
        display_score = 50.0
        quality_tier = "Unknown"

    # Check if name is in historical database
    feature_key = tuple([sex_bit] + name_vec)
    known_rank = known_names.get(feature_key, "Not found")

    return {
        'name': name,
        'score': display_score,
        'raw_score': raw_score,
        'quality_tier': quality_tier,
        'known_rank': known_rank,
        'appropriate': is_appropriate
    }

print("✅ Generation and evaluation functions ready!")

## 🎨 Interactive Interface

In [None]:
import gradio as gr
import pandas as pd

def generate_names_interface(count, gender, min_score, style):
    """Generate names for Gradio interface.

    Args:
        count: Number of names requested (coerced to int).
        gender: 'F' or 'M', forwarded to generation and evaluation.
        min_score: Minimum acceptable score on the 0-100 scale.
        style: "unique" keeps only names absent from the known-names
            database; other values apply no extra filter.

    Returns:
        A pandas DataFrame with the columns Name, Score, Quality and
        Historical, sorted by score in descending order, or a single
        placeholder row when no candidate met the criteria.
    """
    count = int(count)  # slider values may arrive as floats
    results = []
    seen_names = set()  # avoids listing the same generated name twice
    attempts = 0
    max_attempts = count * 20  # Reasonable limit

    print(f"🎯 Generating {count} {gender} names (min score: {min_score}, style: {style})")

    while len(results) < count and attempts < max_attempts:
        attempts += 1

        # Every style currently shares the same generator; style only
        # affects the filtering below.
        name = generate_name_simple(gender)
        if name in seen_names:
            continue
        seen_names.add(name)

        # Evaluate name
        evaluation = evaluate_name_ml(name, gender)

        # 0.5 mirrors the neutral 50.0 display score that the evaluator
        # reports when it has no model, in case 'raw_score' is absent.
        score = evaluation.get('raw_score', 0.5)

        # Check if meets criteria
        if not (score * 100 >= min_score and evaluation['appropriate']):
            continue

        # Check if unique (not in database) for unique style
        if style == "unique" and evaluation['known_rank'] != "Not found":
            continue

        results.append({
            'Name': evaluation['name'],
            'Score': evaluation['score'],
            'Quality': evaluation['quality_tier'],
            'Historical': evaluation['known_rank']
        })

    if not results:
        return pd.DataFrame([{
            'Name': 'No names found',
            'Score': 'N/A',
            'Quality': 'Try lower min score',
            'Historical': 'N/A'
        }])

    # Sort by score
    results.sort(key=lambda x: float(str(x['Score']).replace('%', '')), reverse=True)

    print(f"✅ Generated {len(results)} names in {attempts} attempts")
    return pd.DataFrame(results)

def evaluate_name_interface(name, gender):
    """Evaluate a single name for Gradio interface.

    Args:
        name: Text entered by the user; surrounding whitespace is ignored.
        gender: 'F' or 'M', forwarded to the evaluator.

    Returns:
        A JSON-friendly dict of evaluation fields, or ``{"error": ...}``
        when no name was provided.
    """
    if not name or not name.strip():
        return {"error": "Please enter a name to evaluate"}

    evaluation = evaluate_name_ml(name.strip(), gender)

    # 'raw_score' may be absent (evaluator fallback) or a NumPy scalar;
    # normalise it to a plain float so the JSON component can serialise it.
    raw_score = evaluation.get('raw_score')
    raw_display = round(float(raw_score), 3) if raw_score is not None else "N/A"

    return {
        "Name": evaluation['name'],
        "Score": f"{evaluation['score']}%",
        "Quality Tier": evaluation['quality_tier'],
        "Historical Rank": evaluation['known_rank'],
        "Appropriate": "Yes" if evaluation['appropriate'] else "No",
        "Raw Score": raw_display
    }

print("✅ Interface functions ready!")

In [ ]:
# Create the Gradio interface
# Layout: a header, two tabs (name generator and name evaluator), and a footer.
with gr.Blocks(title="NameSmithy - AI Baby Name Generator", theme=gr.themes.Soft()) as demo:
    # Page header
    gr.Markdown(
        """
        # 🔨 NameSmithy - AI Baby Name Generator
        
        **Generate unique baby names that look historically popular using machine learning!**
        
        Trained on 140+ years of US baby names (77K+ names from 1880-2018)
        """
    )

    with gr.Tabs():
        # Tab 1: batch generation; inputs on the left, results table on the right
        with gr.TabItem("🎯 Name Generator"):
            gr.Markdown("### Generate Names with AI")

            with gr.Row():
                with gr.Column():
                    count_input = gr.Slider(
                        minimum=1, maximum=20, value=5, step=1,
                        label="Number of names"
                    )
                    gender_input = gr.Radio(
                        choices=["F", "M"], value="F",
                        label="Gender"
                    )
                    # Same 0-100 scale that generate_names_interface compares
                    # against raw_score * 100
                    min_score_input = gr.Slider(
                        minimum=0, maximum=100, value=70, step=5,
                        label="Minimum Quality Score"
                    )
                    style_input = gr.Radio(
                        choices=["random", "unique", "popular"], value="unique",
                        label="Style (unique = never existed before)"
                    )

                    generate_btn = gr.Button("🎯 Generate Names", variant="primary")

                with gr.Column():
                    # Headers match the column keys of the DataFrame returned
                    # by generate_names_interface
                    names_output = gr.Dataframe(
                        headers=["Name", "Score", "Quality", "Historical"],
                        label="Generated Names"
                    )

            # Input order must match generate_names_interface(count, gender, min_score, style)
            generate_btn.click(
                fn=generate_names_interface,
                inputs=[count_input, gender_input, min_score_input, style_input],
                outputs=names_output
            )

        # Tab 2: score a single user-supplied name
        with gr.TabItem("📊 Name Evaluator"):
            gr.Markdown("### Evaluate Any Name")

            with gr.Row():
                with gr.Column():
                    name_input = gr.Textbox(
                        label="Name to evaluate",
                        placeholder="Enter any name...",
                        value="Emma"
                    )
                    eval_gender_input = gr.Radio(
                        choices=["F", "M"], value="F",
                        label="Gender"
                    )

                    evaluate_btn = gr.Button("📊 Evaluate Name", variant="primary")

                with gr.Column():
                    # Displays the dict returned by evaluate_name_interface
                    eval_output = gr.JSON(
                        label="Evaluation Results"
                    )

            # Input order must match evaluate_name_interface(name, gender)
            evaluate_btn.click(
                fn=evaluate_name_interface,
                inputs=[name_input, eval_gender_input],
                outputs=eval_output
            )

    # Page footer
    gr.Markdown(
        """
        ---
        **How it works:** Our GAN-based model learned patterns from 77K+ historical US baby names. 
        Higher scores indicate names that statistically resemble popular historical names.
        
        **🔗 [GitHub Repository](https://github.com/MichaelMW/NameSmithy)** | **⭐ Star if you like it!**
        """
    )

print("✅ Gradio interface created!")

## 🚀 Launch Demo

In [None]:
# Launch the demo with public sharing
print("🚀 Launching NameSmithy demo...")
print("📝 This will create a public URL you can share with anyone!")
# NOTE(review): the share-link lifetime is set by Gradio and may differ between
# versions — confirm that "72 hours" matches the installed release.
print("⏱️  The link will be active for 72 hours.")

# NOTE(review): a fixed server_port presumably fails if the port is already in
# use (e.g. an earlier demo instance is still running) — confirm, or omit the
# port to let Gradio choose one.
demo.launch(
    share=True,           # Create public URL
    debug=True,           # Show errors
    server_name="0.0.0.0",  # listen on all network interfaces
    server_port=7860
)