# Django Profile Text Generation Demo

This notebook demonstrates the functionality of the `get_profile_text()` method from the ServiceProviderProfile model.
It shows how to interact with Django ORM to generate comprehensive text representations of profiles for embedding generation.

import os
import sys
import django
import random
from IPython.display import display, Markdown
import pandas as pd

# Add the Django project to Python path
sys.path.insert(0, '/home/mohammed/Desktop/tech_projects/growbal/growbal_django')

# Configure Django settings
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "growbal.settings")

# Allow Django ORM to work in async contexts (Jupyter)
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"

# Setup Django
django.setup()

print("Django setup completed successfully!")

In [1]:
import os
import sys
import django
import random
from IPython.display import display, Markdown
import pandas as pd

# Add the Django project to Python path
sys.path.insert(0, '/home/mohammed/Desktop/tech_projects/growbal/growbal_django')

# Configure Django settings
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "growbal.settings")

# Allow Django ORM to work in async contexts (Jupyter)
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"

# Setup Django
django.setup()

print("Django setup completed successfully!")

Django setup completed successfully!


## 2. Import Django Models

Now we can import the models and other Django components.

In [2]:
from django.db.models import Count, Q
from accounts.models import ServiceProviderProfile, CustomUser, ServiceProviderMemberProfile
from services.models import Service

print(f"Total profiles in database: {ServiceProviderProfile.objects.count()}")
print(f"Total services in database: {Service.objects.count()}")
print(f"Total users in database: {CustomUser.objects.count()}")

Total profiles in database: 34
Total services in database: 34
Total users in database: 35


## 3. Explore Profile Data

Let's explore the profile data structure and statistics.

In [3]:
# Get profiles with services
profiles_with_services = ServiceProviderProfile.objects.annotate(
    service_count=Count('services')
).filter(service_count__gt=0)

print(f"Profiles with services: {profiles_with_services.count()}")

# Get profiles by country
country_stats = ServiceProviderProfile.objects.values('country').annotate(
    count=Count('id')
).order_by('-count')[:10]

print("\nTop 10 countries by profile count:")
for stat in country_stats:
    print(f"  {stat['country'] or 'Not specified'}: {stat['count']}")

# Get profiles by provider type
provider_type_stats = ServiceProviderProfile.objects.values('provider_type').annotate(
    count=Count('id')
)

print("\nProfiles by provider type:")
for stat in provider_type_stats:
    print(f"  {stat['provider_type']}: {stat['count']}")

Profiles with services: 34

Top 10 countries by profile count:
  UAE: 26
  India: 5
  Singapore: 1
  Bahrain: 1
  UK: 1

Profiles by provider type:
  Company: 34


## 4. Profile Text Generation Function

Let's create helper functions similar to the management command.

In [4]:
def get_random_profile(with_services=False):
    """
    Get a random profile from the database.
    
    Args:
        with_services: If True, only return profiles that have services
    
    Returns:
        ServiceProviderProfile instance or None
    """
    profiles = ServiceProviderProfile.objects.all()
    
    if with_services:
        profiles = profiles.annotate(
            service_count=Count('services')
        ).filter(service_count__gt=0)
    
    if not profiles.exists():
        return None
    
    return random.choice(profiles)


def display_profile_text(profile, save_to_file=False, filename=None):
    """
    Display or save the profile text representation.
    
    Args:
        profile: ServiceProviderProfile instance
        save_to_file: If True, save to file instead of displaying
        filename: Optional filename for saving
    
    Returns:
        dict with profile info and statistics
    """
    if not profile:
        return None
    
    # Get the profile text
    profile_text = profile.get_profile_text()
    
    # Prepare statistics
    stats = {
        'profile_id': profile.id,
        'username': profile.user.username,
        'name': profile.name,
        'text_length': len(profile_text),
        'service_count': profile.services.count(),
        'member_count': profile.members.count() if profile.provider_type == 'Company' else 0
    }
    
    if save_to_file:
        if not filename:
            filename = f"profile_{profile.id}_text.txt"
        
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(f"{'='*80}\n")
            f.write(f"Profile ID: {profile.id}\n")
            f.write(f"Username: {profile.user.username}\n")
            f.write(f"{'='*80}\n\n")
            f.write(profile_text)
            f.write(f"\n\n{'='*80}\n")
            f.write("Statistics:\n")
            f.write(f"- Text length: {stats['text_length']} characters\n")
            f.write(f"- Number of services: {stats['service_count']}\n")
            if profile.provider_type == 'Company':
                f.write(f"- Number of members: {stats['member_count']}\n")
            f.write(f"{'='*80}\n")
        
        print(f"Profile text saved to: {filename}")
    
    return stats, profile_text

print("Helper functions defined successfully!")

Helper functions defined successfully!


## 5. Test Random Profile Text Generation

Let's test the text generation with a random profile.

In [5]:
# Get a random profile
profile = get_random_profile()

if profile:
    stats, profile_text = display_profile_text(profile)
    
    # Display statistics
    print("=" * 80)
    print(f"Profile ID: {stats['profile_id']}")
    print(f"Username: {stats['username']}")
    print(f"Company Name: {stats['name']}")
    print("=" * 80)
    print("\nStatistics:")
    print(f"- Text length: {stats['text_length']} characters")
    print(f"- Number of services: {stats['service_count']}")
    if profile.provider_type == 'Company':
        print(f"- Number of members: {stats['member_count']}")
    print("\nFirst 1000 characters of profile text:")
    print("-" * 80)
    print(profile_text[:1000] + "..." if len(profile_text) > 1000 else profile_text)
else:
    print("No profiles found in the database.")

Profile ID: 170
Username: help@360accountants.co.uk
Company Name: 360 Chartered Accountants

Statistics:
- Text length: 1411 characters
- Number of services: 1
- Number of members: 0

First 1000 characters of profile text:
--------------------------------------------------------------------------------
=== SERVICE PROVIDER PROFILE ===
Company Name: 360 Chartered Accountants
Provider Type: Company
Country: UK
Session Status: inactive
Email Addresses: help@360accountants.co.uk, leanne@360accountants.co.uk
Telephone Numbers: 01482 427360
Website: https://www.360accountants.co.uk
Office Locations: 18/19 Albion Street, Hull, East Yorkshire, HU1 3TG
Key Individuals: Andy Steele (Chairperson), Leanne Steele (Human Resources Director), Sophie Holmes (Client Services Director)

=== SERVICES OFFERED ===

--- Service 1 ---
Title: Chartered Accountancy & Business Advisory Services
Description: 360 Chartered Accountants provides comprehensive accounting, tax, and business advisory services. They of

## 6. Test Profile with Services

Let's specifically test a profile that has services.

In [6]:
# Get a profile with services
profile_with_services = get_random_profile(with_services=True)

if profile_with_services:
    stats, profile_text = display_profile_text(profile_with_services, save_to_file=True, 
                                              filename="profile_with_services_demo.txt")
    
    print("\n" + "=" * 80)
    print(f"Profile: {stats['name']} (ID: {stats['profile_id']})")
    print(f"Services: {stats['service_count']}")
    print("=" * 80)
    
    # Show service details
    print("\nService Details:")
    for i, service in enumerate(profile_with_services.services.all(), 1):
        print(f"\n--- Service {i} ---")
        print(f"Title: {service.service_title}")
        if service.service_tags.exists():
            tags = [tag.name for tag in service.service_tags.all()]
            print(f"Tags: {', '.join(tags)}")
        if service.rating_score:
            print(f"Rating: {service.rating_score}/5")
else:
    print("No profiles with services found.")

Profile text saved to: profile_with_services_demo.txt

Profile: ARC Associates (ID: 197)
Services: 1

Service Details:

--- Service 1 ---
Title: Auditing and Accounting Services
Tags: Audit, Internal Audit, Real Estate Audit, Due Diligence Audit, Investigation Audit, Information System Audit, Accounting Services, ICV Audit, VAT/TAX Services, Forensic Audit
Rating: 4.5/5


## 7. Analyze Multiple Profiles

Let's analyze multiple profiles to see the text length distribution.

In [7]:
# Analyze text lengths for multiple profiles
sample_size = min(10, ServiceProviderProfile.objects.count())
profiles_sample = random.sample(list(ServiceProviderProfile.objects.all()), sample_size)

text_length_data = []

for profile in profiles_sample:
    profile_text = profile.get_profile_text()
    text_length_data.append({
        'profile_id': profile.id,
        'name': profile.name[:30] + '...' if len(profile.name or '') > 30 else profile.name,
        'country': profile.country,
        'provider_type': profile.provider_type,
        'text_length': len(profile_text),
        'services': profile.services.count(),
        'has_vision': bool(profile.vision),
        'has_website': bool(profile.website)
    })

# Create DataFrame
df = pd.DataFrame(text_length_data)
print("Sample Profile Text Length Analysis:")
print("=" * 80)
display(df)

# Summary statistics
print("\nSummary Statistics:")
print(f"Average text length: {df['text_length'].mean():.0f} characters")
print(f"Min text length: {df['text_length'].min()} characters")
print(f"Max text length: {df['text_length'].max()} characters")
print(f"Profiles with services: {(df['services'] > 0).sum()}/{len(df)}")

Sample Profile Text Length Analysis:


Unnamed: 0,profile_id,name,country,provider_type,text_length,services,has_vision,has_website
0,185,AB Capital Services FZE,UAE,Company,1957,1,False,True
1,169,111 Group,UAE,Company,1330,1,False,True
2,190,Adepts Chartered Accountants,UAE,Company,2368,1,False,True
3,179,ATN MAC,UAE,Company,1158,1,False,True
4,201,Abode Options,UAE,Company,1957,1,False,True
5,187,Abdul Majeed Al Marzooqi Audit...,UAE,Company,1670,1,False,True
6,194,AMA Audit Tax Advisory,UAE,Company,1878,1,False,True
7,198,AX CAPITAL,UAE,Company,1521,1,False,True
8,182,Acuvat Auditing,UAE,Company,1446,1,False,True
9,177,AAA Associates Immigration Ser...,UAE,Company,1686,1,False,True



Summary Statistics:
Average text length: 1697 characters
Min text length: 1158 characters
Max text length: 2368 characters
Profiles with services: 10/10


## 8. Search Profiles by Criteria

Let's create functions to search for specific profiles.

In [None]:
def search_profiles(country=None, provider_type=None, has_website=True, limit=5):
    """
    Search for profiles based on criteria.
    """
    queryset = ServiceProviderProfile.objects.all()
    
    if country:
        queryset = queryset.filter(country=country)
    
    if provider_type:
        queryset = queryset.filter(provider_type=provider_type)
    
    if has_website is not None:
        if has_website:
            queryset = queryset.exclude(Q(website__isnull=True) | Q(website=''))
        else:
            queryset = queryset.filter(Q(website__isnull=True) | Q(website=''))
    
    return queryset[:limit]

# Example: Search for UAE companies with websites
uae_companies = search_profiles(country='UAE', provider_type='Company', has_website=True)

print("UAE Companies with Websites:")
print("=" * 80)
for profile in uae_companies:
    print(f"\nCompany: {profile.name}")
    print(f"Website: {profile.website}")
    print(f"Services: {profile.services.count()}")
    print(f"Text preview (first 200 chars):")
    preview = profile.get_profile_text()[:200]
    print(f"  {preview}...")

## 9. Generate Text for Specific Profile ID

Test with a specific profile ID (similar to the management command --id option).

In [None]:
# You can change this ID to test with a specific profile
specific_profile_id = 185  # Example ID from the previous test

try:
    specific_profile = ServiceProviderProfile.objects.get(id=specific_profile_id)
    
    # Generate and display the full text
    profile_text = specific_profile.get_profile_text()
    
    print("=" * 80)
    print(f"Profile ID: {specific_profile.id}")
    print(f"Username: {specific_profile.user.username}")
    print("=" * 80)
    print("\nFull Profile Text:")
    print("-" * 80)
    print(profile_text)
    print("-" * 80)
    print(f"\nTotal characters: {len(profile_text)}")
    
except ServiceProviderProfile.DoesNotExist:
    print(f"Profile with ID {specific_profile_id} not found.")
    print("\nAvailable profile IDs:")
    available_ids = ServiceProviderProfile.objects.values_list('id', flat=True).order_by('id')[:20]
    print(list(available_ids))

## 10. Export Multiple Profiles to Files

Let's create a batch export function to save multiple profile texts.

In [None]:
import os
from datetime import datetime

def batch_export_profiles(profiles, output_dir="profile_texts_export"):
    """
    Export multiple profiles to text files.
    """
    # Create output directory if it doesn't exist
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    
    exported = []
    for i, profile in enumerate(profiles, 1):
        filename = os.path.join(output_dir, f"profile_{profile.id}_{timestamp}.txt")
        stats, _ = display_profile_text(profile, save_to_file=True, filename=filename)
        exported.append({
            'filename': filename,
            'profile_id': profile.id,
            'name': profile.name,
            'text_length': stats['text_length']
        })
        print(f"Exported {i}/{len(profiles)}: {profile.name}")
    
    # Create summary file
    summary_file = os.path.join(output_dir, f"export_summary_{timestamp}.txt")
    with open(summary_file, 'w') as f:
        f.write(f"Profile Text Export Summary\n")
        f.write(f"Generated: {datetime.now()}\n")
        f.write(f"Total profiles exported: {len(exported)}\n\n")
        
        for item in exported:
            f.write(f"ID: {item['profile_id']} - {item['name']} ({item['text_length']} chars)\n")
    
    print(f"\nExport complete! Summary saved to: {summary_file}")
    return exported

# Export a few random profiles
profiles_to_export = random.sample(list(ServiceProviderProfile.objects.all()), 
                                  min(3, ServiceProviderProfile.objects.count()))
exported_files = batch_export_profiles(profiles_to_export)

## Summary

This notebook demonstrated:

1. Setting up Django ORM in a Jupyter notebook
2. Exploring ServiceProviderProfile data
3. Using the `get_profile_text()` method to generate comprehensive text representations
4. Analyzing text lengths and profile statistics
5. Searching profiles by various criteria
6. Exporting profile texts to files

The generated text includes all profile fields and associated services, making it perfect for:
- Generating embeddings for similarity search
- Creating searchable content
- Exporting profile data for analysis
- Building recommendation systems

In [8]:
import tiktoken

def count_tokens(text, model_name="gpt-3.5-turbo"):
    """
    Count tokens in text using OpenAI's tiktoken.
    
    Args:
        text: The text to count tokens for
        model_name: The model to use for tokenization (default: gpt-3.5-turbo)
        
    Returns:
        int: Number of tokens
    """
    try:
        # Get the encoding for the specified model
        encoding = tiktoken.encoding_for_model(model_name)
        # Count tokens
        token_count = len(encoding.encode(text))
        return token_count
    except Exception as e:
        print(f"Error counting tokens: {e}")
        return 0

def analyze_profiles_with_tokens(sample_size=None, model_name="gpt-3.5-turbo"):
    """
    Analyze profiles with both character length and token count.
    
    Args:
        sample_size: Number of profiles to analyze (None for all)
        model_name: OpenAI model for token counting
        
    Returns:
        DataFrame with profile analysis including token counts
    """
    all_profiles = list(ServiceProviderProfile.objects.all())
    
    if sample_size:
        profiles_sample = random.sample(all_profiles, min(sample_size, len(all_profiles)))
    else:
        profiles_sample = all_profiles
    
    analysis_data = []
    
    print(f"Analyzing {len(profiles_sample)} profiles for token counts...")
    
    for i, profile in enumerate(profiles_sample, 1):
        # Generate profile text
        profile_text = profile.get_profile_text()
        
        # Count tokens
        token_count = count_tokens(profile_text, model_name)
        
        analysis_data.append({
            'profile_id': profile.id,
            'name': profile.name[:40] + '...' if len(profile.name or '') > 40 else profile.name,
            'country': profile.country,
            'provider_type': profile.provider_type,
            'text_length': len(profile_text),
            'token_count': token_count,
            'services': profile.services.count(),
            'members': profile.members.count() if profile.provider_type == 'Company' else 0,
            'has_vision': bool(profile.vision),
            'has_website': bool(profile.website),
            'has_key_individuals': bool(profile.key_individuals),
            'tokens_per_char': round(token_count / len(profile_text), 3) if len(profile_text) > 0 else 0
        })
        
        if i % 10 == 0:
            print(f"Processed {i}/{len(profiles_sample)} profiles...")
    
    return pd.DataFrame(analysis_data)

# Analyze all profiles with token counting
print("Starting comprehensive token analysis...")
df_with_tokens = analyze_profiles_with_tokens()

# Sort by token count (highest first)
df_sorted_by_tokens = df_with_tokens.sort_values('token_count', ascending=False)

print(f"\n{'='*80}")
print("PROFILES WITH HIGHEST TOKEN COUNTS")
print(f"{'='*80}")

# Display top 10 profiles by token count
display(df_sorted_by_tokens.head(10))

# Summary statistics
print(f"\n{'='*50}")
print("TOKEN COUNT STATISTICS")
print(f"{'='*50}")
print(f"Total profiles analyzed: {len(df_with_tokens)}")
print(f"Average tokens per profile: {df_with_tokens['token_count'].mean():.0f}")
print(f"Median tokens per profile: {df_with_tokens['token_count'].median():.0f}")
print(f"Min tokens: {df_with_tokens['token_count'].min()}")
print(f"Max tokens: {df_with_tokens['token_count'].max()}")
print(f"Standard deviation: {df_with_tokens['token_count'].std():.0f}")

# Token efficiency analysis
print(f"\n{'='*50}")
print("TOKEN EFFICIENCY ANALYSIS")
print(f"{'='*50}")
print(f"Average tokens per character: {df_with_tokens['tokens_per_char'].mean():.3f}")
print(f"Most efficient (lowest tokens/char): {df_with_tokens['tokens_per_char'].min():.3f}")
print(f"Least efficient (highest tokens/char): {df_with_tokens['tokens_per_char'].max():.3f}")

# Correlation analysis
print(f"\n{'='*50}")
print("CORRELATION ANALYSIS")
print(f"{'='*50}")
correlation_text_tokens = df_with_tokens['text_length'].corr(df_with_tokens['token_count'])
correlation_services_tokens = df_with_tokens['services'].corr(df_with_tokens['token_count'])
print(f"Correlation between text length and token count: {correlation_text_tokens:.3f}")
print(f"Correlation between service count and token count: {correlation_services_tokens:.3f}")

Starting comprehensive token analysis...
Analyzing 34 profiles for token counts...
Processed 10/34 profiles...
Processed 20/34 profiles...
Processed 30/34 profiles...

PROFILES WITH HIGHEST TOKEN COUNTS


Unnamed: 0,profile_id,name,country,provider_type,text_length,token_count,services,members,has_vision,has_website,has_key_individuals,tokens_per_char
26,189,AKT Associates,India,Company,2264,580,1,0,False,True,True,0.256
18,190,Adepts Chartered Accountants,UAE,Company,2368,564,1,0,False,True,True,0.238
25,186,AKM Global,India,Company,2135,559,1,0,False,True,True,0.262
29,195,AMCA (Abdulla Al Marzooqi Chartered Acco...,UAE,Company,2006,482,1,0,False,True,True,0.24
6,175,AKA Management Consultancy,UAE,Company,2054,455,1,0,False,True,True,0.222
7,176,A&H Consultants LLC,UAE,Company,2210,454,1,0,False,True,True,0.205
17,188,AD Ports Group,UAE,Company,1943,448,1,0,False,True,True,0.231
15,185,AB Capital Services FZE,UAE,Company,1957,445,1,0,False,True,True,0.227
33,201,Abode Options,UAE,Company,1957,443,1,0,False,True,True,0.226
30,197,ARC Associates,UAE,Company,1740,439,1,0,False,True,True,0.252



TOKEN COUNT STATISTICS
Total profiles analyzed: 34
Average tokens per profile: 371
Median tokens per profile: 367
Min tokens: 196
Max tokens: 580
Standard deviation: 98

TOKEN EFFICIENCY ANALYSIS
Average tokens per character: 0.222
Most efficient (lowest tokens/char): 0.192
Least efficient (highest tokens/char): 0.262

CORRELATION ANALYSIS
Correlation between text length and token count: 0.959
Correlation between service count and token count: nan


  c /= stddev[:, None]
  c /= stddev[None, :]


In [9]:
def analyze_top_token_profiles(df, top_n=5):
    """
    Detailed analysis of profiles with highest token counts.
    """
    top_profiles = df.nlargest(top_n, 'token_count')
    
    print(f"{'='*80}")
    print(f"DETAILED ANALYSIS OF TOP {top_n} PROFILES BY TOKEN COUNT")
    print(f"{'='*80}")
    
    for idx, row in top_profiles.iterrows():
        print(f"\n{'-'*60}")
        print(f"RANK #{top_profiles.index.get_loc(idx) + 1}")
        print(f"{'-'*60}")
        
        # Get the actual profile for detailed analysis
        try:
            profile = ServiceProviderProfile.objects.get(id=row['profile_id'])
            profile_text = profile.get_profile_text()
            
            print(f"Profile ID: {row['profile_id']}")
            print(f"Name: {profile.name}")
            print(f"Country: {row['country']}")
            print(f"Token Count: {row['token_count']:,}")
            print(f"Character Length: {row['text_length']:,}")
            print(f"Tokens per Character: {row['tokens_per_char']}")
            print(f"Services: {row['services']}")
            if profile.provider_type == 'Company':
                print(f"Members: {row['members']}")
            
            # Show text breakdown
            lines = profile_text.split('\n')
            non_empty_lines = [line for line in lines if line.strip()]
            print(f"Text Lines: {len(non_empty_lines)}")
            
            # Show first 200 characters
            print(f"\nText Preview:")
            print(f"'{profile_text[:200]}...'")
            
        except ServiceProviderProfile.DoesNotExist:
            print(f"Profile {row['profile_id']} not found")
    
    return top_profiles

# Analyze top 5 profiles
top_profiles_detailed = analyze_top_token_profiles(df_sorted_by_tokens, top_n=5)

DETAILED ANALYSIS OF TOP 5 PROFILES BY TOKEN COUNT

------------------------------------------------------------
RANK #1
------------------------------------------------------------
Profile ID: 189
Name: AKT Associates
Country: India
Token Count: 580
Character Length: 2,264
Tokens per Character: 0.256
Services: 1
Members: 0
Text Lines: 27

Text Preview:
'=== SERVICE PROVIDER PROFILE ===
Company Name: AKT Associates
Provider Type: Company
Country: India
Session Status: inactive
Email Addresses: cs@aktassociates.com
Mobile Numbers: 8080088288
Website: h...'

------------------------------------------------------------
RANK #2
------------------------------------------------------------
Profile ID: 190
Name: Adepts Chartered Accountants
Country: UAE
Token Count: 564
Character Length: 2,368
Tokens per Character: 0.238
Services: 1
Members: 0
Text Lines: 29

Text Preview:
'=== SERVICE PROVIDER PROFILE ===
Company Name: Adepts Chartered Accountants
Provider Type: Company
Country: UAE
Session 

In [None]:
import tiktoken
import random
import numpy as np
from IPython.display import display, Markdown
import pandas as pd
from dotenv import load_dotenv

# Import the embedding utilities
from accounts.embedding_utils import EmbeddingGenerator

load_dotenv("/home/mohammed/Desktop/tech_projects/growbal/crawler/envs/1.env")

def enhanced_profile_embedding_analysis():
    """
    Enhanced version with token counting and detailed analysis.
    """
    print("="*80)
    print("ENHANCED PROFILE EMBEDDING ANALYSIS")
    print("="*80)
    
    # Pick a random profile
    all_profiles = list(ServiceProviderProfile.objects.all())
    if not all_profiles:
        print("No profiles found in the database.")
        return None
    
    random_profile = random.choice(all_profiles)
    
    # Generate profile text
    profile_text = random_profile.get_profile_text()
    
    # Count tokens
    try:
        encoding = tiktoken.encoding_for_model("text-embedding-ada-002")
        token_count = len(encoding.encode(profile_text))
    except Exception as e:
        print(f"Error counting tokens: {e}")
        token_count = 0
    
    print(f"\n🎯 Selected Profile Analysis:")
    print(f"   Profile ID: {random_profile.id}")
    print(f"   Name: {random_profile.name}")
    print(f"   Username: {random_profile.user.username}")
    print(f"   Country: {random_profile.country}")
    print(f"   Provider Type: {random_profile.provider_type}")
    
    print(f"\n📊 Text Statistics:")
    print(f"   Character Length: {len(profile_text):,}")
    print(f"   Token Count: {token_count:,}")
    print(f"   Tokens per Character: {token_count/len(profile_text):.3f}" if len(profile_text) > 0 else "   Tokens per Character: 0")
    print(f"   Services: {random_profile.services.count()}")
    if random_profile.provider_type == 'Company':
        print(f"   Members: {random_profile.members.count()}")
    
    # Check if embedding already exists
    had_existing_embedding = bool(random_profile.profile_embedding)
    print(f"\n🔍 Embedding Status:")
    print(f"   Had Existing Embedding: {'Yes' if had_existing_embedding else 'No'}")
    
    # Generate embedding
    try:
        generator = EmbeddingGenerator(model="text-embedding-ada-002", api_key=os.getenv("OPENAI_API_KEY"))
        print(f"\n🚀 Generating OpenAI Embedding...")
        
        # Time the embedding generation
        import time
        start_time = time.time()
        embedding = generator.update_profile_embedding(random_profile)
        end_time = time.time()
        generation_time = end_time - start_time
        
        print(f"✅ Embedding Generated in {generation_time:.2f} seconds")
        
    except Exception as e:
        print(f"❌ Error generating embedding: {e}")
        return None
    
    # Refresh and analyze the stored embedding
    random_profile.refresh_from_db()
    embedding_array = np.array(random_profile.profile_embedding)
    
    print(f"\n📈 Embedding Analysis:")
    print(f"   Dimensions: {len(embedding_array)}")
    print(f"   Value Range: [{embedding_array.min():.6f}, {embedding_array.max():.6f}]")
    print(f"   Mean: {embedding_array.mean():.6f}")
    print(f"   Standard Deviation: {embedding_array.std():.6f}")
    print(f"   L2 Norm: {np.linalg.norm(embedding_array):.6f}")
    
    # Calculate cost estimation (approximate)
    # text-embedding-ada-002 costs $0.0001 per 1K tokens
    estimated_cost = (token_count / 1000) * 0.0001
    print(f"\n💰 Cost Estimation:")
    print(f"   Estimated API Cost: ${estimated_cost:.6f}")
    
    # Show text breakdown
    lines = profile_text.split('\n')
    non_empty_lines = [line for line in lines if line.strip()]
    print(f"\n📝 Text Breakdown:")
    print(f"   Total Lines: {len(lines)}")
    print(f"   Non-empty Lines: {len(non_empty_lines)}")
    
    # Show profile text sections
    sections = profile_text.split('===')
    print(f"   Text Sections: {len(sections)}")
    for i, section in enumerate(sections[:3], 1):  # Show first 3 sections
        section_clean = section.strip()
        if section_clean:
            print(f"     Section {i}: {section_clean[:50]}..." if len(section_clean) > 50 else f"     Section {i}: {section_clean}")
    
    # Display full results
    results_data = {
        'Profile ID': random_profile.id,
        'Name': random_profile.name[:30] + '...' if len(random_profile.name or '') > 30 else random_profile.name,
        'Country': random_profile.country,
        'Type': random_profile.provider_type,
        'Chars': len(profile_text),
        'Tokens': token_count,
        'Tokens/Char': round(token_count/len(profile_text), 3) if len(profile_text) > 0 else 0,
        'Services': random_profile.services.count(),
        'Embedding Dims': len(embedding_array),
        'Generation Time (s)': round(generation_time, 2),
        'Estimated Cost ($)': round(estimated_cost, 6),
        'Had Existing': had_existing_embedding
    }
    
    print(f"\n📋 Complete Results:")
    df_results = pd.DataFrame([results_data])
    display(df_results)
    
    # Show text preview
    print(f"\n📖 Profile Text Preview (first 500 characters):")
    print("-" * 60)
    print(profile_text[:500] + "..." if len(profile_text) > 500 else profile_text)
    print("-" * 60)
    
    return random_profile, embedding_array, results_data

# Execute the enhanced analysis
profile, embedding, results = enhanced_profile_embedding_analysis()

ENHANCED PROFILE EMBEDDING ANALYSIS

🎯 Selected Profile Analysis:
   Profile ID: 188
   Name: AD Ports Group
   Username: customerservice@adports.ae
   Country: UAE
   Provider Type: Company

📊 Text Statistics:
   Character Length: 1,943
   Token Count: 448
   Tokens per Character: 0.231
   Services: 1
   Members: 0

🔍 Embedding Status:
   Had Existing Embedding: No

🚀 Generating OpenAI Embedding...
❌ Error generating embedding: OpenAI API key not configured. Set OPENAI_API_KEY in settings.


TypeError: cannot unpack non-iterable NoneType object