In [1]:
# --- Smart Lead AI: Synthetic Dataset Generator ---

import random
from pathlib import Path

import numpy as np
import pandas as pd
from faker import Faker

# --- Configuration ---
# Fixed seed so that "Restart Kernel -> Run All" regenerates the exact same
# dataset instead of silently overwriting the CSV with different rows.
SEED = 42

# Define dataset size
num_samples = 1000

# Where the generated dataset is written (relative to the working directory).
output_path = Path('../data/lead_data.csv')

# Seed every random source used below: the stdlib `random` module (categorical
# choices), NumPy's global RNG (numeric features) and Faker (lead ids).
random.seed(SEED)
np.random.seed(SEED)
Faker.seed(SEED)

# Initialize Faker
fake = Faker()

# Possible values
lead_sources = ['Google Ads', 'LinkedIn', 'Referral', 'Instagram', 'Facebook', 'Email Campaign']
countries = ['India', 'USA', 'UK', 'Germany', 'Australia', 'Canada']

# Column order must match the order of values appended per row in the loop.
columns = [
    'lead_id', 'lead_source', 'country', 'pages_visited', 'time_on_site',
    'email_opens', 'interaction_score', 'profile_complete', 'previous_purchases', 'converted'
]

data = []

for _ in range(num_samples):
    source = random.choice(lead_sources)
    country = random.choice(countries)

    # NOTE: np.random.randint excludes the upper bound, so e.g. pages_visited
    # is drawn from 1..19 and previous_purchases from {0, 1}.
    pages_visited = np.random.randint(1, 20)
    time_on_site = np.round(np.random.uniform(0.5, 30.0), 2)
    email_opens = np.random.randint(0, 10)
    interaction_score = np.random.randint(10, 100)
    profile_complete = np.random.randint(30, 100)
    previous_purchases = np.random.randint(0, 2)

    # Target label (converted) - synthetic logic: a lead counts as converted
    # when it is highly engaged with a mostly complete profile, OR has bought
    # before, OR opens many emails and spends a long time on the site.
    converted = int(
        (interaction_score > 60 and profile_complete > 70)
        or (previous_purchases == 1)
        or (email_opens > 5 and time_on_site > 10)
    )

    data.append([
        fake.uuid4(), source, country, pages_visited, time_on_site, email_opens,
        interaction_score, profile_complete, previous_purchases, converted
    ])

# Create DataFrame
df = pd.DataFrame(data, columns=columns)

# Save dataset (create the target directory first so a fresh checkout without
# a data/ folder does not fail with an OSError on write).
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)

df.head()


Unnamed: 0,lead_id,lead_source,country,pages_visited,time_on_site,email_opens,interaction_score,profile_complete,previous_purchases,converted
0,75f3d1c5-3915-407f-ba59-f33d993706ed,Instagram,UK,13,10.09,6,90,52,1,1
1,1cfc1030-ff1c-43b7-a01c-f9b30cbea1c9,Referral,UK,2,18.55,9,17,77,1,1
2,8ac309fa-b017-492e-b1e0-b90a827e1250,Email Campaign,India,15,29.43,6,84,56,0,1
3,5c6bd21e-4df0-49ea-8b6a-075e4473b494,Referral,Canada,13,19.82,9,96,93,1,1
4,80bf7f7c-8786-4e37-80f7-34211ad7959c,Facebook,UK,14,1.84,7,59,82,1,1
