In [1]:
from mimesis.schema import Field, Schema
from mimesis import Generic
from mimesis.enums import Gender
from collections import defaultdict
import random
import csv
from datetime import date as pydate
from tqdm import tqdm
import datetime

# User-tunable settings: adjust the values below to suit the data set you need.
no_of_customers = 70000 # Number of customer rows to generate; keep this in sync with the other mock data scripts
# max_city_index = 50000 # cannot be more than 53375


# Shared mimesis generator, constructed with 'en'; every g.person / g.address /
# g.datetime / g.cryptographic call in this script goes through it.
g = Generic('en')
# Schema field helper, also constructed with 'en'.
# NOTE(review): `_` is not referenced anywhere in the visible code - confirm
# whether it is still needed.
_ = Field('en')

def calculate_age(d: pydate) -> int:
    """Return the number of completed years between ``d`` and today.

    The previous implementation used ``round(days / 365)``, which rounded the
    age *up* once a person was more than half a year past their birthday and
    ignored leap days. The age is now counted from calendar anniversaries.

    Args:
        d: The date to measure from (normally a birthdate).

    Returns:
        A non-negative whole number of years. As before, a date in the future
        is tolerated and yields the size of the gap rather than a negative
        number.
    """
    today = pydate.today()
    # Order the two dates so the result is non-negative whichever comes first.
    earlier, later = (d, today) if d <= today else (today, d)
    # The year difference over-counts by one until the anniversary is reached.
    anniversary_reached = (later.month, later.day) >= (earlier.month, earlier.day)
    return later.year - earlier.year - (0 if anniversary_reached else 1)

# Gender labels and the matching sampling weights handed to random.choices()
# when each customer is generated. The weights are relative, not percentages:
# they sum to 110, so the effective shares are 43/110, 57/110 and 10/110.
genderList = ['MALE','FEMALE','OTHER']
genderDist = [43,57,10]

# ---------------------------------------------------------------------------
# Generate customers.csv: one header row plus `no_of_customers` mock profiles.
#
# Each profile mixes mimesis-generated identity fields (uuid, name, phone,
# email, birthdate, occupation, street address) with categorical attributes
# sampled uniformly from the fixed lists below. Location is sampled as a
# cascade: country -> state -> city -> zip code.
# ---------------------------------------------------------------------------

# Keyword arguments passed to the mimesis name/title providers for each gender
# label. Labels that are missing here (i.e. 'OTHER') call the providers with
# no gender argument and are written out as 'OTHER'.
_GENDER_KWARGS = {
    'MALE': {'gender': Gender.MALE},
    'FEMALE': {'gender': Gender.FEMALE},
}

_EDUCATION_LEVELS = [
    "No High School", "High School", "Diploma", "Associate's Degree",
    "Bachelor's Degree", "Graduate Degree", "Postgraduate Degree",
]
_MARITAL_STATUSES = [
    "Divorced", "Married", "Other", "Separated", "Single", "Unknown", "Widowed",
]

# Categorical columns that are filled by one uniform pick each, in this order
# (dict insertion order is the sampling order).
_CATEGORICAL_CHOICES = {
    'annual_income': [
        '<$15k', '$15k-$25k', '$25k-$35k', '$35k-$50k', '$50k-$75k',
        '$75k-$100k', '$100k-$150k', '$150k-$200k', '>$200k',
    ],
    'employment_status': [
        'Seeking Employment', 'Temporarily Employed', 'Employed Part-Time',
        'Self-Employed', 'Employed Full-Time', 'Pursuing Further Studies',
    ],
    # NOTE(review): 'Excercise' looks like a typo for 'Exercise'. It is kept
    # as-is because other scripts may already match on the emitted value.
    'hobbies': [
        'Blogging', 'Baking', 'Photography', 'Painting', 'Writing', 'Dance',
        'Gardening', 'Excercise', 'Cooking', 'Hiking', 'Hunting', 'Origami',
        'Singing', 'Cycling', 'Juggling', 'Scuba Diving', 'Poker', 'Handicraft',
    ],
    'home_ownership': ['own', 'rent'],
    'degree_of_loyalty': ['Hard Core Loyals', 'Soft Core Loyals', 'Switchers'],
    'benefits_sought': [
        'Cost Advantage', 'Functionality', 'Healthy Lifestyle', 'Self-Expression',
    ],
    'personality': ['Easygoing', 'Determined and Ambitious'],
    'user_status': [
        'Regular Users', 'Ex-Users', 'Non-Users', 'Potential Users',
        'First-Time Users',
    ],
    'social_class': ['Lower Class', 'Working Class', 'Middle Class', 'Upper Class'],
    'lifestyle': ['Struggler', 'Aspirer', 'Explorer', 'Succeeder', 'Reformer'],
}

# Location cascade tables (these replace the former if/elif chains).
_STATES_BY_COUNTRY = {
    'USA': ['California', 'Alaska', 'Florida'],
    'New Zealand': ['Auckland', 'Wellington'],
    'UK': ['England', 'Wales'],
    'CANADA': ['Manitoba', 'Ontario', 'Alberta'],
}
_CITIES_BY_STATE = {
    'California': ['San Jose', 'Los Angeles', 'Fontana'],
    'Alaska': ['Angoon', 'Bettles', 'Palmer'],
    'Florida': ['Miami', 'Hialeah', 'Orlando'],
    'Auckland': ['Napier', 'Nelson', 'Rotorua'],
    'Wellington': ['Wellington City', 'Upper Hutt City', 'Porirua City'],
    'England': ['London', 'Birmingham', 'Manchester'],
    'Wales': ['Swansea', 'Caerphilly', 'Barry'],
    'Manitoba': ['Brandon', 'Dauphin'],
    'Ontario': ['Hamilton', 'Toronto'],
    'Alberta': ['Airdrie', 'Grande Prairie'],
}
_ZIP_CODES_BY_CITY = {
    'San Jose': ['94088', '95101', '95116'],
    'Los Angeles': ['90001', '90007', '90015'],
    'Fontana': ['92316', '92337', '92377'],
    'Angoon': ['99820', '99801'],
    'Bettles': ['99726', '99501'],
    'Palmer': ['99645', '99642'],
    'Miami': ['33101', '33127', '33136'],
    'Hialeah': ['33002', '33011', '33142'],
    'Orlando': ['32789', '32810', '32829'],
    'Napier': ['4104', '4110', '4141'],
    'Nelson': ['7010', '7020'],
    'Rotorua': ['3077', '3015', '3073'],
    'Wellington City': ['5012', '6022'],
    'Upper Hutt City': ['5018', '5372', '5381'],
    'Porirua City': ['5028', '5022'],
    # NOTE(review): 'NW1 0AU ' carries a trailing space in the emitted data;
    # kept unchanged here - confirm whether downstream consumers rely on it.
    'London': ['E1 0AA', 'NW1 0AU ', 'W1S 1AN'],
    'Birmingham': ['B1 1DD', 'B1 1DG'],
    'Manchester': ['M1 1AG', 'M1 1BB', 'M1 1DB'],
    'Swansea': ['SA1', 'SA1 1AU'],
    'Caerphilly': ['CF14 0LB', 'CF15 7UQ'],
    'Barry': ['CF62', 'CF62 5AA'],
    'Brandon': ['R7A 0A3', 'R7A 0A8'],
    'Dauphin': ['R7N 0A4', 'R7N 0A8'],
    'Hamilton': ['L8B', 'L9K', 'L0R'],
    'Toronto': ['M1L - M9N', 'M3C - M9N'],
    'Airdrie': ['T4A 0A5', 'T4A 0B2'],
    'Grande Prairie': ['T8V 0A6', 'T8V 0A2', 'T8V 0A9'],
}

# The registration window never changes, so the list of candidate dates is
# built once here instead of once per generated customer.
start_date = datetime.date(2022,4,1)
end_date = datetime.date(2023,3,31)
date_range = [start_date + datetime.timedelta(days=x) for x in range((end_date - start_date).days + 1)]

# An explicit encoding keeps the output independent of the platform locale.
with open('customers.csv', 'w', newline='', encoding='utf-8') as csvfile:
    fieldnames = [
        'customer_id', 'uuid', 'salutation', 'first_name', 'last_name',
        'gender', 'phone_number', 'email_id', 'birthdate', 'age',
        'education_level', 'marital_status', 'number_of_children',
        'register_date', 'occupation', 'annual_income', 'employment_status',
        'hobbies', 'home_ownership', 'degree_of_loyalty', 'benefits_sought',
        'personality', 'user_status', 'social_class', 'lifestyle',
        'mailing_street', 'city', 'state', 'country', 'zip_code',
    ]
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()

    for i in tqdm(range(no_of_customers)):
        # `profile` is keyed by the CSV column names so that it can be handed
        # to the writer directly, without a second hand-written mapping.
        profile = {}
        profile['customer_id'] = 10110+(i+1)  # ids start at 10111
        profile['uuid'] = g.cryptographic.uuid()

        # Weighted gender pick; it drives the name and salutation providers.
        randomGender = random.choices(genderList,genderDist,k=1)[0]
        gender_kwargs = _GENDER_KWARGS.get(randomGender, {})
        profile['first_name'] = g.person.first_name(**gender_kwargs)
        profile['last_name'] = g.person.last_name(**gender_kwargs)
        profile['salutation'] = g.person.title(**gender_kwargs)
        profile['gender'] = randomGender if gender_kwargs else 'OTHER'

        profile['phone_number'] = str(g.person.telephone())
        profile['email_id'] = g.person.email()
        profile['birthdate'] = g.datetime.date(start=1960, end=2010)
        profile['age'] = calculate_age(profile['birthdate'])
        profile['education_level'] = random.choice(_EDUCATION_LEVELS)
        profile['marital_status'] = random.choice(_MARITAL_STATUSES)
        # Single customers are always written with zero children.
        if profile['marital_status'] != 'Single':
            profile['number_of_children'] = random.randint(0,3)
        else:
            profile['number_of_children'] = 0
        profile['register_date'] = random.choice(date_range)
        profile['occupation'] = g.person.occupation()

        for column, options in _CATEGORICAL_CHOICES.items():
            profile[column] = random.choice(options)

        profile['mailing_street'] = str(g.address.address())

        # Location cascade: each level is drawn from the options of its parent.
        profile['country'] = random.choice(['USA','New Zealand','UK','CANADA'])
        profile['state'] = random.choice(_STATES_BY_COUNTRY[profile['country']])
        profile['city'] = random.choice(_CITIES_BY_STATE[profile['state']])
        profile['zip_code'] = random.choice(_ZIP_CODES_BY_CITY[profile['city']])

        writer.writerow(profile)

# No explicit csvfile.close() is needed: leaving the `with` block closes the file.

  0%|                                                 | 0/70000 [00:00<?, ?it/s]


NameError: name 'datetime' is not defined