In [1]:
import numpy as np
import pandas as pd
from faker import Faker



In [2]:
class CustomerDataGeneration:
    """Generate small synthetic customer datasets with Faker and NumPy.

    Each ``generate_*`` method fills one (or more) of the list attributes
    below with ``count_to_generate`` entries, replacing any previous content.

    Parameters
    ----------
    faker_locale : str or list of str
        Locale passed straight to ``Faker(...)``, e.g. ``'ja_JP'``.
    seed : int or None
        Seed applied to both the Faker instance and the NumPy generator used
        for categorical sampling, so two objects built with the same
        arguments produce the same data. ``None`` means non-deterministic.
    count_to_generate : int
        Number of records every ``generate_*`` method produces.

    Attributes
    ----------
    names, first_name, last_name, date_of_birth, gender, email : list
        Generated columns; empty until the matching ``generate_*`` is called.

    Notes
    -----
    NOTE(review): ``first_name`` is simply the first space-separated token of
    the full name. For locales that put the family name first (the ``ja_JP``
    sample output appears to do so) ``first_name`` and ``last_name`` are
    effectively swapped; confirm whether that matters for consumers.
    """

    def __init__(self,faker_locale,seed,count_to_generate):
        self.faker_seed = seed
        self.numpy_seed = seed
        self.count_to_generate = count_to_generate
        self.faker_locale = faker_locale
        self.fake = Faker(self.faker_locale)
        # Seed only this Faker instance so other Faker objects are unaffected.
        self.fake.seed_instance(self.faker_seed)
        # Private NumPy generator: avoids touching the global np.random state.
        self._rng = np.random.default_rng(self.numpy_seed)
        # Data block
        self.names = []
        self.first_name = []
        self.last_name = []
        self.date_of_birth = []  # dates between 82 and 20 years before today
        self.gender = []
        self.email = []

    def generate_names(self):
        """Fill ``names`` with full names and derive ``first_name``/``last_name``."""
        self.names = [self.fake.name() for _ in range(self.count_to_generate)]
        self._split_names()

    def generate_gender(self):
        """Fill ``gender`` with 'M'/'F' drawn with probabilities 0.487/0.513."""
        gender_list = self._dist_data_gen(gen_mask=('M', 'F'), probs=(0.487, 0.513))
        # tolist() yields plain Python str instead of NumPy scalar strings.
        self.gender = gender_list.tolist()

    def generate_email(self):
        """Fill ``email`` with free-provider addresses from Faker."""
        self.email = [self.fake.free_email() for _ in range(self.count_to_generate)]

    def generate_birth_date(self):
        """Fill ``date_of_birth`` with dates between 82 and 20 years ago."""
        self.date_of_birth = [
            self.fake.date_between(start_date='-82y', end_date='-20y')
            for _ in range(self.count_to_generate)
        ]

    def _split_names(self):
        """Split every entry of ``names`` on single spaces.

        The first token goes to ``first_name`` and the second to
        ``last_name``. A name without a space gets an empty ``last_name``
        instead of raising ``IndexError``; tokens after the second are ignored.
        """
        self.first_name = []
        self.last_name = []
        for full_name in self.names:
            parts = full_name.split(' ')  # split once, reuse for both fields
            self.first_name.append(parts[0])
            self.last_name.append(parts[1] if len(parts) > 1 else '')

    def _dist_data_gen(self,gen_mask=(0,1),probs=(0.5,0.5)):
        """Draw ``count_to_generate`` samples from ``gen_mask`` with weights ``probs``.

        Parameters
        ----------
        gen_mask : sequence
            Candidate values to sample from.
        probs : sequence of float
            Probability of each candidate; same length as ``gen_mask``.

        Returns
        -------
        numpy.ndarray
            Array of length ``count_to_generate`` holding the sampled values.
        """
        return self._rng.choice(gen_mask, size=self.count_to_generate, p=probs)

    def customer_data_print(self):
        """Print every generated column, one labelled line per column."""
        print("First Names:",self.first_name,
              "\nLast Names:",self.last_name,
              "\nFull Names:",self.names,
              "\nDate of Birth:",self.date_of_birth,
             "\nGender:",self.gender,
             "\nEmail:",self.email)

In [10]:
# Generator for five synthetic customers using Faker's Japanese locale;
# the value 0 is passed as the seed argument.
cdg = CustomerDataGeneration(faker_locale='ja_JP', seed=0, count_to_generate=5)

In [11]:
# Populate each column on the generator. generate_names() also fills
# first_name / last_name through its internal split step.
# Keep the call order stable: it determines the order of random draws.
cdg.generate_names()
cdg.generate_birth_date()
cdg.generate_gender()
cdg.generate_email()

In [12]:
# Print every generated column for a quick visual check.
cdg.customer_data_print()

First Names: ['林', '斎藤', '松田', '佐々木', '佐々木'] 
Last Names: ['明美', '直人', '香織', '陽一', 'さゆり'] 
Full Names: ['林 明美', '斎藤 直人', '松田 香織', '佐々木 陽一', '佐々木 さゆり'] 
Date of Birth: [datetime.date(1949, 8, 26), datetime.date(1975, 5, 30), datetime.date(1944, 11, 10), datetime.date(1949, 1, 12), datetime.date(1999, 4, 21)] 
Gender: ['M', 'M', 'M', 'M', 'M'] 
Email: ['atsushi20@hotmail.com', 'saitoyumiko@yahoo.com', 'sasakimituru@hotmail.com', 'qkobayashi@yahoo.com', 'taichitanaka@hotmail.com']


In [13]:
# Sanity-check the weighted sampler with three arbitrary labels and count
# how often each one was drawn.
# NOTE(review): the saved output below totals 20 draws while `cdg` above is
# built with a count of 5, and the following cells carry lower execution
# counts - the outputs look stale; re-run from a fresh kernel to confirm.
label_draws = cdg._dist_data_gen(gen_mask=['sas', 'sus', 'kek'], probs=[0.1, 0.7, 0.2])
values, counts = np.unique(label_draws, return_counts=True)

In [7]:
# Distinct labels that were drawn (np.unique returns them sorted).
values

array(['kek', 'sas', 'sus'], dtype='<U3')

In [8]:
# Number of draws per label, aligned position-by-position with `values`.
counts

array([ 4,  1, 15], dtype=int64)