In [9]:
import numpy as np
import pandas as pd
from faker import Faker



In [3]:
class CustomerDataGeneration:
    def __init__(self,faker_locale,seed,count_to_generate):
        self.faker_seed = seed
        self.numpy_seed = seed
        self.count_to_generate = count_to_generate
        self.faker_locale = faker_locale
        self.fake = Faker(self.faker_locale)
        # Data block
        self.names = []
        self.first_name = []
        self.last_name = []
        self.date_of_birth = [] #   20 < date < 82 years
        self.gender = []
        self.email = []
        self.phone_number = []
        self.agree_for_promo = []
        self.autopay_card = []
        self.customer_data_frame = pd.DataFrame(columns = ['first_name',
                                                           'last_name',
                                                           'date_of_birth',
                                                           'gender',
                                                           'email',
                                                           'phone_number',
                                                           'agree_for_promo',
                                                           'autopay_card'])
    def generate_names(self):
        self.names = []   
        for _ in range(self.count_to_generate):
            self.names.append(self.fake.name())
        self._split_names()
    
    def generate_gender(self, prob_M = 0.487, prob_F = 0.513):
        gender_list = self._dist_data_gen(gen_mask = ['M','F'],probs = [prob_M, prob_F])
        self.gender = list(gender_list)
             
    def generate_email(self):
        self.email = []   
        for _ in range(self.count_to_generate):
            self.email.append(self.fake.bothify(text='?*******#')+"@"+self.fake.free_email_domain())
    
    def generate_agree_for_promo(self, prob_Y = 0.33, prob_N = 0.67):
        self.agree_for_promo =[]
        self.agree_for_promo = list(self._dist_data_gen(gen_mask = ['Yes', 'No'], probs = [prob_Y, prob_N]))
    
    def generate_autopay_card(self,prob_Y=0.6365):
        self.autopay_card = []
        self.autopay_card = list(self._dist_data_gen(gen_mask = ['Yes', 'No'], probs = [prob_Y, 1-prob_Y]))
    
    def generate_birth_date(self):
        self.date_of_birth = []   
        for _ in range(self.count_to_generate):
            self.date_of_birth.append(self.fake.date_between(start_date='-82y', end_date='-15y'))
    
    def generate_phone_number(self):
        self.phone_number = []
        for _ in range(self.count_to_generate):
            self.phone_number.append(self.fake.numerify(text='90-####-####'))
            
    def _split_names(self):
        self.first_name = []
        self.last_name = []
        for i in range(len(self.names)):
            self.first_name.append(self.names[i].split(' ')[0])
            self.last_name.append(self.names[i].split(' ')[1])
    
    def create_data_frame(self):
        self.customer_data_frame['first_name'] = self.first_name
        self.customer_data_frame['last_name'] = self.last_name
        self.customer_data_frame['date_of_birth'] = self.date_of_birth
        self.customer_data_frame[ 'gender'] = self.gender
        self.customer_data_frame['email'] = self.email
        self.customer_data_frame['phone_number'] = self.phone_number
        self.customer_data_frame['agree_for_promo'] = self.agree_for_promo
        self.customer_data_frame['autopay_card'] = self.autopay_card
        return self.customer_data_frame
                                                           
    def generate_all_data(self):
        self.generate_names()
        self.generate_birth_date()
        self.generate_gender()
        self.generate_email()
        self.generate_phone_number()
        self.generate_agree_for_promo()
        self.generate_autopay_card()
        
    def _dist_data_gen(self,gen_mask=[0,1],probs=[0.5,0.5]):
        return np.random.choice(gen_mask, size=self.count_to_generate, p=probs)
         
    def customer_data_print(self):
        print("First Names:",self.first_name,
              "\nLast Names:",self.last_name,
              "\nFull Names:",self.names,
              "\nDate of Birth:",self.date_of_birth,
             "\nGender:",self.gender,
             "\nEmail:",self.email,
             "\nMSISDN",self.phone_number,
             "\nagree_for_promo",self.agree_for_promo,
             "\nautopay_card",self.autopay_card)

In [11]:
cdg = CustomerDataGeneration('ja_JP',0,5)

In [12]:
cdg.generate_names()
cdg.generate_birth_date()
cdg.generate_gender()
cdg.generate_email()

In [13]:
cdg.customer_data_print()

First Names: ['中村', '藤田', '吉田', '加藤', '加藤'] 
Last Names: ['知実', 'くみ子', '七夏', '和也', '香織'] 
Full Names: ['中村 知実', '藤田 くみ子', '吉田 七夏', '加藤 和也', '加藤 香織'] 
Date of Birth: [datetime.date(1977, 7, 28), datetime.date(1989, 9, 14), datetime.date(1996, 8, 31), datetime.date(1954, 10, 26), datetime.date(1948, 1, 19)] 
Gender: ['F', 'M', 'F', 'M', 'F'] 
Email: ['yosukesakamoto@yahoo.com', 'fyoshida@hotmail.com', 'takumaogawa@hotmail.com', 'fnakamura@yahoo.com', 'maisasaki@gmail.com']


In [7]:
values, counts = np.unique(cdg._dist_data_gen(gen_mask = ['sas','sus','kek'],probs=[0.1,0.7,0.2]), return_counts=True)

In [8]:
print("push from jupyterlab")

push from jupyterlab
