In [1]:
import pandas as pd
import numpy as np
import datetime
from faker import Faker
from customer import CustomerDataGeneration

In [235]:
class ProductInstanceGeneration:
    def __init__(self, customer_df, product_df, faker_locale):
        self.customer_df = customer_df
        self.product_df = product_df
        self.faker_locale = faker_locale
        self.fake = Faker(self.faker_locale)
        
        #Data block
        
        self.product_instance_df = pd.DataFrame(columns = ["business_product_instance_id",
                                                          "customer_id",
                                                          "product_id",
                                                          "activation_date",
                                                          "termination_date",
                                                          "Status",
                                                          "distribution_channel"]) # Финальный этап
        
        
    
    def _product_generation(self, customer_id):
        product_type_tariff = self.product_df[self.product_df.product_type == "tariff"]
        customer = self.customer_df[self.customer_df["ID"] == customer_id]
        #print(customer)
        tariff_list = []
        activation_date = []
        termination_date = []
        Status = []
        distribution_channel = []
        #print(self._age_dist(customer.date_of_birth))
        date_since_split = str(customer.customer_since.values[0]).split("-")
        customer_since = datetime.date(int(date_since_split[0]), int(date_since_split[1]), int(date_since_split[2])) # datetime.date(int(date_since_strip[0]),int(date_since_strip[1]),int(date_since_strip[2]))
        age = self._age_dist(customer.date_of_birth)
        
        #print(customer_since)
        if customer_since < datetime.date(2020,1,1): #LTE first
            # Tariff generation
            tariff = self._tariff_lte(age)
            tariff_list.append(int(np.random.choice(tariff[0], size=1, p=tariff[1])))
            activation_date.append(self.fake.date_between(start_date=datetime.date(2010,1,1), end_date=datetime.date(2020,1,1)))
            Status.append("Active")
            distribution_channel.append(np.random.choice(["online","physical_shop","other"],p=[0.2,0.75,0.05]))
            termination_date.append(None)
            # Addon generation
            if np.random.choice([True,False],p=[0.5,0.5]): # Prob for addon
                addon = self._addon_lte(age, str(customer.customer_category.values[0]))
                tariff_list.append(int(np.random.choice(addon[0], size=1, p=addon[1])))
                activation_date.append(self.fake.date_between(start_date=activation_date[-1], end_date=datetime.date(2020,1,1)))
                Status.append("Active")
                distribution_channel.append(np.random.choice(["online","physical_shop","other"],p=[0.2,0.75,0.05]))
                termination_date.append(None)
            
            if np.random.choice([True,False],p=[0.8,0.2]): # Дописать логику для выбора termination и activation_date
                tariff = self._tariff_all(age)
                new_tariff = int(np.random.choice(tariff[0], size=1, p=tariff[1]))
                while(tariff_list[0] == new_tariff): new_tariff = int(np.random.choice(tariff[0], size=1, p=tariff[1]))
                tariff_list.append(new_tariff)
                activation_date.append(self.fake.date_between(start_date=datetime.date(2020,1,1), end_date=datetime.date.today()))
                distribution_channel.append(np.random.choice(["online","physical_shop","other"],p=[0.75,0.2,0.05]))
                Status= ["Inactive" for i in range(len(Status))]
                Status.append("Active")
                termination_date = [activation_date[-1] for i in range(len(termination_date))]
                termination_date.append(None)
                
                    # Addon generation
                if np.random.choice([True,False],p=[0.5,0.5]): # Prob for addon
                    
                    addon = self._addon_all(age, tariff_list[-1], str(customer.customer_category.values[0]))
                    tariff_list.append(int(np.random.choice(addon[0], size=1, p=addon[1])))
                    activation_date.append(self.fake.date_between(start_date=activation_date[-1], end_date=datetime.date.today()))
                    Status.append("Active")
                    distribution_channel.append(np.random.choice(["online","physical_shop","other"],p=[0.2,0.75,0.05]))
                    termination_date.append(None)
                
                
        elif customer_since > datetime.date(2020,1,1): #5G or LTE users
            tariff = self._tariff_all(age)
            tariff_list.append(int(np.random.choice(tariff[0], size=1, p=tariff[1])))
            distribution_channel.append(np.random.choice(["online","physical_shop","other"],p=[0.75,0.2,0.05]))
            activation_date.append(customer_since)
            Status.append("Active")
            termination_date.append(None)
                                           
            if np.random.choice([True,False],p=[0.5,0.5]): # Prob for addon
                
                addon = self._addon_all(age, tariff_list[-1], str(customer.customer_category.values[0]))
                tariff_list.append(int(np.random.choice(addon[0], size=1, p=addon[1])))
                activation_date.append(self.fake.date_between(start_date=activation_date[-1], end_date=datetime.date.today()))
                Status.append("Active")
                distribution_channel.append(np.random.choice(["online","physical_shop","other"],p=[0.2,0.75,0.05]))
                termination_date.append(None)
                                       
        return tariff_list, activation_date, termination_date, Status, distribution_channel  
        #return np.random.choice(product_type_tariff.product_id)
    
    
    def _random_addon_generation(self):
        product_type_addon = self.product_df[self.product_df.product_type == "addon"]
        return np.random.choice(product_type_tariff.product_id)
    
    def _age_dist(self,date_of_birth):
        date = str(date_of_birth.values[0]).split("-")
        birth = datetime.date(int(date[0]), int(date[1]), int(date[2])) 
        age = int((datetime.date.today()-birth).days)//365
        if age <= 25:
            return "zoomer"
        elif age > 25 and age < 55:
            return "doomer"
        elif age >= 55:
            return "boomer"
    
    def _tariff_lte(self, generation):
        if generation == "boomer":
            return [12,5,4], [0.6, 0.3, 0.1]
        elif generation == "doomer":
            return [5, 4, 12], [0.6, 0.3, 0.1]
        elif generation == "zoomer":
            return [4, 5, 12, 13], [0.5, 0.2, 0.25, 0.05]
    
    def _tariff_all(self,generation):
        if generation == "boomer":
            return [6, 8, 5, 12, 3, 4, 1], [0.2, 0.2, 0.2, 0.1, 0.1, 0.1, 0.1]
        elif generation == "doomer":
            return [8, 5, 3, 4, 1, 6, 12], [0.2, 0.2, 0.2, 0.1, 0.1, 0.1, 0.1]
        elif generation == "zoomer":
            return [4, 1, 3, 5, 8, 12, 6, 13], [0.2, 0.2, 0.2, 0.1, 0.1, 0.1, 0.05, 0.05]
            
    def _addon_lte(self, generation, customer_category):# Возможные комбинации дополнений к тарифам
        if customer_category =="business":
            if generation == "boomer":
                return [7, 10, 11], [0.7, 0.29, 0.01]
            elif generation == "doomer":
                return [7, 10, 11], [0.5, 0.49, 0.01]
            elif generation == "zoomer":
                return [7, 10, 11], [0.3, 0.69, 0.01]
        else:
            if generation == "boomer":
                return [7, 10], [0.7,0.3]
            elif generation == "doomer":
                return [7, 10], [0.5,0.5]
            elif generation == "zoomer":
                return [7, 10], [0.3,0.7]
            
    def _addon_all(self, generation, tariff, customer_category):
        if customer_category =="business":
            if generation == "boomer":
                if tariff == 1:
                    return [2, 7], [0.5, 0.5]
                elif tariff == 3:
                    return [2, 9, 7], [0.5,0.25,0.25]
                elif tariff == 6:
                    return [9, 10, 11], [0.5,0.49,0.01]
                elif tariff == 8:
                    return [7, 9, 10, 11], [0.5,0.25,0.24,0.01]
                else:
                    return self._addon_lte(generation, customer_category)
            elif generation == "doomer":
                 if tariff == 1:
                    return [2, 7], [0.5, 0.5]
                 elif tariff == 3:
                    return [2, 9, 7], [0.2,0.4,0.4]
                 elif tariff == 6:
                    return [9, 10, 11], [0.5,0.49,0.01]
                 elif tariff == 8:
                    return [7, 9, 10, 11], [0.3,0.35,0.34,0.01]
                 else:
                    return self._addon_lte(generation, customer_category)
            elif generation == "zoomer":
                 if tariff == 1:
                    return [2, 7], [0.5, 0.5]
                 elif tariff == 3:
                    return [2, 9, 7], [0.25,0.5,0.25]
                 elif tariff == 6:
                    return [9, 10, 11], [0.5,0.49,0.01]
                 elif tariff == 8:
                    return [7, 9, 10, 11], [0.1,0.45,0.44,0.01]
                 else:
                    return self._addon_lte(generation, customer_category)
        else:
            if generation == "boomer":
                if tariff == 1:
                    return [2, 7], [0.5, 0.5]
                elif tariff == 3:
                    return [2, 9, 7], [0.5,0.25,0.25]
                elif tariff == 6:
                    return [9, 10], [0.5,0.5]
                elif tariff == 8:
                    return [7, 9, 10], [0.5,0.25,0.25]
                else:
                    return self._addon_lte(generation, customer_category)
                
            elif generation == "doomer":
                 if tariff == 1:
                    return [2, 7], [0.5, 0.5]
                 elif tariff == 3:
                    return [2, 9, 7], [0.2,0.4,0.4]
                 elif tariff == 6:
                    return [9, 10], [0.5,0.5]
                 elif tariff == 8:
                    return [7, 9, 10], [0.3,0.35,0.35]
                 else:
                    return self._addon_lte(generation, customer_category)
            elif generation == "zoomer":
                 if tariff == 1:
                    return [2, 7], [0.5, 0.5]
                 elif tariff == 3:
                    return [2, 9, 7], [0.25,0.5,0.25]
                 elif tariff == 6:
                    return [9, 10], [0.5,0.5]
                 elif tariff == 8:
                    return [7, 9, 10], [0.1,0.45,0.45]
                 else:
                    return self._addon_lte(generation, customer_category)
            
    def product_inst_for_customer(self,customer_id):
        tariff = self._product_generation(customer_id)
        
        for i in range(len(tariff[0])):
            
            instance = ["None",customer_id,tariff[0][i],tariff[1][i],tariff[2][i],tariff[3][i],tariff[4][i]]
            df_to_add = pd.DataFrame([instance],columns=self.product_instance_df.columns.values)
            
            self.product_instance_df = self.product_instance_df.append(df_to_add,ignore_index=True)
       
            # [["None"],[customer_id],[tariff[0][i]],[tariff[1][i]],[tariff[2][i]],[tariff[3][i]],["unknown"]]
            #self.product_instance_df["customer_id"][i] = customer_id
           # self.product_instance_df["product_id"][i] = tariff[0][i]
            #self.product_instance_df["activation_date"] = tariff[1][i]
            #self.product_instance_df["termination_date"] = tariff[2][i]
            #self.product_instance_df["Status"] = tariff[3][i]
    
    def generate_all(self):
        for i in range(customer_df.shape[0]):
            self.product_inst_for_customer(i)
        self.product_instance_df.business_product_instance_id = np.arange(0,self.product_instance_df.shape[0])
    def save_to_csv(self, file_name="product_instance.csv"):
        """Saves generated data to csv table.
           :param file_name: The name of the file to be written to, defaults to product_instance.csv
           :type file_name: str
        """
        
        self.product_instance_df.to_csv(file_name,index=False)

In [236]:
pig = ProductInstanceGeneration(customer_df,product_df,"ja_JP")

In [222]:
%%time
for i in range(10):
    pig.product_inst_for_customer(i)

Wall time: 167 ms


In [237]:
%%time
pig.generate_all()

Wall time: 3min 58s


In [238]:
pig.save_to_csv()

In [239]:
pig.product_instance_df.head(20)

Unnamed: 0,business_product_instance_id,customer_id,product_id,activation_date,termination_date,Status,distribution_channel
0,0,0,4,2020-03-07,,Active,online
1,1,1,5,2018-02-16,2021-05-25,Inactive,online
2,2,1,6,2021-05-25,,Active,physical_shop
3,3,2,5,2011-12-13,2020-02-24,Inactive,physical_shop
4,4,2,6,2020-02-24,,Active,online
5,5,3,4,2016-11-19,2020-12-15,Inactive,physical_shop
6,6,3,7,2019-10-20,2020-12-15,Inactive,physical_shop
7,7,3,5,2020-12-15,,Active,physical_shop
8,8,4,6,2021-10-04,,Active,online
9,9,5,5,2018-01-09,2022-01-21,Inactive,online


In [57]:
pig.product_id

array([ 5,  6,  3,  4,  3,  5, 13,  6,  3,  6], dtype=int64)

In [59]:
customer_df = pd.read_csv("Customer.csv",index_col = False)
customer_df

Unnamed: 0,ID,first_name,last_name,date_of_birth,gender,email,MSISDN,agree_for_promo,autopay_card,customer_category,language,customer_since,region,status
0,0,さゆり,山崎,2004-05-26,M,C*******8@gmail.com,90-7084-4635,No,Yes,physical,Japanese,2020-03-07,Kantō,active
1,1,直子,渡辺,1993-10-17,F,O*******5@yahoo.com,90-5460-3945,No,Yes,physical,Japanese,2010-11-21,Kantō,active
2,2,舞,青木,1994-01-03,M,v*******5@hotmail.com,90-8598-0092,No,Yes,physical,Japanese,2011-02-05,Kansai(Kinki),inactive(debt)
3,3,舞,鈴木,1987-12-24,F,N*******9@yahoo.com,90-8353-5522,No,Yes,physical,Japanese,2016-06-17,Kyūshū & Okinawa,active
4,4,学,中村,1951-03-16,F,R*******4@hotmail.com,90-7877-8066,No,No,physical,Japanese,2021-10-04,Kantō,active
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9995,裕太,鈴木,1999-06-06,M,f*******0@hotmail.com,90-8957-3148,No,Yes,physical,Japanese,2018-01-24,Kantō,active
9996,9996,翔太,山田,1997-12-24,M,V*******6@hotmail.com,90-0570-4063,No,Yes,physical,Japanese,2022-02-27,Tōhoku,active
9997,9997,康弘,鈴木,1968-10-21,F,i*******8@gmail.com,90-5248-6057,No,Yes,physical,Japanese,2011-07-14,Chūbu,active
9998,9998,晃,村上,1957-01-09,F,D*******2@yahoo.com,90-9060-1773,No,Yes,physical,Japanese,2019-05-20,Kansai(Kinki),active


In [60]:
product_df = pd.read_csv("product.csv",index_col = False)
product_df

Unnamed: 0,product_id,product_name,product_category,product_type,recurrent,cost_for_call,cost_for_sms,cost_for_data,allowance_sms,allowance_voice,allowance_data,total_cost
0,1,5G Gigaho Premier,5G,tariff,regularly,22,3.3,0,,,unlimited,7315
1,2,Kake-hodai Option,5G,addon,regularly,0,0.0,0,,unlimited,,1870
2,3,5G Gigalight,5G,tariff,regularly,22,3.3,0,,,7,6765
3,4,Gigaho Premier,LTE,tariff,regularly,22,3.3,0,,,60,7205
4,5,Gigalight,LTE,tariff,regularly,22,3.3,0,,,7,6765
5,6,Hajimete Sumaho Plan,5G-LTE,tariff,regularly,0,3.3,0,,,1,1980
6,7,Kake-hodai Option (¥1000),5G-LTE,addon,regularly,0,0.0,0,,unlimited,,1100
7,8,U15 Hajimete Sumaho Plan,5G-LTE,tariff,regularly,22,3.3,0,,,5,1980
8,9,5G Data Plus,data only (5G),addon,regularly,0,0.0,0,,,30,1100
9,10,Data Plus,data only (LTE),addon,regularly,0,0.0,0,,,30,1100


In [67]:
product_type_addon = product_df[product_df.product_type == "addon"]
product_type_addon   

Unnamed: 0,product_id,product_name,product_category,product_type,recurrent,cost_for_call,cost_for_sms,cost_for_data,allowance_sms,allowance_voice,allowance_data,total_cost
1,2,Kake-hodai Option,5G,addon,regularly,0,0.0,0,,unlimited,,1870
6,7,Kake-hodai Option (¥1000),5G-LTE,addon,regularly,0,0.0,0,,unlimited,,1100
8,9,5G Data Plus,data only (5G),addon,regularly,0,0.0,0,,,30.0,1100
9,10,Data Plus,data only (LTE),addon,regularly,0,0.0,0,,,30.0,1100
10,11,LTE Jouku Riyou Plan,data only (LTE),addon,regularly,0,0.0,0,,,120.0,49800


In [228]:
product_type_tariff = product_df[product_df.product_type == "tariff"]
product_type_tariff.shape(0)

TypeError: 'tuple' object is not callable

In [214]:
print(datetime.date(2020,1,1))

2020-01-01


In [132]:
t = 4
g_m = [8, 5, 3, 4, 1, 6, 12] 
prob = [0.2, 0.2, 0.2, 0.1, 0.1, 0.1, 0.1]

In [136]:
prob[g_m.index(t)] = 0
print(prob+1)


TypeError: can only concatenate list (not "int") to list

In [134]:
np.random.choice(g_m,size = 1, p = prob)

ValueError: probabilities do not sum to 1

In [None]:
LTE Jouku Riyou Plan	data only (LTE)	addon	regularly	0	0.0	0	NaN	NaN	120	49800

In [142]:
datetime.date.today()

datetime.date(2022, 3, 3)

In [179]:
np.random.randint(1,20,size=10)

array([ 4, 14,  3,  1, 13, 15, 10,  8,  8, 10])