In [3]:
#Importing the necessary libraries
import random #To generate the random numbers
import pandas as pd #To store our dataset

In [24]:
#Set the seed for reproductivity so that the same results are generated every time the code is ran
random.seed(42)

In [25]:
#Defining the probability of making a purchase for each age groups
w_purchase_probs={20: 0.10,30: 0.15, 40: 0.20, 50: 0.30, 60: 0.50, 70: 0.80}

In [26]:
age_groups = list(w_purchase_probs.keys())

In [27]:
#Generate 40,000 data points with the weighted purchase probabilities. It randomly selects an age then determines the purchase probability based on age and weight assigned 
#to the age group. A purchase is simulated:1 for purchase and 0 for no purchase
data =[]
for _ in range(40000):
    age = random.choice(age_groups)
    
    w_purchase_prob = w_purchase_probs[age] 
    
    purchase = random.choices([1,0], weights=[w_purchase_prob, 1-w_purchase_prob],k=1)[0]
    
    data.append((age,purchase))#storing the data as tuples

In [28]:
#Creating a dataframe to house our dataset
df_w_age_purchase= pd.DataFrame(data, columns=['Age', 'Purchased'])

In [29]:
#taking a peek at the data that was generated
print(df_w_age_purchase.head)

<bound method NDFrame.head of        Age  Purchased
0       70          1
1       70          1
2       30          1
3       20          0
4       60          1
...    ...        ...
39995   70          1
39996   60          1
39997   60          0
39998   30          0
39999   30          0

[40000 rows x 2 columns]>


In [30]:
#Checking the number of persons who are 70 year old and total 70 year olds who purchased
print("Total 70-year-olds:", (df_w_age_purchase['Age'] == 70).sum())
print("70-year-olds who purchased:", ((df_w_age_purchase['Age'] == 70) & (df_w_age_purchase['Purchased'] == 1)).sum())


Total 70-year-olds: 6777
70-year-olds who purchased: 5406


In [31]:
#Filter the data for age 70
w_seventy_df = df_w_age_purchase[df_w_age_purchase['Age'] == 70]

In [32]:
#Calculate the probability of randomly purchasing an item, given that the person is 70 years old with a weighted condition
p_purchase_given_70=w_seventy_df['Purchased'].mean()
#Print the results of the weoighted probability
print(f'P(Purchase / Age = 70):{p_purchase_given_70:.4f}')

P(Purchase / Age = 70):0.7977


In [33]:
#Removing the weighted conditions by assigning equal weights to all age groups
purchase_probs={20: 0.20,30: 0.20, 40: 0.20, 50: 0.20, 60: 0.20, 70: 0.20}

In [34]:
#Generate 40,000 data points with the purchase probabilities
data =[]
for _ in range(40000):
    age = random.choice(age_groups)#randomly selects an age
    
    purchase_prob = purchase_probs[age] #Determining the purchase probability based on age and weight
    
    purchase = random.choices([1,0], weights=[purchase_prob, 1-purchase_prob],k=1)[0] #simulating a purchases, 1 for purchase and 0 for no purchase
    
    data.append((age,purchase))#storing the data as tuples

In [18]:
#Creating a dataframe to house our dataset
df_unw_age_purchase= pd.DataFrame(data, columns=['Age', 'Purchased'])

In [20]:
#Checking the number of persons who are 70 year old and total 70 year olds who purchased
print("Total 70-year-olds:", (df_unw_age_purchase['Age'] == 70).sum())
print("70-year-olds who purchased:", ((df_unw_age_purchase['Age'] == 70) & (df_unw_age_purchase['Purchased'] == 1)).sum())


Total 70-year-olds: 6709
70-year-olds who purchased: 1397


In [22]:
#Filter out the data for the 70 age group
uw_seventy_df = df_unw_age_purchase[df_unw_age_purchase['Age'] == 70]

In [23]:
#Calculate the probability of randomly purchasing an item, given that the person is 70 years old without a weighted condition
p_purchase_given_70=uw_seventy_df['Purchased'].mean()
print(f'P(Purchase / Age = 70):{p_purchase_given_70:.4f}')

P(Purchase / Age = 70):0.2082
