In [101]:
import pandas as pd
from faker import Faker
from datetime import datetime
import numpy as np
import random

fake = Faker()
pd.set_option("display.max_columns", None)
# pd.set_option("display.expand_frame_repr", False)

In [102]:
n = 50
customers = {
        "customer_id": [i+1 for i in range(n)],  
        "first_name": [fake.first_name() for _ in range(n)],
        "middle_name": [fake.first_name() if random.uniform(0,1) > 0.4 else np.nan for _ in range(n)],
        "last_name": [fake.last_name() for _ in range(n)],
        "birthdate": [fake.date_of_birth(minimum_age=18, maximum_age=80) for _ in range(n)],
        "gender_id": [random.choice([1, 2, 3]) for _ in range(n)], 
        "pronounce_id": [random.choice([1, 2, 3]) for _ in range(n)],
        "join_date": [fake.date_time_this_decade() for _ in range(n)],
        "customer_status_id": [random.choice([1, 2, 3]) for _ in range(n)],
        "last_updated": [datetime.now() for _ in range(n)]
    }

df = pd.DataFrame(customers)

In [103]:
print(df)

    customer_id first_name  middle_name   last_name   birthdate  gender_id  pronounce_id                  join_date  customer_status_id               last_updated
0             1      Caleb          NaN       Banks  1988-01-05          1             1 2022-01-06 16:44:06.855060                   2 2025-08-24 19:12:34.137000
1             2     Alicia          NaN       Bruce  1977-06-19          2             3 2023-12-07 20:11:00.982921                   3 2025-08-24 19:12:34.137001
2             3       Ross          NaN    Peterson  1945-06-24          1             3 2020-04-11 09:45:29.954104                   1 2025-08-24 19:12:34.137001
3             4       Emma        Glenn      Tucker  2007-02-19          3             2 2024-05-25 01:34:04.927979                   3 2025-08-24 19:12:34.137002
4             5       Eric          NaN       Weber  1962-10-23          1             1 2020-01-21 04:11:05.556326                   3 2025-08-24 19:12:34.137002
5             6      D

In [104]:
df["middle_name"] = df["middle_name"].fillna("")
print(df)

    customer_id first_name  middle_name   last_name   birthdate  gender_id  pronounce_id                  join_date  customer_status_id               last_updated
0             1      Caleb                    Banks  1988-01-05          1             1 2022-01-06 16:44:06.855060                   2 2025-08-24 19:12:34.137000
1             2     Alicia                    Bruce  1977-06-19          2             3 2023-12-07 20:11:00.982921                   3 2025-08-24 19:12:34.137001
2             3       Ross                 Peterson  1945-06-24          1             3 2020-04-11 09:45:29.954104                   1 2025-08-24 19:12:34.137001
3             4       Emma        Glenn      Tucker  2007-02-19          3             2 2024-05-25 01:34:04.927979                   3 2025-08-24 19:12:34.137002
4             5       Eric                    Weber  1962-10-23          1             1 2020-01-21 04:11:05.556326                   3 2025-08-24 19:12:34.137002
5             6      D

In [105]:
full_names = []

for idx, row in df.iterrows():
    parts = [row["first_name"]]
    if row["middle_name"]:  
        parts.append(row["middle_name"])
    parts.append(row["last_name"])
    full_names.append(" ".join(parts))



df["full_name"] = full_names

df = df.drop("first_name", axis=1)
df = df.drop("middle_name", axis=1)
df = df.drop("last_name", axis=1)
print(df)

    customer_id   birthdate  gender_id  pronounce_id                  join_date  customer_status_id               last_updated                    full_name
0             1  1988-01-05          1             1 2022-01-06 16:44:06.855060                   2 2025-08-24 19:12:34.137000                  Caleb Banks
1             2  1977-06-19          2             3 2023-12-07 20:11:00.982921                   3 2025-08-24 19:12:34.137001                 Alicia Bruce
2             3  1945-06-24          1             3 2020-04-11 09:45:29.954104                   1 2025-08-24 19:12:34.137001                Ross Peterson
3             4  2007-02-19          3             2 2024-05-25 01:34:04.927979                   3 2025-08-24 19:12:34.137002            Emma Glenn Tucker
4             5  1962-10-23          1             1 2020-01-21 04:11:05.556326                   3 2025-08-24 19:12:34.137002                   Eric Weber
5             6  1986-12-16          3             2 2021-07-01 

In [106]:
gender = []
pronounce = []

for idx, row in df.iterrows():
    if row["gender_id"] == 1:
        gender.append("Male")
        pronounce.append("He/Him")
    elif row["gender_id"] == 2:
        gender.append("Female")
        pronounce.append("She/Her")
    else: 
        gender.append("Non-Binary")
        pronounce.append("They/Them")

df["gender"] = gender
df["pronounce"] = pronounce

df = df.drop("gender_id", axis=1)
df = df.drop("pronounce_id", axis=1)

df

Unnamed: 0,customer_id,birthdate,join_date,customer_status_id,last_updated,full_name,gender,pronounce
0,1,1988-01-05,2022-01-06 16:44:06.855060,2,2025-08-24 19:12:34.137000,Caleb Banks,Male,He/Him
1,2,1977-06-19,2023-12-07 20:11:00.982921,3,2025-08-24 19:12:34.137001,Alicia Bruce,Female,She/Her
2,3,1945-06-24,2020-04-11 09:45:29.954104,1,2025-08-24 19:12:34.137001,Ross Peterson,Male,He/Him
3,4,2007-02-19,2024-05-25 01:34:04.927979,3,2025-08-24 19:12:34.137002,Emma Glenn Tucker,Non-Binary,They/Them
4,5,1962-10-23,2020-01-21 04:11:05.556326,3,2025-08-24 19:12:34.137002,Eric Weber,Male,He/Him
5,6,1986-12-16,2021-07-01 08:07:12.419172,1,2025-08-24 19:12:34.137003,David Jeremiah Sloan,Non-Binary,They/Them
6,7,1990-10-03,2025-07-14 18:38:15.248652,1,2025-08-24 19:12:34.137003,Jenna Cook,Male,He/Him
7,8,1997-07-27,2022-07-22 18:14:42.458116,1,2025-08-24 19:12:34.137003,Justin Schultz,Female,She/Her
8,9,1997-03-07,2025-04-05 01:06:52.212418,3,2025-08-24 19:12:34.137004,Paul Michael Myers,Non-Binary,They/Them
9,10,1964-02-09,2023-04-13 11:53:57.477470,1,2025-08-24 19:12:34.137004,Ashley Martin,Female,She/Her


In [107]:
df["join_date"] = df["join_date"].dt.date
df

Unnamed: 0,customer_id,birthdate,join_date,customer_status_id,last_updated,full_name,gender,pronounce
0,1,1988-01-05,2022-01-06,2,2025-08-24 19:12:34.137000,Caleb Banks,Male,He/Him
1,2,1977-06-19,2023-12-07,3,2025-08-24 19:12:34.137001,Alicia Bruce,Female,She/Her
2,3,1945-06-24,2020-04-11,1,2025-08-24 19:12:34.137001,Ross Peterson,Male,He/Him
3,4,2007-02-19,2024-05-25,3,2025-08-24 19:12:34.137002,Emma Glenn Tucker,Non-Binary,They/Them
4,5,1962-10-23,2020-01-21,3,2025-08-24 19:12:34.137002,Eric Weber,Male,He/Him
5,6,1986-12-16,2021-07-01,1,2025-08-24 19:12:34.137003,David Jeremiah Sloan,Non-Binary,They/Them
6,7,1990-10-03,2025-07-14,1,2025-08-24 19:12:34.137003,Jenna Cook,Male,He/Him
7,8,1997-07-27,2022-07-22,1,2025-08-24 19:12:34.137003,Justin Schultz,Female,She/Her
8,9,1997-03-07,2025-04-05,3,2025-08-24 19:12:34.137004,Paul Michael Myers,Non-Binary,They/Them
9,10,1964-02-09,2023-04-13,1,2025-08-24 19:12:34.137004,Ashley Martin,Female,She/Her


In [108]:
sts_map = {1:"Active",2: "Inactive",3: "Blocked",4: "departed"}

df["customer_status"] = df["customer_status_id"].map(sts_map)
df = df.drop("customer_status_id", axis=1)

df

Unnamed: 0,customer_id,birthdate,join_date,last_updated,full_name,gender,pronounce,customer_status
0,1,1988-01-05,2022-01-06,2025-08-24 19:12:34.137000,Caleb Banks,Male,He/Him,Inactive
1,2,1977-06-19,2023-12-07,2025-08-24 19:12:34.137001,Alicia Bruce,Female,She/Her,Blocked
2,3,1945-06-24,2020-04-11,2025-08-24 19:12:34.137001,Ross Peterson,Male,He/Him,Active
3,4,2007-02-19,2024-05-25,2025-08-24 19:12:34.137002,Emma Glenn Tucker,Non-Binary,They/Them,Blocked
4,5,1962-10-23,2020-01-21,2025-08-24 19:12:34.137002,Eric Weber,Male,He/Him,Blocked
5,6,1986-12-16,2021-07-01,2025-08-24 19:12:34.137003,David Jeremiah Sloan,Non-Binary,They/Them,Active
6,7,1990-10-03,2025-07-14,2025-08-24 19:12:34.137003,Jenna Cook,Male,He/Him,Active
7,8,1997-07-27,2022-07-22,2025-08-24 19:12:34.137003,Justin Schultz,Female,She/Her,Active
8,9,1997-03-07,2025-04-05,2025-08-24 19:12:34.137004,Paul Michael Myers,Non-Binary,They/Them,Blocked
9,10,1964-02-09,2023-04-13,2025-08-24 19:12:34.137004,Ashley Martin,Female,She/Her,Active


In [109]:
today = pd.Timestamp("today")

ages = []
age_group = []
df["birthdate"] = pd.to_datetime(df["birthdate"])
for idx, row in df.iterrows():
    ages.append(today.year - row["birthdate"].year - 
                ((today.month, today.day) < (row["birthdate"].month, row["birthdate"].day)))
    if ages[-1] >=18 and ages[-1] <=28:
        age_group.append("18 - 25")
    elif ages[-1] >=26 and ages[-1] <=30:
        age_group.append("26 - 30")
    elif ages[-1] >=31 and ages[-1] <=39:
        age_group.append("31 - 39")
    else:
        age_group.append("40+")


df["age"] = ages
df["age_group"] = age_group

df = df.drop("birthdate", axis=1)

df




Unnamed: 0,customer_id,join_date,last_updated,full_name,gender,pronounce,customer_status,age,age_group
0,1,2022-01-06,2025-08-24 19:12:34.137000,Caleb Banks,Male,He/Him,Inactive,37,31 - 39
1,2,2023-12-07,2025-08-24 19:12:34.137001,Alicia Bruce,Female,She/Her,Blocked,48,40+
2,3,2020-04-11,2025-08-24 19:12:34.137001,Ross Peterson,Male,He/Him,Active,80,40+
3,4,2024-05-25,2025-08-24 19:12:34.137002,Emma Glenn Tucker,Non-Binary,They/Them,Blocked,18,18 - 25
4,5,2020-01-21,2025-08-24 19:12:34.137002,Eric Weber,Male,He/Him,Blocked,62,40+
5,6,2021-07-01,2025-08-24 19:12:34.137003,David Jeremiah Sloan,Non-Binary,They/Them,Active,38,31 - 39
6,7,2025-07-14,2025-08-24 19:12:34.137003,Jenna Cook,Male,He/Him,Active,34,31 - 39
7,8,2022-07-22,2025-08-24 19:12:34.137003,Justin Schultz,Female,She/Her,Active,28,18 - 25
8,9,2025-04-05,2025-08-24 19:12:34.137004,Paul Michael Myers,Non-Binary,They/Them,Blocked,28,18 - 25
9,10,2023-04-13,2025-08-24 19:12:34.137004,Ashley Martin,Female,She/Her,Active,61,40+


In [110]:

customers = pd.DataFrame({
    "customer_id": [1, 2],
    "first_name": ["Alice", "Bob"],
    "gender_id": [1, 2],
    "pronounce_id": [2, 1],
    "customer_status_id": [1, 2]
})

genders = pd.DataFrame({
    "gender_id": [1, 2],
    "gender": ["Female", "Male"]
})

pronouns = pd.DataFrame({
    "pronounce_id": [1, 2],
    "pronounce": ["He/Him", "She/Her"]
})

statuses = pd.DataFrame({
    "customer_status_id": [1, 2],
    "status": ["Active", "Inactive"]
})

customers = customers.merge(
        genders, on="gender_id", how="left"
    )

customers

Unnamed: 0,customer_id,first_name,gender_id,pronounce_id,customer_status_id,gender
0,1,Alice,1,2,1,Female
1,2,Bob,2,1,2,Male
