In [50]:
import faker

fake = faker.Faker()

print(fake.email())
print(fake.address())
print(fake.company())

jwilliams@example.net
149 Tran Plains
Port Todd, MN 39972
Rodriguez and Sons


In [51]:
# Generate a user
import json

users = []
for i in range(10):
    user = {
        "username": fake.unique.user_name(),
        "name": fake.name(),
        "email": fake.email(),
        "address": fake.address(),
        "age": fake.random_int(min=18, max=90)
    }
    users.append(user)

# Save in JSON
with open("users.json", "w") as file:
    json.dump(users, file, indent=4)

In [52]:
# generate ad campaign
import random
from datetime import date, timedelta
# Start and end of the campaign
def get_start_end_dates():
    duration = random.randint(1, 2*365)
    offset = random.randint(-365, 365)
    start = date.today() - timedelta(days = offset)
    end = start + timedelta(days=duration)

    return start.strftime("%Y%m%d"), end.strftime("%Y%m%d")

print(get_start_end_dates())

# Generate target age
def get_age_range():
    age = random.randrange(20, 46, 5)
    diff = random.randrange(5, 26, 5)

    return f"{age}-{age + diff}"

print(get_age_range())

# Generate the currency
def get_currency():
    return random.choice(("GBP", "USD", "EUR"))

# Create campaign name
def get_campaign_name():
    time = get_start_end_dates()[0] + "_" + get_start_end_dates()[1]
    old = "_" + get_age_range()
    coin = "_" + get_currency()
    name = time + old + coin
    return name

print(get_campaign_name())

('20250608', '20270506')
35-45
20251031_20251101_20-40_USD


In [53]:
# Generate the data for the campaign
def get_campaign_data():
    name = get_campaign_name()
    budget = random.randint(10**3, 10**6)
    spent = random.randint(10**2, budget)

    return {
        "cmp_name": name,
        "cmp_bgt": budget,
        "cmp_spent": spent
    }

print(get_campaign_data())

def get_raw_data(users):
    raw_data = []
    for user in users:
        campaigns = []
        for i in range(random.randint(5, 10)):
            campaign_data = get_campaign_data()
            dates = campaign_data["cmp_name"].split("_")[2].split("-")
            min_age = int(dates[0])
            max_age = int(dates[1])
            if (user["age"] >= min_age and user["age"] <= max_age):
                campaigns.append(campaign_data)

        campaigns = [get_campaign_data() for _ in range(random.randint(2, 8))]
        raw_data.append({"user": user, "campaigns": campaigns})
    return raw_data

raw_data = get_raw_data(users)
with open("raw_data.json", "w") as file:
    json.dump(raw_data, file, indent=4)

campaign_data = []
for data in raw_data:
    for campaign in data["campaigns"]:
        campaign["user"] = data["user"]
        campaign_data.append(campaign)

with open("campaigns_data.json", "w") as file:
    json.dump(campaign_data, file, indent=4)

{'cmp_name': '20260721_20270121_25-35_GBP', 'cmp_bgt': 554902, 'cmp_spent': 57318}


In [54]:
import pandas as pd

# DataFrame = Excel spreadsheet in Python
df = pd.read_json("campaigns_data.json")

print(df)

df.head(10)

# Check the size of the DataFrame (rows, cols)
print(df.shape)

#Column names
print(df.columns)

# Types of data
print(df.dtypes)

                       cmp_name  cmp_bgt  cmp_spent  \
0   20250822_20260611_45-65_EUR   509632      45355   
1   20261005_20260613_45-60_USD   996464     975226   
2   20260811_20270309_40-50_EUR   415305     252058   
3   20250911_20260721_30-35_USD   109369      22161   
4   20260422_20270727_30-45_GBP   734430     521870   
5   20250207_20261213_35-55_USD    27218      20409   
6   20251024_20270410_45-70_EUR   463809     261566   
7   20260312_20270511_25-30_USD   113229      26766   
8   20250312_20251207_25-30_USD   613529     342273   
9   20250829_20280107_20-25_EUR    82123      77447   
10  20260414_20271213_35-45_USD   886575     373369   
11  20250120_20260709_45-70_GBP   771817     675063   
12  20241205_20280503_45-60_USD   735697     540070   
13  20250118_20280703_40-50_EUR    41451      40747   
14  20260321_20260410_40-55_USD    72292       2528   
15  20250502_20260828_40-55_GBP   267677     254442   
16  20250309_20260528_30-35_EUR   252516      94630   
17  202504

In [62]:
# Filter the data
print(df.query("cmp_bgt < 1000000 and cmp_spent > 5000"))

                       cmp_name  cmp_bgt  cmp_spent  \
0   20250822_20260611_45-65_EUR   509632      45355   
1   20261005_20260613_45-60_USD   996464     975226   
2   20260811_20270309_40-50_EUR   415305     252058   
3   20250911_20260721_30-35_USD   109369      22161   
4   20260422_20270727_30-45_GBP   734430     521870   
5   20250207_20261213_35-55_USD    27218      20409   
6   20251024_20270410_45-70_EUR   463809     261566   
7   20260312_20270511_25-30_USD   113229      26766   
8   20250312_20251207_25-30_USD   613529     342273   
9   20250829_20280107_20-25_EUR    82123      77447   
10  20260414_20271213_35-45_USD   886575     373369   
11  20250120_20260709_45-70_GBP   771817     675063   
12  20241205_20280503_45-60_USD   735697     540070   
13  20250118_20280703_40-50_EUR    41451      40747   
15  20250502_20260828_40-55_GBP   267677     254442   
16  20250309_20260528_30-35_EUR   252516      94630   
17  20250420_20270201_20-35_GBP   904811       7713   
18  202601

In [61]:
# Get the campaign with max spent
idx = df["cmp_spent"].idxmax()
df.loc[idx]

cmp_name                           20261005_20260613_45-60_USD
cmp_bgt                                                 996464
cmp_spent                                               975226
user         {'username': 'george70', 'name': 'Heather Glov...
Name: 1, dtype: object

In [64]:
df.sort_values(by=["cmp_spent"], ascending=False).head(1)

Unnamed: 0,cmp_name,cmp_bgt,cmp_spent,user
1,20261005_20260613_45-60_USD,996464,975226,"{'username': 'george70', 'name': 'Heather Glov..."


In [None]:
# Add a new column
df["cmp_remaining_bgt"] = df["cmp_bgt"] - df["cmp_spent"]

# Get the start
start_date = df["cmp_name"].str.split("_").str[0]

df["cmp_start_date"] = pd.to_datetime(start_date)

# TODO
# current_date = something
# df["cmp_days_running"] = current_date - start_date

df.head(10)

Unnamed: 0,cmp_name,cmp_bgt,cmp_spent,user,cmp_remaining_bgt,cmp_start_date
0,20250822_20260611_45-65_EUR,509632,45355,"{'username': 'george70', 'name': 'Heather Glov...",464277,2025-08-22
1,20261005_20260613_45-60_USD,996464,975226,"{'username': 'george70', 'name': 'Heather Glov...",21238,2026-10-05
2,20260811_20270309_40-50_EUR,415305,252058,"{'username': 'george70', 'name': 'Heather Glov...",163247,2026-08-11
3,20250911_20260721_30-35_USD,109369,22161,"{'username': 'george70', 'name': 'Heather Glov...",87208,2025-09-11
4,20260422_20270727_30-45_GBP,734430,521870,"{'username': 'george70', 'name': 'Heather Glov...",212560,2026-04-22
5,20250207_20261213_35-55_USD,27218,20409,"{'username': 'george70', 'name': 'Heather Glov...",6809,2025-02-07
6,20251024_20270410_45-70_EUR,463809,261566,"{'username': 'george70', 'name': 'Heather Glov...",202243,2025-10-24
7,20260312_20270511_25-30_USD,113229,26766,"{'username': 'sullivancheryl', 'name': 'Debora...",86463,2026-03-12
8,20250312_20251207_25-30_USD,613529,342273,"{'username': 'sullivancheryl', 'name': 'Debora...",271256,2025-03-12
9,20250829_20280107_20-25_EUR,82123,77447,"{'username': 'amber07', 'name': 'James Atkinso...",4676,2025-08-29


In [69]:
df.to_csv("data.csv")
df.to_excel("data.xlsx")