# Dummy data generation test area.

Here I'll be testing the code I'll be using to generate dummy data.

In [10]:
# Libraries
import numpy as np
import pandas as pd
import yaml
from faker import Faker
from datetime import datetime

In [None]:
# Load the generation settings and seed every random source from them.
# The encoding is pinned so the YAML file is decoded the same way on every
# platform instead of depending on the locale's default encoding.
with open("./data_generation/config.yaml", "r", encoding="utf-8") as stream:
    config = yaml.safe_load(stream)

# Seed NumPy's global generator and Faker with the same configured value so
# repeated runs produce the same dummy data.
random_seed = config["random_seed"]
np.random.seed(random_seed)
Faker.seed(random_seed)

# Directory prefix for every CSV written by this notebook. The trailing slash
# matters: file names are appended directly to this string.
output_dir = "./data/processed/"

In [12]:
# Build the date dimension: one row per calendar day in the configured range.
dates = pd.date_range(
    start=config["date_range"]["start"],
    end=config["date_range"]["end"],
    freq="D",
)

# ISO calendar breakdown of every date; only the week number is used below.
iso_calendar = dates.isocalendar()

# Column name -> values, all derived from the daily DatetimeIndex above.
date_columns = {
    "date_id": range(1, len(dates) + 1),  # 1-based surrogate key
    "date": dates,
    "day": dates.day,
    "month": dates.month,
    "month_name": dates.month_name(),
    "quarter": dates.quarter,
    "year": dates.year,
    "week": iso_calendar.week,
}
dim_date = pd.DataFrame(date_columns)

# Persist without the DataFrame index; date_id is the key column.
dim_date_path = output_dir + "dim_date.csv"
dim_date.to_csv(dim_date_path, index=False)

In [None]:
# Build the channel dimension from the channel names that key the
# "channels" section of the config.
channels = list(config["channels"])

# Ids are assigned 1..N in the order the channels appear in the config.
channel_ids = range(1, len(channels) + 1)
dim_channel = pd.DataFrame(
    {
        "channel_id": channel_ids,
        "channel_name": channels,
    }
)

dim_channel_path = output_dir + "dim_channel.csv"
dim_channel.to_csv(dim_channel_path, index=False)

# Lookup from channel name to its id.
channel_id_map = dict(
    zip(dim_channel["channel_name"], dim_channel["channel_id"])
)

In [14]:
# Display the channel name -> channel id lookup to eyeball the assigned ids.
print(channel_id_map)

{'Paid Search': 1, 'Paid Social': 2, 'Programmatic': 3, 'Organic': 4}


In [15]:
# Build the data-source dimension. The config maps each channel to a list of
# source names; every (channel, source) pair becomes one row.
source_pairs = [
    (platform, source_name)
    for platform, source_names in config["data_sources"].items()
    for source_name in source_names
]

# Ids are assigned 1..N in config order across all channels.
data_sources = [
    {
        "data_source_id": idx,
        "data_source_name": source_name,
        "platform_type": platform,
    }
    for idx, (platform, source_name) in enumerate(source_pairs, start=1)
]

dim_data_source = pd.DataFrame(data_sources)

dim_data_source_path = output_dir + "dim_data_source.csv"
dim_data_source.to_csv(dim_data_source_path, index=False)

In [None]:
# Build the campaign dimension: for each channel in the "campaigns" section,
# create `count` campaigns with a Faker-generated name and a randomly chosen
# objective from that channel's list.
fake = Faker()

# One entry per campaign to create, carrying its channel name and settings.
campaign_slots = [
    (channel_name, channel_cfg)
    for channel_name, channel_cfg in config["campaigns"].items()
    for _ in range(channel_cfg["count"])
]

# NOTE(review): the "campaing_name" key is misspelled; it is kept as-is
# because it is the column name written to the CSV.
campaigns = [
    {
        "campaign_id": idx,  # 1-based, sequential across all channels
        "campaing_name": fake.bs(),
        "channel_id": channel_id_map[channel_name],
        "objective": np.random.choice(channel_cfg["objectives"]),
        # Every campaign spans the full configured date range.
        "start_date": config["date_range"]["start"],
        "end_date": config["date_range"]["end"],
        "status": "Active",
    }
    for idx, (channel_name, channel_cfg) in enumerate(campaign_slots, start=1)
]

dim_campaign = pd.DataFrame(campaigns)

dim_campaign_path = output_dir + "dim_campaign.csv"
dim_campaign.to_csv(dim_campaign_path, index=False)

In [None]:
# Generating campaign-level data