# <center>Local SQLite mock ads database to test SQL queries and visualizations</center>

In [1]:
# import sqlite3

In [2]:
# Load the `sql` IPython extension; it provides the %sql / %%sql magics used in the cells below.
%load_ext sql

- create and connect to database for adding mock data

In [3]:
# Open a connection to the SQLite file mock_ad_data.db (path is relative to the notebook's working directory).
# Later %sql / %%sql calls that give no connection string run against this connection.
%sql sqlite:///mock_ad_data.db

- enable foreign keys (specific to SQLite)

In [4]:
# SQLite only enforces FOREIGN KEY constraints when this pragma is switched on.
# NOTE(review): the setting is per connection, so it presumably has to be re-issued after reconnecting - confirm.
%sql PRAGMA foreign_keys = ON

 * sqlite:///mock_ad_data.db
Done.


[]

- create tables and schema

In [5]:
%%sql

DROP TABLE IF EXISTS clicks;
-- clicks (the child table) is dropped before impressions (its parent) on purpose:
-- with foreign keys enabled, dropping a parent table that is still referenced by
-- rows in clicks fails, which made this cell impossible to re-run on a populated
-- database.
DROP TABLE IF EXISTS impressions;

-- One row per served ad impression.
CREATE TABLE impressions (
    impression_id TEXT PRIMARY KEY,
    url_address TEXT,
    user_id TEXT,
    request_country TEXT,
    tracking_type TEXT,
    dynamic_display BOOLEAN,          -- stored by SQLite as 0 / 1
    dynamic_display_variables TEXT,
    request_browser_name TEXT,
    timestamp DATE                    -- populated with 'YYYY-MM-DD' strings
);

-- One row per click on an impression.
-- NOTE(review): impression_id and user_id are INTEGER here but TEXT in
-- impressions; the data-generation cell relies on this difference, so the
-- types are intentionally left as they are.
CREATE TABLE clicks (
    impression_id INTEGER,
    user_id INTEGER,
    timestamp TEXT,
    FOREIGN KEY (impression_id) REFERENCES impressions (impression_id)
);

DROP TABLE IF EXISTS conversions;
-- One row per conversion (purchase) attributed to a user.
CREATE TABLE conversions (
    conversion_id TEXT PRIMARY KEY,
    user_id TEXT,
    dval INTEGER,                     -- conversion value
    curr TEXT,                        -- currency code of dval
    timestamp DATE                    -- populated with 'YYYY-MM-DD' strings
);

 * sqlite:///mock_ad_data.db
Done.
Done.
Done.
Done.
Done.
Done.


[]

- create random data for tables
- 150 impressions
- each impression has a 60% chance of receiving a click
- each click has a 35% chance of leading to a conversion

In [6]:
import random
from datetime import datetime, timedelta

# Fixed seed: without it every kernel restart produces different tables, so the
# row counts and query results shown in the notebook could never be reproduced.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

# Generation parameters (previously magic numbers spread through the loop)
N_IMPRESSIONS = 150            # number of impression rows to create
N_USERS = 70                   # user ids are drawn from 1..N_USERS
CLICK_PROBABILITY = 0.6        # chance that an impression receives a click
CONVERSION_PROBABILITY = 0.35  # chance that a click leads to a conversion
MAX_DAY_OFFSET = 120           # dates fall within ~4 months after start_date
DATE_FORMAT = '%Y-%m-%d'

# mock data for impressions
countries = ["Austria", "Germany", "France", "Italy"]
tracking_types = ["cookie-based", "fingerprinted"]
dynamic_display_vars = ["soccer", "baseball", "basketball", "tennis", "volleyball"]
browsers = ["Chrome", "Firefox", "Safari", "Edge"]
currencies = ['EUR', 'USD', 'NOK']
start_date = datetime(2022, 11, 21)

# Row tuples, in the column order of the corresponding table
impressions_data = []
clicks_data = []
conversions_data = []

for i in range(N_IMPRESSIONS):
    imp_id = str(i + 1)  # a number stored as a string (impressions.impression_id is TEXT)
    url = f"http://example{i%10+1}.com"
    user_id = str(random.randint(1, N_USERS))  # impressions.user_id is TEXT
    country = random.choice(countries)
    tracking_type = random.choice(tracking_types)
    dynamic_display = random.choice([True, False])
    dynamic_display_var = random.choice(dynamic_display_vars)
    browser = random.choice(browsers)
    timestamp = start_date + timedelta(days=random.randint(0, MAX_DAY_OFFSET))
    day = timestamp.strftime(DATE_FORMAT)  # formatted once, shared by all three tables

    impressions_data.append((imp_id, url, user_id, country, tracking_type, dynamic_display, dynamic_display_var, browser, day))

    # Randomly decide if this impression gets a click.
    # The clicks table uses different datatypes: int, int and string.
    if random.random() < CLICK_PROBABILITY:
        clicks_data.append((int(imp_id), int(user_id), day))

        # Randomly decide if this click results in a conversion
        if random.random() < CONVERSION_PROBABILITY:
            conv_id = f"conv{i+1}"
            dval = random.randint(10, 1000)
            curr = random.choice(currencies)
            conversions_data.append((conv_id, user_id, dval, curr, day))

# NOTE: the statements are assembled with f-strings, which is only acceptable
# because every value is generated above; do not reuse this for external input.

# The boolean is written as 1 / 0 rather than true / false: SQLite stores it as
# that integer anyway, and the keyword literals are rejected by SQLite < 3.23.0.
impressions_inserts = [
    f"""INSERT INTO impressions
    (impression_id, url_address, user_id, request_country, tracking_type, dynamic_display, dynamic_display_variables, request_browser_name, timestamp)
    VALUES ('{imp_id}', '{url}', '{user_id}', '{country}', '{tracking_type}', {int(dynamic_display)}, '{dynamic_display_var}', '{browser}', '{timestamp}');
    """
    for imp_id, url, user_id, country, tracking_type, dynamic_display, dynamic_display_var, browser, timestamp in impressions_data
]

# Both ids are integers in clicks_data and are written as numeric literals.
clicks_inserts = [
    f"""INSERT INTO clicks
    (impression_id, user_id, timestamp)
    VALUES ({imp_id}, {user_id}, '{timestamp}');
    """
    for imp_id, user_id, timestamp in clicks_data
]

conversions_inserts = [
    f"""INSERT INTO conversions
    (conversion_id, user_id, dval, curr, timestamp)
    VALUES ('{conv_id}', '{user_id}', {dval}, '{curr}', '{timestamp}');
    """
    for conv_id, user_id, dval, curr, timestamp in conversions_data
]

# Combine all insert statements; impressions come first so that the rows the
# clicks refer to already exist when the click rows are inserted.
all_inserts = impressions_inserts + clicks_inserts + conversions_inserts

- In the run recorded in this notebook, the random assignment resulted in:
- Impressions: 150 records
- Clicks: 87
- Conversions: 28

In [7]:
# Insert data into tables using %%sql magic
for insert_query in all_inserts:
    %sql {insert_query}

# Fetch and print the results for verification
print("Verifying counts...")

impressions_count = %sql SELECT COUNT(*) FROM impressions;
clicks_count = %sql SELECT COUNT(*) FROM clicks;
conversions_count = %sql SELECT COUNT(*) FROM conversions;

print(f"Impressions: {impressions_count[0][0]}")
print(f"Clicks: {clicks_count[0][0]}")
print(f"Conversions: {conversions_count[0][0]}")

 * sqlite:///mock_ad_data.db
1 rows affected.
 * sqlite:///mock_ad_data.db
1 rows affected.
 * sqlite:///mock_ad_data.db
1 rows affected.
 * sqlite:///mock_ad_data.db
1 rows affected.
 * sqlite:///mock_ad_data.db
1 rows affected.
 * sqlite:///mock_ad_data.db
1 rows affected.
 * sqlite:///mock_ad_data.db
1 rows affected.
 * sqlite:///mock_ad_data.db
1 rows affected.
 * sqlite:///mock_ad_data.db
1 rows affected.
 * sqlite:///mock_ad_data.db
1 rows affected.
 * sqlite:///mock_ad_data.db
1 rows affected.
 * sqlite:///mock_ad_data.db
1 rows affected.
 * sqlite:///mock_ad_data.db
1 rows affected.
 * sqlite:///mock_ad_data.db
1 rows affected.
 * sqlite:///mock_ad_data.db
1 rows affected.
 * sqlite:///mock_ad_data.db
1 rows affected.
 * sqlite:///mock_ad_data.db
1 rows affected.
 * sqlite:///mock_ad_data.db
1 rows affected.
 * sqlite:///mock_ad_data.db
1 rows affected.
 * sqlite:///mock_ad_data.db
1 rows affected.
 * sqlite:///mock_ad_data.db
1 rows affected.
 * sqlite:///mock_ad_data.db
1 row

In [8]:
%%sql
SELECT * FROM conversions

 * sqlite:///mock_ad_data.db
Done.


conversion_id,user_id,dval,curr,timestamp
conv2,44,268,EUR,2023-02-03
conv10,68,131,USD,2023-03-21
conv20,32,886,NOK,2023-03-19
conv45,4,969,NOK,2022-12-24
conv48,58,494,EUR,2022-12-14
conv50,31,750,USD,2023-03-05
conv56,62,233,NOK,2023-02-15
conv60,67,120,NOK,2023-01-25
conv61,10,984,EUR,2023-01-18
conv64,2,790,USD,2022-11-23
