Setup

In [1]:
import pandas as pd
import sqlite3
from pathlib import Path

# Locations used by this notebook (both relative to the kernel's working directory)
db_path = Path("redezeit_dummy.db")
data_folder = Path("Dummy_Data")  # Adjust to where your dummy CSVs are stored

# Open the SQLite database; sqlite3 creates the file on first connect if it is missing
conn = sqlite3.connect(db_path)

# Report the absolute location so it is clear which file is being used
print(f"Connected to SQLite DB at {db_path.resolve()}")


Connected to SQLite DB at C:\Users\Admin\Documents\Data_Craft_2024-25\Projects\Redezeit\Redezeit-Analyse\Notebooks\Bernardo\redezeit_dummy.db


Load dummy data

In [5]:
# Paths to dummy data folders.
# They are expressed relative to the kernel's working directory rather than as a
# machine-specific absolute path, so the notebook runs on any checkout of the
# repository. The kernel is expected to start in <repo>/Notebooks/Bernardo,
# which makes "../.." the repository root.
# NOTE(review): the Scripts folder is spelled "Bernado" while the Notebooks
# folder is "Bernardo" -- the spelling is kept exactly as it was; confirm.
dummy_root = Path("..") / ".." / "Scripts" / "Bernado" / "Dummy_Data"
dim_path = dummy_root / "Dummy_Dim"
fact_path = dummy_root / "Dummy_Fact"

# Tables to load, in load order. Each one is stored as "dummy_<table>.csv" in
# the folder matching its prefix ("dim_" -> dim_path, "fact_" -> fact_path).
table_names = [
    "dim_channel",
    "dim_continent",
    "dim_country",
    "dim_date",
    "dim_device",
    "dim_event_name",
    "dim_gender",
    "dim_page",
    "dim_region",
    "dim_source",
    "fact_device_breakdown",
    "fact_gender_distribution",
    "fact_geo_data",
    "fact_interaction",
    "fact_landing_page",
    "fact_origin_breakdown",
    "fact_summary",
    "fact_visit_source",
]

# Dictionary with table name mapped to (subfolder path, filename)
csv_files = {
    name: (dim_path if name.startswith("dim_") else fact_path, f"dummy_{name}.csv")
    for name in table_names
}

# Fail early and name every missing file at once, instead of stopping at the
# first one halfway through the load. The working directory is included
# because the paths above are resolved relative to it.
missing_files = [
    src_folder / src_name
    for src_folder, src_name in csv_files.values()
    if not (src_folder / src_name).is_file()
]
if missing_files:
    raise FileNotFoundError(
        f"Dummy CSV file(s) not found (paths are resolved from {Path.cwd()}): "
        + ", ".join(str(p) for p in missing_files)
    )

# Load into dictionary of DataFrames
dfs = {}
for table, (folder, filename) in csv_files.items():
    path = folder / filename
    df = pd.read_csv(path)
    dfs[table] = df
    print(f"{table}: {df.shape[0]} rows loaded from {path.name}")


dim_channel: 5 rows loaded from dummy_dim_channel.csv
dim_continent: 5 rows loaded from dummy_dim_continent.csv
dim_country: 5 rows loaded from dummy_dim_country.csv
dim_date: 30 rows loaded from dummy_dim_date.csv
dim_device: 3 rows loaded from dummy_dim_device.csv
dim_event_name: 3 rows loaded from dummy_dim_event_name.csv
dim_gender: 3 rows loaded from dummy_dim_gender.csv
dim_page: 5 rows loaded from dummy_dim_page.csv
dim_region: 5 rows loaded from dummy_dim_region.csv
dim_source: 4 rows loaded from dummy_dim_source.csv
fact_device_breakdown: 30 rows loaded from dummy_fact_device_breakdown.csv
fact_gender_distribution: 30 rows loaded from dummy_fact_gender_distribution.csv
fact_geo_data: 30 rows loaded from dummy_fact_geo_data.csv
fact_interaction: 30 rows loaded from dummy_fact_interaction.csv
fact_landing_page: 30 rows loaded from dummy_fact_landing_page.csv
fact_origin_breakdown: 30 rows loaded from dummy_fact_origin_breakdown.csv
fact_summary: 30 rows loaded from dummy_fact_summary.csv

Write all tables to SQLite

In [6]:
# Persist each loaded DataFrame as its own SQLite table. An existing table with
# the same name is replaced, and the DataFrame index is not stored as a column.
for name, frame in dfs.items():
    frame.to_sql(name=name, con=conn, if_exists="replace", index=False)
    print(f"Table '{name}' written to SQLite.")


Table 'dim_channel' written to SQLite.
Table 'dim_continent' written to SQLite.
Table 'dim_country' written to SQLite.
Table 'dim_date' written to SQLite.
Table 'dim_device' written to SQLite.
Table 'dim_event_name' written to SQLite.
Table 'dim_gender' written to SQLite.
Table 'dim_page' written to SQLite.
Table 'dim_region' written to SQLite.
Table 'dim_source' written to SQLite.
Table 'fact_device_breakdown' written to SQLite.
Table 'fact_gender_distribution' written to SQLite.
Table 'fact_geo_data' written to SQLite.
Table 'fact_interaction' written to SQLite.
Table 'fact_landing_page' written to SQLite.
Table 'fact_origin_breakdown' written to SQLite.
Table 'fact_summary' written to SQLite.
Table 'fact_visit_source' written to SQLite.


Verify

In [7]:
# List the names of all tables recorded in the database's sqlite_master catalog
query = "SELECT name FROM sqlite_master WHERE type='table';"
tables_in_db = pd.read_sql(query, conn)
print(tables_in_db)


                        name
0                dim_channel
1              dim_continent
2                dim_country
3                   dim_date
4                 dim_device
5             dim_event_name
6                 dim_gender
7                   dim_page
8                 dim_region
9                 dim_source
10     fact_device_breakdown
11  fact_gender_distribution
12             fact_geo_data
13          fact_interaction
14         fact_landing_page
15     fact_origin_breakdown
16              fact_summary
17         fact_visit_source


Sample Query

In [8]:
# Example query: preview the first five rows of fact_device_breakdown, ordered
# by Date, showing the Mobile / Tablet / Desktop columns side by side.
device_preview_sql = """
    SELECT Date, Mobile, Tablet, Desktop
    FROM fact_device_breakdown
    ORDER BY Date
    LIMIT 5
"""
pd.read_sql(device_preview_sql, conn)


Unnamed: 0,Date,Mobile,Tablet,Desktop
0,2025-05-22,608,1334,1642
1,2025-05-23,752,1388,765
2,2025-05-24,521,1362,495
3,2025-05-25,1192,1816,416
4,2025-05-26,1729,1294,1792
