In [1]:
import polars as pl

In [80]:
userDF = pl.read_json('users-extracted.json')
userEventDF = pl.read_json('user_events-extracted.json')
userTransDF = pl.read_json('user_transactions-extracted.json')
fbDF = pl.read_json('facebook_ads-extracted.json')
igDF = pl.read_json('instagram_ads-extracted.json')

In [81]:
ctx = pl.SQLContext().register_many({'users':userDF, 'user_events':userEventDF, 'user_transactions':userTransDF, 'fb':fbDF, 'ig':igDF})

In [89]:
dim_user = ctx.execute('''
SELECT
id AS user_id,
client_id,
first_name || ' ' || last_name AS full_name,
email,
dob,
gender,
register_date,
CASE
    WHEN fb.id IS NOT NULL THEN 'Facebook'
    WHEN ig.id IS NOT NULL THEN 'Instagram'
    ELSE 'Unknown'
END AS ads_source,
FROM users u
LEFT JOIN fb on fb.id = u.client_id
LEFT JOIN ig on ig.id = u.client_id
''', eager=True)

dim_user.write_json('dim_user.json', row_oriented=True)

In [94]:
dim_ads = ctx.execute('''
SELECT
'Facebook' AS ads_source, id, ads_id, device_type, device_id, timestamp
FROM fb
UNION ALL
SELECT
'Instagram' AS ads_source, id, ads_id, device_type, device_id, timestamp
FROM ig
''', eager=True)
dim_ads.write_json('dim_ads.json', row_oriented=True)

In [162]:
fact_user_performance = ctx.execute('''
SELECT 
    id AS user_id,
    MAX(user_events.timestamp) AS last_activity,
    COUNT(DISTINCT user_events.id) AS total_events,
    COUNT(DISTINCT CASE WHEN user_events.event_type = 'login' THEN user_events.id ELSE NULL END) AS total_logins,
    COUNT(DISTINCT CASE WHEN user_events.event_type = 'search' THEN user_events.id ELSE NULL END) AS total_searches,
    COUNT(DISTINCT CASE WHEN user_events.event_type = 'purchase' THEN user_events.id ELSE NULL END) AS total_purchases,
    SUM(CASE WHEN user_transactions.transaction_type = 'purchase' THEN user_transactions.amount ELSE 0 END) AS total_purchase_amount
    FROM users u
LEFT JOIN user_events on user_events.user_id = u.id
LEFT JOIN user_transactions on user_transactions.user_id = u.id
WHERE user_events.timestamp IS NOT NULL
GROUP BY id
''', eager=True)
fact_user_performance.write_json('fact_user_performance.json', row_oriented=True)

In [155]:
fact_ads_performance = ctx.execute('''
SELECT
    a.ads_id,
    COUNT(DISTINCT a.id) AS total_clicks,
    COUNT(DISTINCT CASE WHEN a.ads_source = 'Facebook' THEN a.id END) AS total_facebook_clicks,
    COUNT(DISTINCT CASE WHEN a.ads_source = 'Instagram' THEN a.id END) AS total_instagram_clicks,
    COUNT(DISTINCT CASE WHEN u.id IS NOT NULL THEN a.id END) AS total_converted,
    SUM(CASE WHEN t.transaction_type = 'purchase' THEN 1 ELSE 0 END) AS total_purchases,
    SUM(CASE WHEN t.transaction_type = 'purchase' THEN t.amount ELSE 0 END) AS total_purchase_amount
FROM (
    SELECT id, ads_id, 'Facebook' AS ads_source FROM fb
    UNION ALL
    SELECT id, ads_id, 'Instagram' AS ads_source FROM ig
) AS a
LEFT JOIN dibimbing.user.users AS u ON a.id = u.client_id
LEFT JOIN dibimbing.event."User Event" AS e ON a.ads_id = e.event_data->>'ads_id'
LEFT JOIN dibimbing.user.user_transactions AS t ON u.id = t.user_id AND t.transaction_type = 'purchase'
GROUP BY a.ads_id
''', eager=True)

ComputeError: sql parser error: Expected end of statement, found: ON at Line: 6, Column 17

In [9]:
import polars as pl

user = pl.read_parquet('user_transactions.parquet')
user.write_csv('a')

In [21]:
import requests

response = requests.post('http://host.docker.internal:3000/api/session',
                         json={'username': 'muhammadmuhidin222@gmail.com',
                               'password': 'Metabase94'})
session_id = response.json()['id']
headers = {'X-Metabase-Session': session_id}
response.json()


{'id': '194af1cf-c553-4e0a-a50a-5daaf63b3aba'}

In [19]:
json =
{
  "name": "dibimbing-final-project",
  "cards": [
    {
      "id": 132,
      "collection_id": 34,
      "description": null,
      "display": "bar",
      "name": "How Age is distributed",
      "include_csv": false,
      "include_xls": false,
      "dashboard_card_id": 195,
      "dashboard_id": 13,
      "parameter_mappings": []
    },
    {
      "id": 87,
      "collection_id": 25,
      "description": null,
      "display": "bar",
      "name": "How the Gender is distributed",
      "include_csv": false,
      "include_xls": false,
      "dashboard_card_id": 139,
      "dashboard_id": 13,
      "parameter_mappings": []
    },
    {
      "id": 89,
      "collection_id": 27,
      "description": null,
      "display": "row",
      "name": "Per Device Type",
      "include_csv": false,
      "include_xls": false,
      "dashboard_card_id": 138,
      "dashboard_id": 13,
      "parameter_mappings": []
    },
    {
      "id": 130,
      "collection_id": 34,
      "description": null,
      "display": "line",
      "name": "New Dim User per month",
      "include_csv": false,
      "include_xls": false,
      "dashboard_card_id": 196,
      "dashboard_id": 13,
      "parameter_mappings": []
    },
    {
      "id": 95,
      "collection_id": 28,
      "description": null,
      "display": "table",
      "name": "Ads ID by Device Type",
      "include_csv": false,
      "include_xls": false,
      "dashboard_card_id": 141,
      "dashboard_id": 13,
      "parameter_mappings": []
    },
    {
      "id": 94,
      "collection_id": 28,
      "description": null,
      "display": "bar",
      "name": "How the Ads ID is distributed",
      "include_csv": false,
      "include_xls": false,
      "dashboard_card_id": 140,
      "dashboard_id": 13,
      "parameter_mappings": []
    },
    {
      "id": 82,
      "collection_id": 20,
      "description": null,
      "display": "row",
      "name": "Facebook Ads per Device Type",
      "include_csv": false,
      "include_xls": false,
      "dashboard_card_id": 142,
      "dashboard_id": 13,
      "parameter_mappings": []
    },
    {
      "id": 83,
      "collection_id": 20,
      "description": null,
      "display": "row",
      "name": "Facebook Ads per Ads ID",
      "include_csv": false,
      "include_xls": false,
      "dashboard_card_id": 143,
      "dashboard_id": 13,
      "parameter_mappings": []
    },
    {
      "id": 102,
      "collection_id": 32,
      "description": null,
      "display": "line",
      "name": "Fact Daily Event Performance by Timestamp",
      "include_csv": false,
      "include_xls": false,
      "dashboard_card_id": 180,
      "dashboard_id": 13,
      "parameter_mappings": []
    },
    {
      "id": 103,
      "collection_id": 32,
      "description": null,
      "display": "line",
      "name": "Total Purchase Amount over time",
      "include_csv": false,
      "include_xls": false,
      "dashboard_card_id": 181,
      "dashboard_id": 13,
      "parameter_mappings": []
    },
    {
      "id": 106,
      "collection_id": 32,
      "description": null,
      "display": "bar",
      "name": "Timestamp by quarter of the year",
      "include_csv": false,
      "include_xls": false,
      "dashboard_card_id": 182,
      "dashboard_id": 13,
      "parameter_mappings": []
    }
  ],
  "channels": [
    {
      "channel_type": "email",
      "enabled": true,
      "recipients": [
        {
          "email": "muhammadmuhidin222@gmail.com",
          "first_name": "Muhammad",
          "locale": null,
          "last_login": "2023-10-22T15:44:19.316944Z",
          "is_active": true,
          "is_qbnewb": false,
          "updated_at": "2023-10-23T06:03:42.459196",
          "is_superuser": true,
          "login_attributes": null,
          "id": 1,
          "last_name": "Muhidin",
          "date_joined": "2023-10-14T03:20:40.521886Z",
          "sso_source": null,
          "common_name": "Muhammad Muhidin"
        }
      ],
      "details": {},
      "schedule_type": "hourly",
      "schedule_day": "mon",
      "schedule_hour": 8,
      "schedule_frame": "first"
    }
  ],
  "skip_if_empty": false,
  "collection_id": null,
  "parameters": [],
  "dashboard_id": 13
}


response = requests.post(url='http://host.docker.internal:3000/api/pulse/test', headers=headers, json=json)

SyntaxError: invalid syntax (928794399.py, line 1)