In [1]:
%cd /app

/app


In [2]:
import os
# Per Django's async-safety docs, the ORM refuses to run in a thread that has a
# running event loop (as a Jupyter kernel does); this setting disables that check.
# It is set here, before the Django setup import in the next cell.
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"

In [3]:
from gooeysite import wsgi
assert wsgi

from multiprocessing.pool import ThreadPool

from daras_ai_v2.functional import map_parallel
from daras_ai_v2 import db
from pages import UsageDashboard

from django.db import transaction
import streamlit as st
from app_users.models import AppUser
from pages.UsageDashboard import (
    get_all_doc_users,
    get_filtered_auth_users,
)


In [4]:
# One shared 1000-worker thread pool, bound both to the notebook-level `pool`
# and to UsageDashboard.pool.
UsageDashboard.pool = pool = ThreadPool(1000)

## fetch users

In [5]:
doc_users = get_all_doc_users()

2023-06-18 08:26:18.343 
  command:

    streamlit run /root/.cache/pypoetry/virtualenvs/ddgai-9TtSrW0h-py3.10/lib/python3.10/site-packages/ipykernel_launcher.py [ARGUMENTS]


doc users: 26120


In [6]:
# Look up the auth record for every doc user id. All three exclude_* switches
# are turned off, so anonymous, disabled and team users are not excluded.
auth_users = get_filtered_auth_users(
    user_ids=[doc.id for doc in doc_users],
    exclude_anon=False,
    exclude_disabled=False,
    exclude_team=False,
)

filtered users: 25784


In [7]:
# auth_users is a mapping, so its length is the number of auth users found.
len(auth_users)

25784

In [8]:
# Map every document id in the users collection to that document's fields.
user_accounts = {
    snapshot.id: snapshot.to_dict()
    for snapshot in db.get_collection_ref(db.USERS_COLLECTION).get()
}
print("users accounts:", len(user_accounts))

users accounts: 91652


In [9]:
import mimetypes
import requests
import datetime


def init_app_user(user, account) -> "AppUser":
    """Build an unsaved AppUser from a Firebase auth user and its account data.

    Args:
        user: Firebase user record; read for uid, disabled, display_name,
            email, phone_number and user_metadata.creation_timestamp.
        account: mapping with an optional "balance" entry (may be empty).

    Returns:
        A new AppUser populated from both sources. Nothing is written to the
        database here.
    """
    # The creation timestamp is divided by 1000 before conversion, i.e. it is
    # treated as milliseconds since the epoch, and made timezone-aware in UTC.
    created_at = datetime.datetime.fromtimestamp(
        user.user_metadata.creation_timestamp / 1000, tz=datetime.timezone.utc
    )
    # Firebase has no is_anonymous flag, so it is inferred: a user with no
    # display name, email or phone number counts as anonymous.
    has_identity = user.display_name or user.email or user.phone_number

    app_user = AppUser(uid=user.uid)
    app_user.uid = user.uid
    app_user.is_disabled = user.disabled
    app_user.display_name = user.display_name or ""
    app_user.email = user.email
    app_user.phone_number = user.phone_number
    app_user.created_at = created_at
    app_user.is_anonymous = not has_identity
    # A missing or falsy balance is stored as 0.
    app_user.balance = account.get("balance") or 0

    return app_user

In [11]:
# init_app_user() always returns an AppUser instance, so no truthiness filter
# is needed; account data falls back to an empty dict when the uid has none.
users = [
    init_app_user(user, user_accounts.get(uid, {}))
    for uid, user in auth_users.items()
]

In [12]:
len(users)

25784

## save users to db

In [13]:
# To rebuild the table from scratch, uncomment the next line (destructive!):
# AppUser.objects.all().delete()

# Load every uid already stored with a single query instead of issuing one
# lookup per user; only users that are not in the table yet get saved.
existing_uids = set(AppUser.objects.values_list("uid", flat=True))
for user in users:
    if user.uid in existing_uids:
        continue
    user.save()
    # Remember it so a repeated uid later in the list is skipped as well.
    existing_uids.add(user.uid)
AppUser.objects.count()

25784

## fetch runs

In [14]:
# Pass the set of known uids (the keys of auth_users) to the dashboard helper;
# the cell then displays how many entries came back.
page_runs = UsageDashboard.fetch_page_runs(set(auth_users))
len(page_runs)

pages: 28300


28300

In [16]:
# For every entry in page_runs, call list_documents() on the shared pool.
# NOTE(review): flat_map presumably concatenates the per-page results into one
# flat list — confirm in pages/UsageDashboard.
run_refs = UsageDashboard.flat_map(
    pool,
    lambda page: page.list_documents(),
    page_runs,
)
print("runs:", len(run_refs))

runs: 195022


In [None]:
# Fetch every run document on the thread pool. pool.map blocks until all
# fetches are done and returns results in the same order as run_refs.
runs = pool.map(lambda ref: ref.get(), run_refs)

# Export to datasette

In [None]:
!pip install sqlite_utils

In [None]:
# Imported under an alias so this cell does not rebind the name `db`, which
# the imports cell already uses for daras_ai_v2.db.
from django import db as django_db

# Clear Django's recorded query log and close stale database connections
# before starting the export.
django_db.reset_queries()
django_db.close_old_connections()

In [None]:
from sqlite_utils import Database

# NOTE(review): this rebinds `db`, a name earlier cells use for a different
# object; re-running those cells after this one will hit this Database instead.
# recreate=True: per the sqlite-utils docs, an existing export.db is deleted
# and created afresh.
db = Database("export.db", recreate=True)

In [None]:
# Copy every AppUser row (as dicts, via .values()) into the `users` table of
# the export database, with uid as its primary key.
db["users"].insert_all(AppUser.objects.values(), pk="uid")

In [None]:
from daras_ai_v2.base import StateKeys

def get_updated_at(d):
    """Pop the updated-at entry out of ``d`` and return it as a datetime.

    ``d`` is mutated: the ``StateKeys.updated_at`` key is removed from it.

    Returns:
        ``None`` when the key is absent. A string value is parsed with
        ``datetime.fromisoformat``; any other value is rebuilt as a plain
        ``datetime.datetime`` from its ``timestamp()`` and ``tzinfo``.
    """
    missing = object()
    raw = d.pop(StateKeys.updated_at, missing)
    if raw is missing:
        return None
    if not isinstance(raw, str):
        return datetime.datetime.fromtimestamp(raw.timestamp(), raw.tzinfo)
    return datetime.datetime.fromisoformat(raw)

def state_to_cols(r):
    """Flatten one fetched run document into a row dict for the export.

    The parent of the document reference supplies ``page_slug`` and the
    grandparent supplies ``uid``. The well-known StateKeys entries are popped
    out of the document's dict into their own columns; whatever is left is
    stored under ``state``.
    """
    state = r.to_dict()
    run_ref = r.reference
    row = {
        "page_slug": run_ref.parent.id,
        "run_id": r.id,
        "uid": run_ref.parent.parent.id,
        "updated_at": get_updated_at(state),
    }
    # Each of these column names matches the StateKeys attribute holding the
    # corresponding key of the state dict.
    for column in ("error_msg", "run_time", "run_status", "page_title", "page_notes"):
        row[column] = state.pop(getattr(StateKeys, column), None)
    row["state"] = state
    return row

# One row per fetched run; runs.uid is declared as a foreign key to users.uid.
db["runs"].insert_all(
    [state_to_cols(run_doc) for run_doc in runs],
    foreign_keys=[("uid", "users", "uid")],
)

In [None]:
db["runs"].columns

In [None]:
!ls -lh export.db

## save user photos

In [None]:
def fetch_photo(user):
    """Download a user's photo and store the uploaded URL on their AppUser row.

    Best-effort: returns without changing anything when the auth user has no
    photo, no AppUser row exists for the uid, the row already has a photo_url,
    or the download fails.

    Args:
        user: auth user record; read for ``uid`` and ``photo_url``.
    """
    if not user.photo_url:
        return
    try:
        app_user = AppUser.objects.get(uid=user.uid)
    except AppUser.DoesNotExist:
        return
    # This check can only happen once the row has been loaded.
    if app_user.photo_url:
        return
    try:
        # A timeout keeps one unresponsive host from blocking a worker forever.
        response = requests.get(user.photo_url, timeout=30)
    except requests.RequestException:
        return
    if not response.ok:
        return
    # The header may be absent or carry parameters ("image/jpeg; charset=...");
    # guess_extension only recognises the bare media type.
    content_type = response.headers.get("Content-Type", "").split(";")[0].strip()
    ext = mimetypes.guess_extension(content_type) or ""
    # NOTE(review): upload_file_from_bytes is not imported in any visible cell;
    # import it before running this cell.
    app_user.photo_url = upload_file_from_bytes(
        f"user_photo_{user.uid}{ext}", response.content
    )
    # Persist the new URL; without save() the assignment above is lost.
    app_user.save()


# map() calls fetch_photo once per user and blocks until every call is done.
# (apply_async(func, args) makes a single call with args unpacked as the
# positional arguments.) The trailing ";" hides the list of None results.
pool.map(fetch_photo, auth_users.values());