In [1]:
# Standard library
import os
import sys
import warnings

# Third-party
import altair as alt
import pandas as pd

# Turn off pandas' chained-assignment (SettingWithCopy) check for this session.
pd.options.mode.chained_assignment = None
# Use Altair's 'mimetype' renderer for chart display.
alt.renderers.enable('mimetype')
# Remove Altair's max-rows limit on chart data.
alt.data_transformers.disable_max_rows()
# Suppress every Python warning for the rest of the session.
warnings.filterwarnings('ignore')

In [2]:
import altair_saver
from selenium import webdriver
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager

# Let webdriver_manager provide the chromedriver, then start Chrome with it.
# NOTE(review): `driver` is not referenced by any other cell visible here and
# nothing calls `driver.quit()` - confirm this browser session is still needed.
chromedriver_path = ChromeDriverManager().install()
driver = webdriver.Chrome(chromedriver_path)

In [10]:
import vl_convert as vlc

def save_chart(chart, filename, scale_factor=1):
    '''
    Save an Altair chart as an SVG or PNG file using vl-convert.

    The output format is validated first and the chart is converted before
    the destination file is opened, so an unsupported extension or a failed
    conversion does not leave an empty or truncated file behind.

    Parameters
    ----------
    chart : altair.Chart
        Altair chart to save
    filename : str or path-like
        The path to save the chart to. The format is taken from the text
        after the last ``.`` and is matched case-insensitively
        (``svg`` or ``png``).
    scale_factor: int or float
        The factor to scale the image resolution by.
        E.g. A value of `2` means two times the default resolution.
        Only used for PNG output.

    Raises
    ------
    ValueError
        If the extension of `filename` is neither ``svg`` nor ``png``.
    '''
    extension = str(filename).split('.')[-1].lower()
    if extension not in ('svg', 'png'):
        raise ValueError("Only svg and png formats are supported")

    # Build the spec and render it while the default data transformer is
    # active and the row limit is lifted, as in the original implementation.
    with alt.data_transformers.enable("default"), alt.data_transformers.disable_max_rows():
        spec = chart.to_dict()
        if extension == 'svg':
            rendered = vlc.vegalite_to_svg(spec)
        else:
            rendered = vlc.vegalite_to_png(spec, scale=scale_factor)

    # Only touch the destination once rendering has succeeded.
    if extension == 'svg':
        # SVG is text; write it as UTF-8 regardless of the platform default.
        with open(filename, "w", encoding="utf-8") as f:
            f.write(rendered)
    else:
        with open(filename, "wb") as f:
            f.write(rendered)

In [3]:
# Read the derived users and repositories tables from CSV.
core_users, core_repos = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/derived_files/core_users.csv',
        '../data/derived_files/core_repos.csv',
    )
)

In [4]:
# Split the accounts table by its `type` column. `.copy()` gives each subset
# its own DataFrame, so columns assigned to it later are not written into a
# filtered slice of the loaded frame (the SettingWithCopyWarning case).
# Note: `core_users` is rebound here, so re-running this cell without
# re-loading the CSV first leaves `core_orgs` empty.
core_orgs = core_users[core_users.type == 'Organization'].copy()
core_users = core_users[core_users.type == 'User'].copy()

In [5]:
# Parse `created_at` in each table; errors='coerce' turns unparseable values into NaT.
for frame in (core_users, core_orgs, core_repos):
    frame['created_at'] = pd.to_datetime(frame['created_at'], errors='coerce')

In [6]:
# Label every row with the kind of record its table holds, so the tables can
# still be told apart after they are combined.
for frame, label in (
    (core_users, 'User'),
    (core_orgs, 'Organization'),
    (core_repos, 'Repository'),
):
    frame['data_type'] = label

In [45]:
# Stack the three tables into one long frame, keeping only the columns they
# share. Row labels from the source tables are kept (no ignore_index).
shared_columns = ['data_type', 'created_at', 'id']
all_data = pd.concat(
    [frame[shared_columns] for frame in (core_users, core_orgs, core_repos)]
)

In [42]:
# Attach to each row the number of rows in the table it came from.
# Rows whose data_type matches none of the three labels get 0.
rows_per_type = {
    'User': core_users.shape[0],
    'Organization': core_orgs.shape[0],
    'Repository': core_repos.shape[0],
}
all_data['total_counts'] = (
    all_data['data_type'].map(rows_per_type).fillna(0).astype('int64')
)

In [48]:
# Monthly creation counts, one line per data type.
chart = alt.Chart(all_data).mark_line().encode(
    x=alt.X('yearmonth(created_at):T', title='Date Created', axis=alt.Axis(format='%Y')),
    y=alt.Y('count()', title='Counts'),
    color=alt.Color('data_type', title='Type of Data'),
).properties(
    title='Number of Users, Organizations, and Repositories identified as DH, Created Over Time'
)

# Label the last point of each line with that type's overall total.
# The label position is computed with explicit transforms instead of passing
# `aggregate='max'` next to the `count()` / `yearmonth()` shorthands, which
# declared two different aggregates for a single channel.
# Requires the `total_counts` column on `all_data`.
text = alt.Chart(all_data).transform_filter(
    # Rows without a usable date cannot be placed on the time axis.
    'isValid(datum.created_at)'
).transform_timeunit(
    created_month='yearmonth(created_at)'
).transform_aggregate(
    # Same monthly count the line layer plots; total_counts is constant per type.
    monthly_count='count()',
    total_counts='max(total_counts)',
    groupby=['data_type', 'created_month']
).transform_window(
    # Rank months newest-first within each type so rank 1 is the line's end.
    month_rank='rank()',
    sort=[alt.SortField('created_month', order='descending')],
    groupby=['data_type']
).transform_filter(
    alt.datum.month_rank == 1
).mark_text(
    align='left',
    dx=5  # small gap between the end of the line and its label
).encode(
    # Same axis titles as the line layer so the layered axes keep one title.
    x=alt.X('created_month:T', title='Date Created'),
    y=alt.Y('monthly_count:Q', title='Counts'),
    text=alt.Text('total_counts:Q', format=',d'),
    color=alt.Color('data_type', title='Type of Data')
)
chart + text
    

<VegaLite 4 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/troubleshooting.html


In [37]:
# Export the line chart as a PNG with scale_factor=2.
# Only `chart` is written; the `text` layer is not part of this export.
save_chart(
    chart,
    filename='../outputs/number_of_users_orgs_repos_created_over_time.png',
    scale_factor=2,
)

In [39]:
# Number of rows per data type in the combined frame.
all_data['data_type'].value_counts()

User            3966
Repository      2326
Organization     421
Name: data_type, dtype: int64

In [None]:
# Monthly creation counts, one line per data type (same encoding as the
# earlier line chart), bound to `all_data` so the layer has data to draw.
chart = alt.Chart(all_data).mark_line().encode(
    x=alt.X('yearmonth(created_at):T', title='Date Created', axis=alt.Axis(format='%Y')),
    y=alt.Y('count()', title='Counts'),
    color=alt.Color('data_type', title='Type of Data'),
).properties(
    title='Number of Users, Organizations, and Repositories identified as DH, Created Over Time'
)

# One total label per data type.
# - `groupby` keeps `data_type` in the aggregated rows; without it the field
#   is dropped and the encodings below have nothing to refer to.
# - The label text includes the type name because all labels are black.
text = alt.Chart(all_data).mark_text(
    align='center',
    baseline='top',
    color="black",
    dx=150  # shift the labels to the right of the horizontal centre
).transform_aggregate(
    total='count(data_type)',
    groupby=['data_type']
).transform_calculate(
    text="datum.data_type + ' total: ' + format(datum.total, ',d')"
).encode(
    # Give each type its own row so the labels do not overlap; this scale is
    # kept separate from the line chart's count axis by resolve_scale below.
    y=alt.Y('data_type:N', axis=None),
    text=alt.Text('text:N'),
    detail=alt.Detail('data_type:N')
)
(chart + text).resolve_scale(y='independent')
    