# Publication types

In [78]:
import sys
sys.path.append('..')
import pandas as pd
from datetime import datetime
from utils import build_mongo_client, test_mongo_client, get_customers_database_infos, get_pubs, get_pub_type, PUB_TYPE, get_learners_group

In [79]:
# Open a MongoDB client for the production environment, then run the
# connectivity check (it prints a confirmation once the deployment answers).
# NOTE(review): this targets production; the cells below only appear to read
# data — confirm the helpers never write.
client = build_mongo_client(env="production")
test_mongo_client(client)

Pinged your deployment. You successfully connected to MongoDB!


In [80]:
customers_info = get_customers_database_infos(client)

# Database names in the order returned, plus a lookup from database name to
# the full customer record (used later to attach client name and hostnames).
customers_db_names = [info["dbName"] for info in customers_info]
customers_info_dict = dict(zip(customers_db_names, customers_info))

print(f'number of databases: {len(customers_info)}')

number of databases: 184


A publication is online if:
- accessControl is not null AND 
   - ( (accessControl._cls is equal to SCORMConnect or public) 
   - OR (accessControl._cls is equal to private and the publication is associated with a learner group whose expiration date is null or in the future) )

In [91]:
## get active learner groups
## (a group is active when it has no expiration date, or one that is still in the future)
learners_group_all = list()
for nm in customers_db_names:
    learners_group_all += get_learners_group(client, nm)

# Single cutoff shared by every comparison so all groups are judged against the same instant.
# NOTE(review): datetime.now() is naive local time. If expirationDate comes back from
# MongoDB as a naive UTC datetime (PyMongo's default), the cutoff is off by the local
# UTC offset — confirm, and switch to a UTC "now" if so.
now = datetime.now()

# Boolean mask aligned with learners_group_all (one flag per group).
# The previous version stored the *filtered groups* here instead of flags; zipping that
# shorter, always-truthy sequence against learners_group_all silently kept the first K
# groups rather than the active ones.
is_learners_group_active = pd.Series(
    [
        (lg.get("expirationDate", None) is None) or (lg["expirationDate"] > now)
        for lg in learners_group_all
    ],
    dtype=bool,  # keeps the mask boolean even when there are no groups at all
)
learners_group_active = [lg for (lg, b) in zip(learners_group_all, is_learners_group_active) if b]

## get id of publications associated with an active learning group
id_pubs_with_active_learning_group = list()
for lg in learners_group_active:
    # ids are stringified so they can be compared with str(p["_id"]) later on
    id_pubs_with_active_learning_group += [str(e) for e in lg["publications"]]
id_pubs_with_active_learning_group = pd.Series(id_pubs_with_active_learning_group).unique()

In [92]:
# Maps each publication type value (as stored in "_cls") to the lower-cased
# enum member name used as a column prefix.
corresp_name_pub = {e.value: e.name.lower() for e in PUB_TYPE}

# Set copy of the ids for constant-time membership tests; `in` on the NumPy
# array is a linear scan repeated for every private publication.
active_lg_pub_ids = set(id_pubs_with_active_learning_group)

# One dict of statistics per customer database; turned into a DataFrame afterwards.
pubs_stat_list = list()
for nm in customers_db_names:
    pubs = get_pubs(client, nm)

    ## number of publications per type
    pubs_stats = pd.Series([get_pub_type(p) for p in pubs]).value_counts().to_dict()
    pubs_stats["db_name"] = nm

    # rename the raw type keys to "<type>_nb", defaulting to 0 when a type is absent
    for k, v in corresp_name_pub.items():
        pubs_stats[f"{v}_nb"] = pubs_stats.pop(k, 0)

    ## online flag of every publication: it does not depend on the publication
    ## type, so it is computed once per database instead of once per type.
    ## A publication is online when accessControl is set and is either
    ## public / SCORMConnect, or private and tied to an active learner group.
    is_pub_online = []
    for p in pubs:
        access_control = p.get("accessControl", None)
        if access_control is None:
            is_pub_online.append(False)
            continue
        access_cls = access_control["_cls"]
        is_pub_online.append(
            (access_cls in ["public", "SCORMConnect"])
            or ((access_cls == "private") and (str(p["_id"]) in active_lg_pub_ids))
        )

    ## 1) date of last create / update for each type of module
    ## 2) number of active publications per type
    for e in PUB_TYPE:
        pub_type = e.value
        type_name = e.name.lower()
        is_pub_of_type = [(p["_cls"] == pub_type) for p in pubs]

        # 1) most recent (date, name) pair per action; (None, None) when the
        #    database has no publication of this type
        for action in ["created", "updated"]:
            tmp = [(p[f"date_{action}"], p['name']) for (p, of_type) in zip(pubs, is_pub_of_type) if of_type]
            res = max(tmp, key=lambda x: x[0]) if (len(tmp) > 0) else (None, None)
            pubs_stats[f'{type_name}_last_{action}_date'] = res[0]
            pubs_stats[f'{type_name}_last_{action}_name'] = res[1]

        ## 2) plain Python counting: also well-defined for a database without
        ##    publications, where a dtype-less empty pd.Series is not boolean
        pubs_stats[f"{type_name}_nb_active"] = sum(
            (of_type and online) for (of_type, online) in zip(is_pub_of_type, is_pub_online)
        )

    pubs_stat_list.append(pubs_stats)


In [93]:
df_pubs = pd.DataFrame(pubs_stat_list)

## attach the customer name and hostnames recorded for each database
df_pubs['client_name'] = df_pubs["db_name"].map(lambda dbn: customers_info_dict[dbn]["name"])
df_pubs['url'] = df_pubs["db_name"].map(lambda dbn: customers_info_dict[dbn]["hostnames"])

## reorder columns: identification columns first, then the columns of each
## publication type grouped together
col_order = ['client_name', 'url', 'db_name'] + [
    c
    for pub_type in ("course", "single_module", "adaptive")
    for c in df_pubs.columns
    if c.startswith(pub_type)
]

## reorder rows by last updated date for single modules (most recent first)
df_pubs = df_pubs[col_order].sort_values(by='single_module_last_updated_date', ascending=False)

## write excel file

In [99]:
# Export the per-database publication statistics to an Excel workbook (row index omitted).
# NOTE(review): the output path is a hard-coded, dated file on the local Desktop —
# adjust it before re-running so an earlier export is not overwritten.
df_pubs.to_excel('~/Desktop/2024-02-05-type-publications.xlsx', index=False)

In [97]:
# Global totals across all databases: number of publications per type, the
# grand total, and each type's share of that total.
pub_types_list = ["course", "single_module", "adaptive"]

stats = {f"nb_total_{c}": df_pubs[f"{c}_nb"].sum() for c in pub_types_list}
# at this point stats only holds the per-type totals, so their sum is the grand total
stats["nb_total"] = sum(stats.values())
stats.update(
    {f"pct_total_{c}": stats[f"nb_total_{c}"] / stats["nb_total"] for c in pub_types_list}
)

In [98]:
# Display the aggregated totals and shares computed in the previous cell.
stats

{'nb_total_course': 5343,
 'nb_total_single_module': 344,
 'nb_total_adaptive': 136,
 'nb_total': 5823,
 'pct_total_course': 0.9175682637815559,
 'pct_total_single_module': 0.05907607762321827,
 'pct_total_adaptive': 0.02335565859522583}