In [3]:
!pip install -r requirements.txt --no-warn-conflicts --progress-bar emoji
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [107]:
import io
import json
import os
import urllib
import urllib.request
from datetime import date

import girder_client
import pandas as pd
import psycopg2

import postgres_to_girder

# Load the three settings objects used throughout the notebook: the parsed
# configuration, the context that is merged into item metadata, and the Girder
# API base URL.
config, context, api_url = postgres_to_girder.config()

# Open an authenticated Girder session with the configured user and password.
girder_connection = postgres_to_girder.connect_to_girder(
    api_url=api_url,
    authentication=(config["girder"]["user"], config["girder"]["password"])
)

# Open the Postgres connection from the "postgres" section of the configuration.
conn = postgres_to_girder.connect_to_postgres(config["postgres"])

# Locate the top-level "activities" collection; create it (public) on first run.
activities_id = postgres_to_girder.get_girder_id_by_name(
    entity="collection",
    name="activities",
    girder_connection=girder_connection
)
if not activities_id:
    # createCollection returns the whole collection document, while
    # `activities_id` is passed on as a parent id when folders are created,
    # so keep only the document's "_id".
    activities_id = girder_connection.createCollection(
        name="activities",
        public=True
    )["_id"]

# Read every source table in full into its own DataFrame, keyed by table name.
postgres_tables = {
    table_name: pd.io.sql.read_sql_query(
        "SELECT * FROM {0};".format(table_name),
        conn
    )
    for table_name in {
        "acts", "users", "user_acts", "organizations", "answers"
    }
}

# Push the Postgres users to Girder. The result is looked up by e-mail address
# further down to build each activity's "user/<...>" reference.
users_emails = postgres_to_girder.postgres_users_to_girder_users(
    postgres_tables["users"], girder_connection, config["missing_persons"]
)

# NOTE(review): `_respondents` is neither defined nor imported in the cells
# visible here, so this line raises NameError on a fresh kernel unless another
# cell defines it first -- confirm where it lives (possibly postgres_to_girder).
# The loop below reads a "respondent" column from the table this returns.
postgres_tables["acts"] = _respondents(postgres_tables["acts"])

Connected to the Girder database 🏗🍃 and authenticated.
Connected to the Postgres database 🐘


In [112]:
# Copy every Postgres activity (one row of "acts") into Girder:
#   1. a public folder named after the activity inside the activities collection,
#   2. an item named after the activity version, carrying the activity metadata,
#   3. optionally the activity image, uploaded once and linked from the metadata.
acts = postgres_tables["acts"]
gc = girder_connection
users = postgres_tables["users"]

# NOTE(review): `get_abbreviation` is neither defined nor imported in the cells
# visible here; on a fresh kernel it must be provided by another cell (or live
# in postgres_to_girder) -- confirm before relying on Restart & Run All.

# Iterate over the real index labels rather than range(len): `.loc` is
# label-based, so this stays correct even when the index is not 0..n-1.
for i in acts.index:
    activity_name, abbreviation = get_abbreviation(acts.loc[i, "title"])
    respondent = acts.loc[i, "respondent"]
    updated_at = acts.loc[i, "updated_at"]
    item_version = postgres_to_girder.get_postgres_item_version(
        activity_name,
        abbreviation,
        activity_source="Healthy Brain Network",
        respondent=respondent,
        # Spelled out instead of "%F", which is a platform-specific directive.
        version=updated_at.strftime("%Y-%m-%d")
    )

    # Create or locate top-level folder and return _id
    activity_folder_id = gc.createFolder(
        name=activity_name,
        parentId=activities_id,
        parentType="collection",
        public=True,
        reuseExisting=True
    )["_id"]

    # Reference to the activity's author: the e-mail of the Postgres user whose
    # id matches the row's user_id, translated through `users_emails`.
    author_email = users.loc[
        users["id"] == acts.loc[i, "user_id"],
        "email"
    ].values[0]
    user = {"@id": "".join(["user/", users_emails[author_email]])}

    act_data = json.loads(acts.loc[i, "act_data"])
    # A missing, null or empty instruction is stored as None.
    instruction = act_data.get("instruction")

    # Create or locate Item
    activity_item_id = gc.createItem(
        name=item_version,
        parentFolderId=activity_folder_id,
        reuseExisting=True,
        metadata={
            **context,
            "schema:name": {
                "@value": activity_name,
                "@language": "en-US"
            },
            "abbreviation": abbreviation if abbreviation else None,
            "@type": acts.loc[i, "type"],
            "status": acts.loc[i, "status"],
            "pav:lastUpdatedOn": updated_at.isoformat(),
            # Pass the remaining activity properties through unchanged; the
            # three excluded keys are handled separately (or not at all).
            **{
                prop: value for prop, value in act_data.items()
                if prop not in ("questions", "instruction", "image_url")
            },
            "instruction": {
                "@value": instruction,
                "@language": "en-US"
            } if instruction else None,
            "oslc:modifiedBy": user,
            "pav:createdBy": user,
            "respondent": respondent if respondent else None
        }
    )["_id"]

    # Upload applicable file(s)
    image_url = act_data.get("image_url")
    if not image_url:
        # No (or an empty) image URL: nothing to upload for this activity.
        continue

    # File name: the alphanumeric characters of the activity name plus the
    # extension taken from the URL (query string ignored).
    img_name = ".".join([
        "".join(c for c in activity_name if c.isalnum()),
        image_url.split("?")[0].split(".")[-1]
    ])
    same_name_ids = [
        item_file["_id"]
        for item_file in postgres_to_girder.get_files_in_item(
            gc,
            activity_item_id
        )
        if item_file["name"] == img_name
    ]
    if same_name_ids:
        # Reuse the file that actually carries this name (not merely the first
        # file of the item) and skip the download entirely.
        img_id = same_name_ids[0]
    else:
        # Only open the remote image when an upload is needed, and always close
        # the HTTP response afterwards.
        with urllib.request.urlopen(image_url) as response:
            content_length = response.info()["Content-Length"]
            if content_length is None:
                # No Content-Length header: buffer the body to learn its size,
                # which uploadFile needs.
                payload = response.read()
                stream, size = io.BytesIO(payload), len(payload)
            else:
                stream, size = response, int(content_length)
            img_id = gc.uploadFile(
                parentId=activity_item_id,
                stream=stream,
                name=img_name,
                size=size
            )["_id"]

    # Point the item's image_url at the copy stored in Girder.
    gc.addMetadataToItem(
        itemId=activity_item_id,
        metadata={
            "image_url": "".join([
                api_url,
                "/file/",
                img_id,
                "/download?",
                "contentDisposition="
                "inline"
            ])
        }
    )