In [None]:
!pip install -r requirements.txt --no-warn-conflicts --progress-bar emoji
%load_ext autoreload
%autoreload 2

In [None]:
import json
import os
import urllib
import urllib.request
from datetime import date

import girder_client
import pandas as pd
import psycopg2

import postgres_to_girder

def _read_json(path):
    """Parse the JSON file at ``path`` and return the decoded object."""
    with open(path, "r") as handle:
        return json.load(handle)


# config.json: connection settings read by the cells below (Girder, Postgres, ...).
# context.json: dictionary that is merged into the metadata of every item created below.
config = _read_json("config.json")
context = _read_json("context.json")

# Base URL of the Girder REST API, built from the configured host.
api_url = "http://" + config["girder"]["host"] + "/api/v1"

In [None]:
# Open an authenticated Girder session; every later cell talks to Girder through `gc`.
try:
    gc = girder_client.GirderClient(apiUrl=api_url)
    gc.authenticate(
        config["girder"]["user"],
        config["girder"]["password"]
    )
except Exception:
    print(
        "I am unable to connect to the "
        "Girder database 🏗🍃"
    )
    # Re-raise so the real cause is shown and the run stops here, instead of
    # failing in a later cell on an undefined or unauthenticated `gc`.
    raise
else:
    print("Connected to the Girder database 🏗🍃")

In [None]:
# Resolve the id of the Girder collection named "activities"; the upload loop
# further down uses it as the parent of every activity folder.
activities_id = postgres_to_girder.get_girder_id_by_name(
    girder_connection=gc, entity="collection", name="activities"
)

In [None]:
# Show the resolved collection id as this cell's output (sanity check).
activities_id

In [None]:
# Open the Postgres connection that the queries below read from.
try:
    # Pass the settings as keyword arguments rather than hand-building a
    # "key=value key=value" DSN string: psycopg2 then quotes values containing
    # spaces and accepts non-string values such as an integer port.
    conn = psycopg2.connect(**config["postgres"])
except Exception:
    print(
        "I am unable to connect to the "
        "Postgres database 🐘"
    )
    # Re-raise so the underlying error is visible and later cells do not fail
    # on an undefined `conn`.
    raise
else:
    print("Connected to the Postgres database 🐘")

In [None]:
# Read the complete `acts` and `users` tables from Postgres into DataFrames.
acts = pd.read_sql_query(sql="SELECT * FROM acts;", con=conn)
users = pd.read_sql_query(sql="SELECT * FROM users;", con=conn)

Pull the respondent out of each activity title in the `acts` DataFrame loaded from Postgres, leaving the bare activity name in `title`:

In [None]:
# Each rule is (marker, separator, terminator), tried in order. When `marker`
# occurs in a title, the title is split on `separator`: the first piece is the
# activity name, and the second piece, cut at the first `terminator`, is the
# respondent. Every marker contains its separator, so the second piece exists.
_TITLE_RULES = (
    (" - ", " - ", " "),
    (" – ", " – ", " "),
    ("Scale-", "-", " "),
    (" ― ", " ― ", "-"),
    ("Index-", "-", ")"),
)


def _split_title(title):
    """Split a raw activity title into ``(activity_name, respondent)``.

    The first matching entry of ``_TITLE_RULES`` decides how the title is cut.
    When no rule matches, " Self Report" / " Parent Report" are removed from
    the name and the respondent is "Self" for titles containing "_SR" or
    "-SR", "Parent" for titles containing "_P", and "" otherwise.
    A name left with an opening "(" but no ")" gets the ")" appended.
    """
    for marker, separator, terminator in _TITLE_RULES:
        if marker in title:
            pieces = title.split(separator)
            name = pieces[0]
            respondent = pieces[1].split(terminator)[0]
            break
    else:
        name = title.replace(
            " Self Report",
            ""
        ).replace(
            " Parent Report",
            ""
        )
        if "_SR" in title or "-SR" in title:
            respondent = "Self"
        elif "_P" in title:
            respondent = "Parent"
        else:
            respondent = ""
    # Cutting the title can drop the closing parenthesis of an abbreviation.
    if "(" in name and ")" not in name:
        name = "{0})".format(name)
    return name, respondent


# Keep the untouched titles in their own column so this cell can be re-run:
# parsing always starts from the raw title, never from an already-cleaned one.
if "title_raw" not in acts.columns:
    acts["title_raw"] = acts["title"]
_parsed_titles = [_split_title(raw_title) for raw_title in acts["title_raw"]]
acts["Respondent"] = [respondent for _, respondent in _parsed_titles]
acts["title"] = [name for name, _ in _parsed_titles]

In [None]:
# Display the activities table after the title / respondent clean-up above.
acts

In [None]:
def _split_abbreviation(title):
    """Split a title of the form "Name (ABBR)" into ``(name, abbreviation)``.

    The title is cut at "(" and the shorter of the two parts is taken as the
    abbreviation. Titles without "(" or with an empty abbreviation are returned
    whole with ``None``; titles with more than one "(" are printed for review
    and also returned whole with ``None``.
    """
    if "(" not in title:
        return title, None
    parts = [
        part.strip(")").strip() for part in title.split("(")
    ]
    if len(parts) != 2:
        print(parts)
        return title, None
    if len(parts[0]) > len(parts[1]):
        name, abbreviation = parts[0], parts[1]
    else:
        name, abbreviation = parts[1], parts[0]
    if not abbreviation:
        return title, None
    return name, abbreviation


# For every activity row: create (or reuse) a Girder folder named after the
# activity, create (or reuse) a dated item inside it that carries the activity
# metadata, and attach the activity image if there is one.
# NOTE(review): assumes every activity author already has a Girder account; the
# cell that creates missing Girder users comes later in this notebook — confirm
# the intended run order.
for i, activity in acts["title"].items():
    activity_name, abbreviation = _split_abbreviation(activity)

    # Read the respondent from this row. Looking it up by title returns the
    # first match only, which is wrong when two rows share a cleaned title
    # (e.g. a self-report and a parent-report version of the same activity).
    respondent = acts.loc[i, "Respondent"]
    updated_at = acts.loc[i, "updated_at"]

    report_suffix = "― {0} Report".format(
        respondent
    ) if len(respondent) else ""
    if abbreviation:
        label = "{0} ({1}) {2}".format(
            activity_name,
            abbreviation,
            report_suffix
        )
    else:
        label = "{0} {1}".format(
            activity,
            report_suffix
        )
    item_version = "{0} ({1})".format(
        " ― ".join([
            "Healthy Brain Network",
            label
        ]).strip(" "),
        # Explicit ISO date format; "%F" is not supported on every platform.
        updated_at.strftime("%Y-%m-%d")
    )

    # Create or locate top-level folder and return _id
    activity_folder_id = gc.createFolder(
        name=activity_name,
        parentId=activities_id,
        parentType="collection",
        public=True,
        reuseExisting=True
    )["_id"]

    # Create or locate Item
    author_email = users.loc[
        users["id"] == acts.loc[i, "user_id"],
        "email"
    ].values[0]
    user = {
        "@id": "".join([
            "user/",
            postgres_to_girder.get_user_id_by_email(
                gc,
                author_email
            )
        ])
    }
    act_data = json.loads(acts.loc[i, "act_data"])
    activity_item_id = gc.createItem(
        name=item_version,
        parentFolderId=activity_folder_id,
        reuseExisting=True,
        metadata={
            **context,
            "schema:name": {
                "@value": activity_name,
                "@language": "en-US"
            },
            "abbreviation": abbreviation,
            "@type": acts.loc[i, "type"],
            "status": acts.loc[i, "status"],
            "pav:lastUpdatedOn": updated_at.isoformat(),
            # Copy the remaining activity properties verbatim; the three
            # excluded keys are handled separately (or not stored at all).
            **{
                prop: act_data[prop] for prop in act_data if prop not in [
                    "questions",
                    "instruction",
                    "image_url"
                ]
            },
            "instruction": {
                "@value": act_data["instruction"],
                "@language": "en-US"
            } if (
                "instruction" in act_data
            ) and len(
                act_data["instruction"]
            ) else None,
            "oslc:modifiedBy": user,
            "pav:createdBy": user
        }
    )["_id"]

    # Upload applicable file(s)
    image_url = act_data.get("image_url")
    if image_url:
        # The context manager closes the HTTP response once the upload is done.
        with urllib.request.urlopen(image_url) as img:
            new_img_id = gc.uploadFile(
                parentId=activity_item_id,
                stream=img,
                # File name: alphanumeric characters of the activity name plus
                # the extension taken from the URL (query string ignored).
                name=".".join([
                    "".join([
                        c for c in activity_name if c.isalnum()
                    ]),
                    image_url.split("?")[0].split(".")[-1]
                ]),
                size=int(img.info()["Content-Length"])
            )["_id"]
        # Point the item's image_url at the copy now stored in Girder.
        gc.addMetadataToItem(
            itemId=activity_item_id,
            metadata={
                "image_url": "".join([
                    api_url,
                    "/file/",
                    new_img_id,
                    "/download?contentDisposition=inline"
                ])
            }
        )

In [None]:
def _girder_names(first_name, last_name, placeholders):
    """Return the ``(firstName, lastName)`` pair to register in Girder.

    first name: the placeholder when ``first_name`` is empty; otherwise
    ``first_name`` up to its first space.
    last name: ``last_name`` when present; otherwise the placeholder when
    ``first_name`` is empty, the second space-separated word of ``first_name``
    when it has one, and ``first_name`` itself as a last resort.
    ``placeholders`` supplies the "first_name" / "last_name" fallbacks.
    """
    if not first_name:
        given = placeholders["first_name"]
    elif " " in first_name:
        given = first_name.split(" ")[0]
    else:
        given = first_name

    if last_name:
        family = last_name
    elif not first_name:
        family = placeholders["last_name"]
    elif " " in first_name:
        family = first_name.split(" ")[1]
    else:
        family = first_name
    return given, family


# Create a Girder account for every Postgres user that does not have one yet;
# the ids of users that already exist are printed.
for i in users.index:
    email = users.loc[i, "email"]
    user_id = postgres_to_girder.get_user_id_by_email(
        gc,
        email
    )
    if user_id:
        print(user_id)
        continue
    first_name, last_name = _girder_names(
        users.loc[i, "first_name"],
        users.loc[i, "last_name"],
        config.get("missing_persons", {})
    )
    # Send the fields as request parameters so they are URL-encoded; building
    # the query string by hand breaks on "+", "&", "#" or spaces in any value.
    # NOTE(review): the Postgres `password` value is forwarded unchanged — if
    # that column holds a hash, Girder will treat the hash as the password; confirm.
    gc.post(
        "user",
        parameters={
            "login": email.replace(
                "@",
                "at"
            ),
            "firstName": first_name,
            "lastName": last_name,
            "password": users.loc[i, "password"],
            "admin": "true" if "admin" in str(
                users.loc[i, "role"]
            ) else "false",
            "email": email
        }
    )