In [1]:
import os
import sys
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient, __version__
from dotenv import load_dotenv
import pandas as pd
import datetime
from dateutil import parser as date_parser
import json
from pprint import pprint
from functions import *

In [2]:
# Environment vars
load_dotenv()
connect_str = os.getenv('AZURE_STORAGE_CONNECTION_STRING')

In [3]:
# Hardcoded date
today = datetime.datetime(year=2022, month=4, day=7)
today_str = datetime_string(today, "/")

In [4]:
# Instantiate blob service client
try:
    blob_service_client = BlobServiceClient.from_connection_string(connect_str)
except Exception as e:
    print(f'Unable to connect to BlobServiceClient: {e}')

In [5]:
# Fetching users, accounts, and chargebee subscriptions.
# Merging all tables into one.
df = get_users_accounts_subscriptions_data(blob_service_client, today_str)

Fetching user data...
<class 'pandas.core.frame.DataFrame'>
Int64Index: 88329 entries, 3 to 90338
Data columns (total 10 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   _id.$oid                           88329 non-null  object 
 1   accountId.$oid                     88329 non-null  object 
 2   language                           88289 non-null  float64
 3   dateCreated.$date                  88329 non-null  int64  
 4   email                              88329 non-null  object 
 5   auth0Cache.usermetadata.lastName   88248 non-null  object 
 6   auth0Cache.usermetadata.firstName  88256 non-null  object 
 7   subscriptionType                   88329 non-null  object 
 8   isPrimary                          88329 non-null  bool   
 9   auth0Cache.usermetadata.demo       161 non-null    object 
dtypes: bool(1), float64(1), int64(1), object(7)
memory usage: 6.8+ MB
None
Fetching account data...


In [6]:
df["user|dateCreated.$date"].head()
df[df["user|dateCreated.$date"] != None].head()

Unnamed: 0,account|_id.$oid,account|name,account|domain,account|isActive,account|type,account|trialEnds.$date,account|dateCreated.$date,account|ownerId.$oid,account|edition,account|subscriptionId,...,user|language,user|dateCreated.$date,user|email,user|auth0Cache.usermetadata.lastName,user|auth0Cache.usermetadata.firstName,user|subscriptionType,user|isPrimary,user|auth0Cache.usermetadata.demo,account|id,account|users_count
0,5b14ee04854ff50007e2a077,Azavista,azavista,True,1.0,1528897421000,2018-06-04 07:45:08.977,5b14ee05854ff50007e2a078,gtmhub-summit,1mk51ZXQtzEmDUIfF,...,0.0,2018-06-07 14:13:32.294000,jp@azavista.com,van der Kuijl,JP,regular,True,,5b14ee04854ff50007e2a077,2
1,5b14ee04854ff50007e2a077,Azavista,azavista,True,1.0,1528897421000,2018-06-04 07:45:08.977,5b14ee05854ff50007e2a078,gtmhub-summit,1mk51ZXQtzEmDUIfF,...,0.0,2018-06-04 07:45:09,gabriel.thomaidis@azavista.com,Thomaidis,Gabriel,regular,True,,5b14ee04854ff50007e2a077,2
2,5b1e3680d8c5b500075ecfd3,wetransform GmbH,wetransform,True,1.0,1529311488473,2018-06-11 08:44:48.473,5b1e3680d8c5b500075ecfd4,gtmhub-summit,1mkVvueQuePTWUOmB,...,0.0,2018-06-11 08:44:48.498000,tr@wetransform.to,Reitz,Thorsten,regular,True,,5b1e3680d8c5b500075ecfd3,4
3,5b1e3680d8c5b500075ecfd3,wetransform GmbH,wetransform,True,1.0,1529311488473,2018-06-11 08:44:48.473,5b1e3680d8c5b500075ecfd4,gtmhub-summit,1mkVvueQuePTWUOmB,...,0.0,2018-06-12 12:20:29.635000,st@wetransform.to,Templer,Simon,regular,True,,5b1e3680d8c5b500075ecfd3,4
4,5b1e3680d8c5b500075ecfd3,wetransform GmbH,wetransform,True,1.0,1529311488473,2018-06-11 08:44:48.473,5b1e3680d8c5b500075ecfd4,gtmhub-summit,1mkVvueQuePTWUOmB,...,0.0,2019-06-25 15:40:06.882000,ch@wetransform.to,Hönn,Christopher,regular,True,,5b1e3680d8c5b500075ecfd3,4


In [7]:
# Last 60 days
threshold_date = today - datetime.timedelta(days=60)

In [8]:
# Filter on users created at least 60 days ago
df = df[df["user|dateCreated.$date"] <= threshold_date]

In [9]:
# Number of unique users existing for at least 60 days
df.shape[0]

35074

In [10]:
# Read in CSDA data
csda_df = pd.read_csv(os.path.join(os.pardir, "shared_data", "csda_salesforce_export.csv"))

# Rename columns
csda_df.columns = [
    "_".join(column.lower().split())
    for column in csda_df.columns
]

# Add prefix for clarity after join
csda_df = csda_df.add_prefix("salesforce|")

In [11]:
# Left join CSDA table with main dataframe (df)
df = df.merge(
    csda_df,
    how="left",
    left_on="account|_id.$oid",
    right_on="salesforce|chargebee_id"
)

In [12]:
# Get Hubspot data
hubspot_df = get_hubspot_data()

Fetching Hubspot data...
Hubspot contacts...
Hubspot companies...


In [13]:
# Merge users with HubSpot data
df = df.merge(
    hubspot_df,
    how="left",
    left_on="user|email",
    right_on="hubspot|email"
)

In [14]:
# Add CSDA column
df["salesforce|team_member_name"] = df["salesforce|team_member_name"].fillna("-")
df["salesforce|is_csda"] = df["salesforce|team_member_name"].map(
    lambda x: True if x != "-" else False
)

In [15]:
# Rename columns intended to be kept
column_name_remapping = {
    "account|_id.$oid": "account.id",
    "account|name": "account.name",
    "account|dateCreated.$date": "account.date_created",
    "chargebee|status": "chargebee.status",
    "user|_id.$oid": "user.id",
    "user|language": "user.language",
    "user|dateCreated.$date": "user.date_created",
    "user|email": "user.email",
    "user|auth0Cache.usermetadata.lastName": "user.last_name",
    "user|auth0Cache.usermetadata.firstName": "user.first_name",
    "account|users_count": "account.users_count",
    "hubspot|jobtitle": "hubspot.job_title",
    "hubspot|industry": "hubspot.industry",
    "hubspot|numberofemployees": "hubspot.number_of_employees",
    "salesforce|is_csda": "salesforce.is_csda",
    "hubspot|country": "hubspot.country"
}

# Keep only the keys of mapping columns
df = df[column_name_remapping.keys()]

# Change names of columns based on mapping
df = df.rename(columns=column_name_remapping)

In [16]:
# Change language code to categorical
language_code_mapping = {
    0: "English",
    1: "German",
    2: "Chinese",
    3: "Bulgarian",
    4: "Spanish",
    5: "French",
    6: "Portuguese"
}

df = df.replace({"user.language": language_code_mapping})

In [17]:
df.head()

Unnamed: 0,account.id,account.name,account.date_created,chargebee.status,user.id,user.language,user.date_created,user.email,user.last_name,user.first_name,account.users_count,hubspot.job_title,hubspot.industry,hubspot.number_of_employees,salesforce.is_csda,hubspot.country
0,5b14ee04854ff50007e2a077,Azavista,2018-06-04 07:45:08.977,active,5b193d8cd340c50007d7c727,English,2018-06-07 14:13:32.294000,jp@azavista.com,van der Kuijl,JP,2,Commercial Director,Electronics,45,False,Netherlands
1,5b14ee04854ff50007e2a077,Azavista,2018-06-04 07:45:08.977,active,5b14ee05854ff50007e2a078,English,2018-06-04 07:45:09,gabriel.thomaidis@azavista.com,Thomaidis,Gabriel,2,People Operations Specialist,Electronics,45,False,Netherlands
2,5b1e3680d8c5b500075ecfd3,wetransform GmbH,2018-06-11 08:44:48.473,active,5b1e3680d8c5b500075ecfd4,English,2018-06-11 08:44:48.498000,tr@wetransform.to,Reitz,Thorsten,4,"CEO, Founder",Electronics,10,False,Switzerland
3,5b1e3680d8c5b500075ecfd3,wetransform GmbH,2018-06-11 08:44:48.473,active,5b1fba8dd8c5b500075f2db2,English,2018-06-12 12:20:29.635000,st@wetransform.to,Templer,Simon,4,Co-founder,Electronics,10,False,Switzerland
4,5b1e3680d8c5b500075ecfd3,wetransform GmbH,2018-06-11 08:44:48.473,active,5d124056d4643c00018d63a8,English,2019-06-25 15:40:06.882000,ch@wetransform.to,Hönn,Christopher,4,,Electronics,10,False,Switzerland


In [18]:
# Fetch user_roles data
user_roles_df = get_user_roles_table(blob_service_client, today_str)
user_roles_df.head()

Unnamed: 0,user_roles._id.$oid,user_roles.userId.$oid,user_roles.roleId.$oid,user_roles.accountId.$oid
0,574e9259ed915d0006b985e5,574e9259ed915d0006b985e4,573d93d9ed915d00052efb6b,574e9259ed915d0006b985e3
1,573e222fed915d0005cc2d07,573e222fed915d0005cc2d06,573d93d9ed915d00052efb6b,573e222fed915d0005cc2d05
2,573d93d9ed915d00052efb6c,573d93d9ed915d00052efb6a,573d93d9ed915d00052efb6b,573d93d9ed915d00052efb69
3,5746e3c8ed915d0005cc319c,5746e3c8ed915d0005cc319b,573d93d9ed915d00052efb6b,5746e3c8ed915d0005cc319a
4,573f32bded915d0005cc2e5d,573f32bded915d0005cc2e5c,573d93d9ed915d00052efb6b,573f32bded915d0005cc2e5b


In [19]:
# Fetch roles data
roles_df = get_roles_table(blob_service_client, today_str)
roles_df.head()

Unnamed: 0,roles._id.$oid,roles.name,roles.accountId.$oid
0,573dbb61ed915d0005cc2c4d,user,
1,5a9f9cbee5274a0007acfcf9,Company B,57fb5f7bed915d0006582898
2,5b86ad85df457100079c04e4,Admin II,57fb5f7bed915d0006582898
3,5b8d2accf9159100080416a2,Air (bo's experiment),573dbb12ed915d0005cc2c46
4,58f632d3ed915d0005e9ef6c,test role,58822288ed915d0005afa6ee


In [20]:
# Join user_roles with roles
user_roles_df = user_roles_df.merge(
    roles_df,
    how="left",
    left_on="user_roles.roleId.$oid",
    right_on="roles._id.$oid"
)

user_roles_df.head()

Unnamed: 0,user_roles._id.$oid,user_roles.userId.$oid,user_roles.roleId.$oid,user_roles.accountId.$oid,roles._id.$oid,roles.name,roles.accountId.$oid
0,574e9259ed915d0006b985e5,574e9259ed915d0006b985e4,573d93d9ed915d00052efb6b,574e9259ed915d0006b985e3,573d93d9ed915d00052efb6b,admin,
1,574e9259ed915d0006b985e5,574e9259ed915d0006b985e4,573d93d9ed915d00052efb6b,574e9259ed915d0006b985e3,573d93d9ed915d00052efb6b,admin,
2,573e222fed915d0005cc2d07,573e222fed915d0005cc2d06,573d93d9ed915d00052efb6b,573e222fed915d0005cc2d05,573d93d9ed915d00052efb6b,admin,
3,573e222fed915d0005cc2d07,573e222fed915d0005cc2d06,573d93d9ed915d00052efb6b,573e222fed915d0005cc2d05,573d93d9ed915d00052efb6b,admin,
4,573d93d9ed915d00052efb6c,573d93d9ed915d00052efb6a,573d93d9ed915d00052efb6b,573d93d9ed915d00052efb69,573d93d9ed915d00052efb6b,admin,


In [21]:
unique_user_roles_df = get_roles_by_user(user_roles_df)
unique_user_roles_df.head()

Unnamed: 0,user_roles.userId.$oid,roles
0,573d9359ed915d00052efb10,SysAdmin
1,573d93d9ed915d00052efb6a,admin
2,573db6aeed915d0005cc2bc5,admin
3,573dbb12ed915d0005cc2c47,"admin, user, user+, Ivan Osmak's team_7ba678c2..."
4,573dbb61ed915d0005cc2c4c,"admin, user, user+, Engineering_a8a9b5181dbd8f..."


In [22]:
# Join df with user_roles_df
df = df.merge(
    unique_user_roles_df,
    how="left",
    left_on="user.id",
    right_on="user_roles.userId.$oid"
)

df.head()

Unnamed: 0,account.id,account.name,account.date_created,chargebee.status,user.id,user.language,user.date_created,user.email,user.last_name,user.first_name,account.users_count,hubspot.job_title,hubspot.industry,hubspot.number_of_employees,salesforce.is_csda,hubspot.country,user_roles.userId.$oid,roles
0,5b14ee04854ff50007e2a077,Azavista,2018-06-04 07:45:08.977,active,5b193d8cd340c50007d7c727,English,2018-06-07 14:13:32.294000,jp@azavista.com,van der Kuijl,JP,2,Commercial Director,Electronics,45,False,Netherlands,5b193d8cd340c50007d7c727,"admin, Management_468cde61fa05e7e0_MembersRole..."
1,5b14ee04854ff50007e2a077,Azavista,2018-06-04 07:45:08.977,active,5b14ee05854ff50007e2a078,English,2018-06-04 07:45:09,gabriel.thomaidis@azavista.com,Thomaidis,Gabriel,2,People Operations Specialist,Electronics,45,False,Netherlands,5b14ee05854ff50007e2a078,"admin, People Operations_8eaf717bc3c07560_Memb..."
2,5b1e3680d8c5b500075ecfd3,wetransform GmbH,2018-06-11 08:44:48.473,active,5b1e3680d8c5b500075ecfd4,English,2018-06-11 08:44:48.498000,tr@wetransform.to,Reitz,Thorsten,4,"CEO, Founder",Electronics,10,False,Switzerland,5b1e3680d8c5b500075ecfd4,"admin, Revenue_MembersRole, Service_MembersRol..."
3,5b1e3680d8c5b500075ecfd3,wetransform GmbH,2018-06-11 08:44:48.473,active,5b1fba8dd8c5b500075f2db2,English,2018-06-12 12:20:29.635000,st@wetransform.to,Templer,Simon,4,Co-founder,Electronics,10,False,Switzerland,5b1fba8dd8c5b500075f2db2,"user, Product_MembersRole, Product_0d8de34ddea..."
4,5b1e3680d8c5b500075ecfd3,wetransform GmbH,2018-06-11 08:44:48.473,active,5d124056d4643c00018d63a8,English,2019-06-25 15:40:06.882000,ch@wetransform.to,Hönn,Christopher,4,,Electronics,10,False,Switzerland,5d124056d4643c00018d63a8,user


In [23]:
table_name = "managers"

data_eu = get_invalid_json(
    blob_service_client, 
    "researchanalyticsinsights", 
    f"Unprocessed/Gtmhub MongoDB EU/{today_str}/{table_name}.json"
)
data_us = get_invalid_json(
    blob_service_client, 
    "researchanalyticsinsights", 
    f"Unprocessed/Gtmhub MongoDB US/{today_str}/{table_name}.json"
)

managers_df = pd.DataFrame(json_csv(data_eu + data_us))

In [24]:
managers_df.head()

Unnamed: 0,_id.$oid,userId.$oid,accountId.$oid,dateCreated.$date,modifiedAt.$date
0,6109079e6fa5a93296473183,57fde284ed915d0006582b22,57fde284ed915d0006582b21,1627981726237,1627981726237
1,6109079e6fa5a93296473185,5d2f6c7d429b5e00017edafc,5a96a680ed915d00067413e7,1627981726300,1627981726300
2,6109079e6fa5a93296473186,5d16367e05dd4a0001766b92,5a96a680ed915d00067413e7,1627981726317,1627981726317
3,6109079e6fa5a93296473187,5b1d6208d8c5b500075ec175,5a96a680ed915d00067413e7,1627981726330,1627981726330
4,6109079e6fa5a93296473188,5cab7dec98c4930001511faa,5a96a680ed915d00067413e7,1627981726339,1627981726339


In [25]:
manager_ids = managers_df["userId.$oid"].tolist()

df["user.is_manager"] = df["user.id"].map(lambda x: True if x in manager_ids else False)

In [26]:
df[df["user.is_manager"] == False].head()

Unnamed: 0,account.id,account.name,account.date_created,chargebee.status,user.id,user.language,user.date_created,user.email,user.last_name,user.first_name,account.users_count,hubspot.job_title,hubspot.industry,hubspot.number_of_employees,salesforce.is_csda,hubspot.country,user_roles.userId.$oid,roles,user.is_manager
953,5d08d0b9665dea00017e208e,GCT,2019-06-18 11:53:29.587,active,61b86449896c1800011a4f04,English,2021-12-14 09:30:49.403000,jonathan@alocogroup.com,,gempro,59,,,,False,,61b86449896c1800011a4f04,"user, GCTCustUser, GradConn CustUser",False
1030,5df9e35f684cfe0001139326,OSRE,2019-12-18 08:29:19.568,active,6188e5f40d931f00017dc871,English,2021-11-08 08:55:16.514000,daan@osre.nl,,daan,20,,,,False,,6188e5f40d931f00017dc871,user,False
1132,5b3fe40a0912100007753eb9,Rohlik.cz,2018-07-06 21:50:02.937,active,61dae0e6608733000153a1d9,English,2022-01-09 13:19:34.446000,alina-daniela.istrate@rohlik.cz,Istrate,Alina,113,,,,False,,61dae0e6608733000153a1d9,user,False
1133,5b3fe40a0912100007753eb9,Rohlik.cz,2018-07-06 21:50:02.937,active,61dea0209ab05b00013de1c6,English,2022-01-12 09:32:16.758000,miroslav.simko@rohlik.cz,Šimko,Miroslav,113,,,,False,,61dea0209ab05b00013de1c6,user,False
1134,5b3fe40a0912100007753eb9,Rohlik.cz,2018-07-06 21:50:02.937,active,61e7ed9f0da6f40001ebd001,English,2022-01-19 10:53:19.842000,zdenek.lhotak@rohlik.cz,Lhoták,Zdeněk,113,,,,False,,61e7ed9f0da6f40001ebd001,user,False


In [27]:
df["roles"] = df["roles"].fillna("")
df["user.is_okr_champion"] = df["roles"].map(lambda role: True if "champion" in (_role := role.lower()) or ("okr" in _role and "champion" in _role) else False)

In [28]:
df[df["user.is_okr_champion"] == True].head()

Unnamed: 0,account.id,account.name,account.date_created,chargebee.status,user.id,user.language,user.date_created,user.email,user.last_name,user.first_name,account.users_count,hubspot.job_title,hubspot.industry,hubspot.number_of_employees,salesforce.is_csda,hubspot.country,user_roles.userId.$oid,roles,user.is_manager,user.is_okr_champion
1148,5c320d14c6187b00017298a3,Ubisoft,2019-01-06 14:13:40.881,non_renewing,5de778a1566a3a000100fd76,English,2019-12-04 09:13:05.550000,mathieu.haumesser@ubisoft.com,Haumesser,Mathieu,76,IT Project Coordinator for the CIO,Recreation,20000.0,False,France,5de778a1566a3a000100fd76,"admin, Office of the CIO_435c171fc22bb944_Memb...",True,True
1178,5c320d14c6187b00017298a3,Ubisoft,2019-01-06 14:13:40.881,non_renewing,60cb7c7847b2a2000161c087,English,2021-06-17 16:46:48.325000,ryan.nelson@ubisoft.com,Nelson,Ryan,76,,,,False,,60cb7c7847b2a2000161c087,"Ubisoft Business Teams, Ubisoft IT Teams, user...",True,True
1187,5c320d14c6187b00017298a3,Ubisoft,2019-01-06 14:13:40.881,non_renewing,6136046425c2a700012f40f0,English,2021-09-06 12:07:00.503000,scott.castelli@ubisoft.com,Castelli,Scott,76,information_technology,Recreation,20000.0,False,France,6136046425c2a700012f40f0,"user, Office of the CIO_435c171fc22bb944_Membe...",True,True
1194,5c320d14c6187b00017298a3,Ubisoft,2019-01-06 14:13:40.881,non_renewing,614c69e5b499e700016a6b2a,English,2021-09-23 11:49:57.843000,laetitia.scarlat-gilain@ubisoft.com,Scarlat,Laetitia,76,,Recreation,20000.0,False,France,614c69e5b499e700016a6b2a,"user, admin, Ubisoft IT Teams, OKRs Champions ...",True,True
1203,5c320d14c6187b00017298a3,Ubisoft,2019-01-06 14:13:40.881,non_renewing,6182de1053f4f00001b97caf,English,2021-11-03 19:08:00.192000,Carmen.Cismas@ubisoft.com,Cismas,Carmen,76,,,,False,,6182de1053f4f00001b97caf,"Ubisoft IT Teams, OKRs Champions & Communicati...",True,True


In [29]:
# Drop unnecessary columns
"""
Should I drop all id's?
account.id
user.id

user_roles._id.$oid
user_roles.userId.$oid
user_roles.roleId.$oid
user_roles.accountId.$oid
roles._id.$oid
roles.accountId.$oid

"""

df = df[
    df.columns[
        ~df.columns.isin(
            [
                # "account.id",
                "user.id",
                "user_roles._id.$oid",
                "user_roles.userId.$oid",
                "user_roles.roleId.$oid",
                "user_roles.accountId.$oid",
                "roles._id.$oid",
                "roles.accountId.$oid"
            ]
        )
    ]
]

In [30]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 35262 entries, 0 to 35261
Data columns (total 18 columns):
 #   Column                       Non-Null Count  Dtype         
---  ------                       --------------  -----         
 0   account.id                   35262 non-null  object        
 1   account.name                 35262 non-null  object        
 2   account.date_created         35262 non-null  datetime64[ns]
 3   chargebee.status             35262 non-null  object        
 4   user.language                35262 non-null  object        
 5   user.date_created            35262 non-null  object        
 6   user.email                   35262 non-null  object        
 7   user.last_name               35249 non-null  object        
 8   user.first_name              35250 non-null  object        
 9   account.users_count          35262 non-null  int64         
 10  hubspot.job_title            1560 non-null   object        
 11  hubspot.industry             3088 non-nul

In [31]:
df["user.date_created"] = df["user.date_created"].astype("datetime64[ns]")
df["account.date_created"] = df["account.date_created"].astype("datetime64[ns]")

In [32]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 35262 entries, 0 to 35261
Data columns (total 18 columns):
 #   Column                       Non-Null Count  Dtype         
---  ------                       --------------  -----         
 0   account.id                   35262 non-null  object        
 1   account.name                 35262 non-null  object        
 2   account.date_created         35262 non-null  datetime64[ns]
 3   chargebee.status             35262 non-null  object        
 4   user.language                35262 non-null  object        
 5   user.date_created            35262 non-null  datetime64[ns]
 6   user.email                   35262 non-null  object        
 7   user.last_name               35249 non-null  object        
 8   user.first_name              35250 non-null  object        
 9   account.users_count          35262 non-null  int64         
 10  hubspot.job_title            1560 non-null   object        
 11  hubspot.industry             3088 non-nul

In [33]:
df.head()

Unnamed: 0,account.id,account.name,account.date_created,chargebee.status,user.language,user.date_created,user.email,user.last_name,user.first_name,account.users_count,hubspot.job_title,hubspot.industry,hubspot.number_of_employees,salesforce.is_csda,hubspot.country,roles,user.is_manager,user.is_okr_champion
0,5b14ee04854ff50007e2a077,Azavista,2018-06-04 07:45:08.977,active,English,2018-06-07 14:13:32.294,jp@azavista.com,van der Kuijl,JP,2,Commercial Director,Electronics,45,False,Netherlands,"admin, Management_468cde61fa05e7e0_MembersRole...",True,False
1,5b14ee04854ff50007e2a077,Azavista,2018-06-04 07:45:08.977,active,English,2018-06-04 07:45:09.000,gabriel.thomaidis@azavista.com,Thomaidis,Gabriel,2,People Operations Specialist,Electronics,45,False,Netherlands,"admin, People Operations_8eaf717bc3c07560_Memb...",True,False
2,5b1e3680d8c5b500075ecfd3,wetransform GmbH,2018-06-11 08:44:48.473,active,English,2018-06-11 08:44:48.498,tr@wetransform.to,Reitz,Thorsten,4,"CEO, Founder",Electronics,10,False,Switzerland,"admin, Revenue_MembersRole, Service_MembersRol...",True,False
3,5b1e3680d8c5b500075ecfd3,wetransform GmbH,2018-06-11 08:44:48.473,active,English,2018-06-12 12:20:29.635,st@wetransform.to,Templer,Simon,4,Co-founder,Electronics,10,False,Switzerland,"user, Product_MembersRole, Product_0d8de34ddea...",True,False
4,5b1e3680d8c5b500075ecfd3,wetransform GmbH,2018-06-11 08:44:48.473,active,English,2019-06-25 15:40:06.882,ch@wetransform.to,Hönn,Christopher,4,,Electronics,10,False,Switzerland,user,True,False


In [34]:
# Format datetime fields as string for output file
df["account.date_created"] = df["account.date_created"].dt.strftime("%Y/%m/%d")
df["user.date_created"] = df["user.date_created"].dt.strftime("%Y/%m/%d")

In [35]:
df.shape

(35262, 18)

In [36]:
# Save data to csv file
df.to_csv(f"users_60+days_data_{datetime_string(today, '-')}_v2_verify.csv", index=False)