In [1]:
import os
import sys
from random import sample
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient, __version__
from dotenv import load_dotenv
import pandas as pd
import datetime
from dateutil import parser as date_parser
import json
from pprint import pprint
from functions_2 import *

In [2]:
# Environment vars
# load_dotenv() (python-dotenv) is expected to populate os.environ from a
# local .env file; connect_str is None when the variable is not defined.
load_dotenv()
connect_str = os.environ.get('AZURE_STORAGE_CONNECTION_STRING')

In [3]:
# Fixed snapshot date (not the wall-clock date). It is used to build the
# dated blob paths read below and the name of the exported CSV.
today = datetime.datetime(2022, 4, 7)
# datetime_string comes from functions_2; presumably it renders the date with
# the given separator -- confirm against that module.
today_str = datetime_string(today, "/")

In [4]:
# Instantiate blob service client
# Every later cell needs blob_service_client, so fail here with a clear
# message instead of continuing and hitting a NameError further down.
if not connect_str:
    raise RuntimeError(
        "AZURE_STORAGE_CONNECTION_STRING is not set; "
        "define it in the environment or in a .env file"
    )

try:
    blob_service_client = BlobServiceClient.from_connection_string(connect_str)
except Exception as e:
    # Keep the original diagnostic, then re-raise so the notebook stops at
    # the real cause of the failure.
    print(f'Unable to connect to BlobServiceClient: {e}')
    raise

In [5]:
# Get Amplitude tag information
# Components of the blob path for the "filter_added" event export.
source_name = "Redshift"
schema = "javascript"
_table = "filter_added"
# NOTE(review): schema and table are joined with no separator, giving
# "javascriptfilter_added" -- confirm this matches the blob name.
table_name = schema + _table

blob_path = f"Unprocessed/{source_name}/{today_str}/{table_name}.json"

# get_invalid_json is provided by functions_2; its return value is handed
# straight to the DataFrame constructor.
amplitude_tags = pd.DataFrame(
    get_invalid_json(blob_service_client, "researchanalyticsinsights", blob_path)
)

In [6]:
# Drop events that carry no field_name.
has_field_name = amplitude_tags["field_name"].notna()
amplitude_tags = amplitude_tags.loc[has_field_name]

# Keep only events whose field_name mentions "tag" (case-insensitive).
mentions_tag = amplitude_tags["field_name"].str.contains("tag", case=False)
amplitude_tags = amplitude_tags.loc[mentions_tag]

# Count the non-null event ids per user, then rename both columns to the
# names used in the exported file.
events_per_user = amplitude_tags.groupby("user_id")["id"].count()
filter_tags = events_per_user.reset_index().rename(
    columns={
        "id": "event.filtered_by_tag_count",
        "user_id": "event.user_id"
    }
)

In [7]:
# Fetching users, accounts, and chargebee subscriptions.
# Merging all tables into one.
# get_users_accounts_subscriptions_data is provided by functions_2 (not
# visible here). Judging by the columns previewed in later cells, the merged
# frame prefixes fields with "account|" and "user|" -- confirm in that module.
df = get_users_accounts_subscriptions_data(blob_service_client, today_str)

Fetching user data...
<class 'pandas.core.frame.DataFrame'>
Int64Index: 88329 entries, 3 to 90338
Data columns (total 10 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   _id.$oid                           88329 non-null  object 
 1   accountId.$oid                     88329 non-null  object 
 2   language                           88289 non-null  float64
 3   dateCreated.$date                  88329 non-null  int64  
 4   email                              88329 non-null  object 
 5   auth0Cache.usermetadata.lastName   88248 non-null  object 
 6   auth0Cache.usermetadata.firstName  88256 non-null  object 
 7   subscriptionType                   88329 non-null  object 
 8   isPrimary                          88329 non-null  bool   
 9   auth0Cache.usermetadata.demo       161 non-null    object 
dtypes: bool(1), float64(1), int64(1), object(7)
memory usage: 6.8+ MB
None
Fetching account data...


In [8]:
# Preview rows that actually have a user creation date.
# pandas evaluates `series != None` element-wise and it is True for every
# row (missing values included), so that filter removed nothing; notna() is
# the real missing-value test. The separate column `.head()` was dropped
# because only the final expression of a cell is rendered.
df[df["user|dateCreated.$date"].notna()].head()

Unnamed: 0,account|_id.$oid,account|name,account|domain,account|isActive,account|type,account|trialEnds.$date,account|dateCreated.$date,account|ownerId.$oid,account|edition,account|subscriptionId,...,user|language,user|dateCreated.$date,user|email,user|auth0Cache.usermetadata.lastName,user|auth0Cache.usermetadata.firstName,user|subscriptionType,user|isPrimary,user|auth0Cache.usermetadata.demo,account|id,account|users_count
0,5b14ee04854ff50007e2a077,Azavista,azavista,True,1.0,1528897421000,2018-06-04 07:45:08.977,5b14ee05854ff50007e2a078,gtmhub-summit,1mk51ZXQtzEmDUIfF,...,0.0,2018-06-07 14:13:32.294000,jp@azavista.com,van der Kuijl,JP,regular,True,,5b14ee04854ff50007e2a077,2
1,5b14ee04854ff50007e2a077,Azavista,azavista,True,1.0,1528897421000,2018-06-04 07:45:08.977,5b14ee05854ff50007e2a078,gtmhub-summit,1mk51ZXQtzEmDUIfF,...,0.0,2018-06-04 07:45:09,gabriel.thomaidis@azavista.com,Thomaidis,Gabriel,regular,True,,5b14ee04854ff50007e2a077,2
2,5b1e3680d8c5b500075ecfd3,wetransform GmbH,wetransform,True,1.0,1529311488473,2018-06-11 08:44:48.473,5b1e3680d8c5b500075ecfd4,gtmhub-summit,1mkVvueQuePTWUOmB,...,0.0,2018-06-11 08:44:48.498000,tr@wetransform.to,Reitz,Thorsten,regular,True,,5b1e3680d8c5b500075ecfd3,4
3,5b1e3680d8c5b500075ecfd3,wetransform GmbH,wetransform,True,1.0,1529311488473,2018-06-11 08:44:48.473,5b1e3680d8c5b500075ecfd4,gtmhub-summit,1mkVvueQuePTWUOmB,...,0.0,2018-06-12 12:20:29.635000,st@wetransform.to,Templer,Simon,regular,True,,5b1e3680d8c5b500075ecfd3,4
4,5b1e3680d8c5b500075ecfd3,wetransform GmbH,wetransform,True,1.0,1529311488473,2018-06-11 08:44:48.473,5b1e3680d8c5b500075ecfd4,gtmhub-summit,1mkVvueQuePTWUOmB,...,0.0,2019-06-25 15:40:06.882000,ch@wetransform.to,Hönn,Christopher,regular,True,,5b1e3680d8c5b500075ecfd3,4


In [9]:
# Cut-off date: 180 days before the snapshot date.
# NOTE(review): this comment used to read "Last 60 days" while the code
# subtracts 180 days -- confirm which window is intended.
threshold_date = today - datetime.timedelta(days=180)

In [10]:
# Keep only users created on or before threshold_date.
# NOTE(review): the earlier wording said "at least 60 days ago", but
# threshold_date is computed with timedelta(days=180) -- confirm the window.
df = df[df["user|dateCreated.$date"] <= threshold_date]

In [11]:
# Join event data to df
# The inner join keeps only users that have a row in filter_tags, i.e. at
# least one counted tag-filter event.
df = df.merge(
    right=filter_tags,
    left_on="user|_id.$oid",
    right_on="event.user_id",
    how="inner",
)

In [12]:
# Preview the frame after the event join.
# NOTE(review): the rendered rows include user names and e-mail addresses;
# clear this output before sharing or committing the notebook.
df.head()

Unnamed: 0,account|_id.$oid,account|name,account|domain,account|isActive,account|type,account|trialEnds.$date,account|dateCreated.$date,account|ownerId.$oid,account|edition,account|subscriptionId,...,user|email,user|auth0Cache.usermetadata.lastName,user|auth0Cache.usermetadata.firstName,user|subscriptionType,user|isPrimary,user|auth0Cache.usermetadata.demo,account|id,account|users_count,event.user_id,event.filtered_by_tag_count
0,5bd1a35727eb330001800428,OMS Prüfservice GmbH,growth,True,1.0,1543449600000,2018-10-25 11:04:55.334,5bd1a35727eb330001800429,gtmhub-enterprise-v2,1mk51YnR7UCENkTmG,...,eduard.bechler@oms-pruefservice.de,Bechler,Eduard,regular,True,,5bd1a35727eb330001800428,780,5c29bc403d53e90001ade104,2
1,5bd1a35727eb330001800428,OMS Prüfservice GmbH,growth,True,1.0,1543449600000,2018-10-25 11:04:55.334,5bd1a35727eb330001800429,gtmhub-enterprise-v2,1mk51YnR7UCENkTmG,...,florian.kern@oms-pruefservice.de,Kern,Florian,regular,True,,5bd1a35727eb330001800428,780,5c29c0913d53e90001ade1d6,1
2,5bd1a35727eb330001800428,OMS Prüfservice GmbH,growth,True,1.0,1543449600000,2018-10-25 11:04:55.334,5bd1a35727eb330001800429,gtmhub-enterprise-v2,1mk51YnR7UCENkTmG,...,kathrin.west@oms-pruefservice.de,West,Kathrin,regular,True,,5bd1a35727eb330001800428,780,5c29c8a636973a00016ceca5,1
3,5bd1a35727eb330001800428,OMS Prüfservice GmbH,growth,True,1.0,1543449600000,2018-10-25 11:04:55.334,5bd1a35727eb330001800429,gtmhub-enterprise-v2,1mk51YnR7UCENkTmG,...,lars.thanner@oms-pruefservice.de,Thanner,Lars,regular,True,,5bd1a35727eb330001800428,780,5c29c8a53d53e90001ade3c3,1
4,5bd1a35727eb330001800428,OMS Prüfservice GmbH,growth,True,1.0,1543449600000,2018-10-25 11:04:55.334,5bd1a35727eb330001800429,gtmhub-enterprise-v2,1mk51YnR7UCENkTmG,...,julia.schroedter@oms-pruefservice.de,Seiter,Julia,regular,True,,5bd1a35727eb330001800428,780,5c3726588f005100015ce486,18


In [13]:
# (rows, columns) after the inner join with the per-user event counts.
df.shape

(975, 55)

In [14]:
# Read in CSDA data (Salesforce export stored under ../shared_data).
csda_path = os.path.join(os.pardir, "shared_data", "csda_salesforce_export.csv")
csda_df = pd.read_csv(csda_path)

# Lower-case every header, collapse whitespace runs to "_", and tag it with
# "salesforce|" so the column's origin stays obvious after the join.
csda_df.columns = [
    "salesforce|" + "_".join(header.lower().split())
    for header in csda_df.columns
]

In [15]:
# Left join CSDA table with main dataframe (df)
# Rows with no Salesforce match are kept; their salesforce|* columns are NaN.
# NOTE(review): the recorded outputs show 975 rows before the left joins and
# 979 afterwards, so at least one right-hand table has duplicate keys.
# Consider validate="many_to_one" here to pin down which one.
df = df.merge(
    right=csda_df,
    left_on="account|_id.$oid",
    right_on="salesforce|chargebee_id",
    how="left",
)

In [16]:
# Get Hubspot data
# get_hubspot_data is provided by functions_2 (not visible here); the log
# printed below mentions both contacts and companies.
hubspot_df = get_hubspot_data()

Fetching Hubspot data...
Hubspot contacts...
Hubspot companies...


In [17]:
# Merge users with HubSpot data
# Matching is by e-mail address. Users without a HubSpot record are kept and
# get NaN in the hubspot|* columns.
# NOTE(review): a repeated hubspot|email would duplicate the matching user
# row -- verify key uniqueness (e.g. validate="many_to_one").
df = df.merge(
    right=hubspot_df,
    left_on="user|email",
    right_on="hubspot|email",
    how="left",
)

In [18]:
# Add CSDA column
# Rows without a Salesforce team member get the placeholder "-".
df["salesforce|team_member_name"] = df["salesforce|team_member_name"].fillna("-")
# True wherever a real team member is present. The vectorised comparison
# yields the same boolean column as mapping a lambda over every value.
df["salesforce|is_csda"] = df["salesforce|team_member_name"] != "-"

In [19]:
# Source column name -> name used in the exported file.
column_name_remapping = {
    "account|_id.$oid": "account.id",
    "account|name": "account.name",
    "account|dateCreated.$date": "account.date_created",
    "chargebee|status": "chargebee.status",
    "user|_id.$oid": "user.id",
    "user|language": "user.language",
    "user|dateCreated.$date": "user.date_created",
    "user|email": "user.email",
    "user|auth0Cache.usermetadata.lastName": "user.last_name",
    "user|auth0Cache.usermetadata.firstName": "user.first_name",
    "account|users_count": "account.users_count",
    "hubspot|jobtitle": "hubspot.job_title",
    "hubspot|industry": "hubspot.industry",
    "hubspot|numberofemployees": "hubspot.number_of_employees",
    "salesforce|is_csda": "salesforce.is_csda",
    "hubspot|country": "hubspot.country",
    "event.filtered_by_tag_count": "event.filtered_by_tag_count"
}

# Only renaming happens here. Restricting df to the mapped columns is
# currently disabled, so unmapped columns keep their "|"-style names.
df = df.rename(mapper=column_name_remapping, axis="columns")

In [20]:
# Change language code to categorical
# The position of each name in the list is its numeric language code.
language_names = [
    "English",
    "German",
    "Chinese",
    "Bulgarian",
    "Spanish",
    "French",
    "Portuguese",
]
language_code_mapping = dict(enumerate(language_names))

# Only the user.language column is touched; codes without a mapping are left
# as they are.
df = df.replace(to_replace={"user.language": language_code_mapping})

In [21]:
# Preview the frame after the column rename and language mapping.
df.head()

Unnamed: 0,account.id,account.name,account|domain,account|isActive,account|type,account|trialEnds.$date,account.date_created,account|ownerId.$oid,account|edition,account|subscriptionId,...,salesforce|renewal_date,salesforce|employees,hubspot|associatedcompanyid,hubspot|email,hubspot.job_title,hubspot.industry,hubspot.number_of_employees,hubspot|chargebee_id,hubspot.country,salesforce.is_csda
0,5bd1a35727eb330001800428,OMS Prüfservice GmbH,growth,True,1.0,1543449600000,2018-10-25 11:04:55.334,5bd1a35727eb330001800429,gtmhub-enterprise-v2,1mk51YnR7UCENkTmG,...,11/29/2021,240.0,7278533788,eduard.bechler@oms-pruefservice.de,,Electronics,240,5bd1a35727eb330001800428,Germany,True
1,5bd1a35727eb330001800428,OMS Prüfservice GmbH,growth,True,1.0,1543449600000,2018-10-25 11:04:55.334,5bd1a35727eb330001800429,gtmhub-enterprise-v2,1mk51YnR7UCENkTmG,...,11/29/2021,240.0,7278533788,florian.kern@oms-pruefservice.de,,Electronics,240,5bd1a35727eb330001800428,Germany,True
2,5bd1a35727eb330001800428,OMS Prüfservice GmbH,growth,True,1.0,1543449600000,2018-10-25 11:04:55.334,5bd1a35727eb330001800429,gtmhub-enterprise-v2,1mk51YnR7UCENkTmG,...,11/29/2021,240.0,7278533788,kathrin.west@oms-pruefservice.de,,Electronics,240,5bd1a35727eb330001800428,Germany,True
3,5bd1a35727eb330001800428,OMS Prüfservice GmbH,growth,True,1.0,1543449600000,2018-10-25 11:04:55.334,5bd1a35727eb330001800429,gtmhub-enterprise-v2,1mk51YnR7UCENkTmG,...,11/29/2021,240.0,7278533788,lars.thanner@oms-pruefservice.de,,Electronics,240,5bd1a35727eb330001800428,Germany,True
4,5bd1a35727eb330001800428,OMS Prüfservice GmbH,growth,True,1.0,1543449600000,2018-10-25 11:04:55.334,5bd1a35727eb330001800429,gtmhub-enterprise-v2,1mk51YnR7UCENkTmG,...,11/29/2021,240.0,7278533788,julia.schroedter@oms-pruefservice.de,,Electronics,240,5bd1a35727eb330001800428,Germany,True


In [22]:
# Fetch user_roles data
# get_user_roles_table is provided by functions_2 (not visible here). The
# preview below shows one row per user/role assignment with
# "user_roles."-prefixed id columns.
user_roles_df = get_user_roles_table(blob_service_client, today_str)
user_roles_df.head()

Unnamed: 0,user_roles._id.$oid,user_roles.userId.$oid,user_roles.roleId.$oid,user_roles.accountId.$oid
0,574e9259ed915d0006b985e5,574e9259ed915d0006b985e4,573d93d9ed915d00052efb6b,574e9259ed915d0006b985e3
1,573e222fed915d0005cc2d07,573e222fed915d0005cc2d06,573d93d9ed915d00052efb6b,573e222fed915d0005cc2d05
2,573d93d9ed915d00052efb6c,573d93d9ed915d00052efb6a,573d93d9ed915d00052efb6b,573d93d9ed915d00052efb69
3,5746e3c8ed915d0005cc319c,5746e3c8ed915d0005cc319b,573d93d9ed915d00052efb6b,5746e3c8ed915d0005cc319a
4,573f32bded915d0005cc2e5d,573f32bded915d0005cc2e5c,573d93d9ed915d00052efb6b,573f32bded915d0005cc2e5b


In [23]:
# Fetch roles data
# get_roles_table is provided by functions_2 (not visible here). The preview
# below shows the role id, role name and owning account id.
roles_df = get_roles_table(blob_service_client, today_str)
roles_df.head()

Unnamed: 0,roles._id.$oid,roles.name,roles.accountId.$oid
0,573dbb61ed915d0005cc2c4d,user,
1,5a9f9cbee5274a0007acfcf9,Company B,57fb5f7bed915d0006582898
2,5b86ad85df457100079c04e4,Admin II,57fb5f7bed915d0006582898
3,5b8d2accf9159100080416a2,Air (bo's experiment),573dbb12ed915d0005cc2c46
4,58f632d3ed915d0005e9ef6c,test role,58822288ed915d0005afa6ee


In [24]:
# Join user_roles with roles
# Each assignment is matched to its role record by role id; assignments with
# no matching role are kept with NaN in the roles.* columns.
# NOTE(review): the recorded preview shows identical rows repeated, which
# suggests duplicate keys in one of the inputs -- confirm this is expected.
user_roles_df = user_roles_df.merge(
    right=roles_df,
    left_on="user_roles.roleId.$oid",
    right_on="roles._id.$oid",
    how="left",
)

user_roles_df.head()

Unnamed: 0,user_roles._id.$oid,user_roles.userId.$oid,user_roles.roleId.$oid,user_roles.accountId.$oid,roles._id.$oid,roles.name,roles.accountId.$oid
0,574e9259ed915d0006b985e5,574e9259ed915d0006b985e4,573d93d9ed915d00052efb6b,574e9259ed915d0006b985e3,573d93d9ed915d00052efb6b,admin,
1,574e9259ed915d0006b985e5,574e9259ed915d0006b985e4,573d93d9ed915d00052efb6b,574e9259ed915d0006b985e3,573d93d9ed915d00052efb6b,admin,
2,573e222fed915d0005cc2d07,573e222fed915d0005cc2d06,573d93d9ed915d00052efb6b,573e222fed915d0005cc2d05,573d93d9ed915d00052efb6b,admin,
3,573e222fed915d0005cc2d07,573e222fed915d0005cc2d06,573d93d9ed915d00052efb6b,573e222fed915d0005cc2d05,573d93d9ed915d00052efb6b,admin,
4,573d93d9ed915d00052efb6c,573d93d9ed915d00052efb6a,573d93d9ed915d00052efb6b,573d93d9ed915d00052efb69,573d93d9ed915d00052efb6b,admin,


In [25]:
# get_roles_by_user is provided by functions_2 (not visible here). Judging
# by the preview below it yields one row per user id with a comma-separated
# `roles` string -- confirm in that module.
unique_user_roles_df = get_roles_by_user(user_roles_df)
unique_user_roles_df.head()

Unnamed: 0,user_roles.userId.$oid,roles
0,573d9359ed915d00052efb10,SysAdmin
1,573d93d9ed915d00052efb6a,admin
2,573db6aeed915d0005cc2bc5,admin
3,573dbb12ed915d0005cc2c47,"admin, user, user+, Ivan Osmak's team_7ba678c2..."
4,573dbb61ed915d0005cc2c4c,"admin, user, user+, Engineering_a8a9b5181dbd8f..."


In [26]:
# Join df with user_roles_df
# Attaches the per-user `roles` string. Users with no role record are kept
# and get NaN in `roles`.
df = df.merge(
    right=unique_user_roles_df,
    left_on="user.id",
    right_on="user_roles.userId.$oid",
    how="left",
)

df.head()

Unnamed: 0,account.id,account.name,account|domain,account|isActive,account|type,account|trialEnds.$date,account.date_created,account|ownerId.$oid,account|edition,account|subscriptionId,...,hubspot|associatedcompanyid,hubspot|email,hubspot.job_title,hubspot.industry,hubspot.number_of_employees,hubspot|chargebee_id,hubspot.country,salesforce.is_csda,user_roles.userId.$oid,roles
0,5bd1a35727eb330001800428,OMS Prüfservice GmbH,growth,True,1.0,1543449600000,2018-10-25 11:04:55.334,5bd1a35727eb330001800429,gtmhub-enterprise-v2,1mk51YnR7UCENkTmG,...,7278533788,eduard.bechler@oms-pruefservice.de,,Electronics,240,5bd1a35727eb330001800428,Germany,True,5c29bc403d53e90001ade104,"User (Access, Manage OKR), Vertriebsgesellscha..."
1,5bd1a35727eb330001800428,OMS Prüfservice GmbH,growth,True,1.0,1543449600000,2018-10-25 11:04:55.334,5bd1a35727eb330001800429,gtmhub-enterprise-v2,1mk51YnR7UCENkTmG,...,7278533788,florian.kern@oms-pruefservice.de,,Electronics,240,5bd1a35727eb330001800428,Germany,True,5c29c0913d53e90001ade1d6,"OKR Botschafter, O.U Stuttgart_6f5751189cad26d..."
2,5bd1a35727eb330001800428,OMS Prüfservice GmbH,growth,True,1.0,1543449600000,2018-10-25 11:04:55.334,5bd1a35727eb330001800429,gtmhub-enterprise-v2,1mk51YnR7UCENkTmG,...,7278533788,kathrin.west@oms-pruefservice.de,,Electronics,240,5bd1a35727eb330001800428,Germany,True,5c29c8a636973a00016ceca5,"User (Access, Manage OKR), Lorch Sales Team Nü..."
3,5bd1a35727eb330001800428,OMS Prüfservice GmbH,growth,True,1.0,1543449600000,2018-10-25 11:04:55.334,5bd1a35727eb330001800429,gtmhub-enterprise-v2,1mk51YnR7UCENkTmG,...,7278533788,lars.thanner@oms-pruefservice.de,,Electronics,240,5bd1a35727eb330001800428,Germany,True,5c29c8a53d53e90001ade3c3,"User (Access, Manage OKR)"
4,5bd1a35727eb330001800428,OMS Prüfservice GmbH,growth,True,1.0,1543449600000,2018-10-25 11:04:55.334,5bd1a35727eb330001800429,gtmhub-enterprise-v2,1mk51YnR7UCENkTmG,...,7278533788,julia.schroedter@oms-pruefservice.de,,Electronics,240,5bd1a35727eb330001800428,Germany,True,5c3726588f005100015ce486,"User (Access, Manage OKR), O.U Pforzheim_3d920..."


In [27]:
# Users with no role record get an empty string instead of NaN.
roles_column = df["roles"]
df["roles"] = roles_column.fillna("")
# TODO: a "user.is_okr_champion" flag (True when the lower-cased roles string
# contains "champion") was drafted for this cell but is currently disabled.

In [28]:
# Drop unnecessary columns
# Open question carried over from the original note: should all id columns
# be dropped? The candidates are exactly the names listed below.
id_columns = {
    "account.id",
    "user.id",
    "user_roles._id.$oid",
    "user_roles.userId.$oid",
    "user_roles.roleId.$oid",
    "user_roles.accountId.$oid",
    "roles._id.$oid",
    "roles.accountId.$oid"
}

# Keep every column not listed above, in its current order. Listed names
# that are absent from df are simply ignored.
df = df[[column for column in df.columns if column not in id_columns]]

In [29]:
# Column dtypes and non-null counts before the datetime cast in the next cell.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 979 entries, 0 to 978
Data columns (total 71 columns):
 #   Column                                 Non-Null Count  Dtype         
---  ------                                 --------------  -----         
 0   account.name                           979 non-null    object        
 1   account|domain                         979 non-null    object        
 2   account|isActive                       979 non-null    bool          
 3   account|type                           979 non-null    float64       
 4   account|trialEnds.$date                979 non-null    int64         
 5   account.date_created                   979 non-null    datetime64[ns]
 6   account|ownerId.$oid                   979 non-null    object        
 7   account|edition                        979 non-null    object        
 8   account|subscriptionId                 979 non-null    object        
 9   account|planId                         979 non-null    object    

In [30]:
# Make sure both creation-date columns are datetime64[ns]; the .dt accessor
# used later for formatting requires a datetime dtype.
for date_column in ("user.date_created", "account.date_created"):
    df[date_column] = df[date_column].astype("datetime64[ns]")

In [31]:
# Re-check the dtypes after the datetime cast.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 979 entries, 0 to 978
Data columns (total 71 columns):
 #   Column                                 Non-Null Count  Dtype         
---  ------                                 --------------  -----         
 0   account.name                           979 non-null    object        
 1   account|domain                         979 non-null    object        
 2   account|isActive                       979 non-null    bool          
 3   account|type                           979 non-null    float64       
 4   account|trialEnds.$date                979 non-null    int64         
 5   account.date_created                   979 non-null    datetime64[ns]
 6   account|ownerId.$oid                   979 non-null    object        
 7   account|edition                        979 non-null    object        
 8   account|subscriptionId                 979 non-null    object        
 9   account|planId                         979 non-null    object    

In [32]:
# Preview the frame after the id columns were dropped and the dates cast.
df.head()

Unnamed: 0,account.name,account|domain,account|isActive,account|type,account|trialEnds.$date,account.date_created,account|ownerId.$oid,account|edition,account|subscriptionId,account|planId,...,salesforce|employees,hubspot|associatedcompanyid,hubspot|email,hubspot.job_title,hubspot.industry,hubspot.number_of_employees,hubspot|chargebee_id,hubspot.country,salesforce.is_csda,roles
0,OMS Prüfservice GmbH,growth,True,1.0,1543449600000,2018-10-25 11:04:55.334,5bd1a35727eb330001800429,gtmhub-enterprise-v2,1mk51YnR7UCENkTmG,gtmhub-enterprise-oms-1y,...,240.0,7278533788,eduard.bechler@oms-pruefservice.de,,Electronics,240,5bd1a35727eb330001800428,Germany,True,"User (Access, Manage OKR), Vertriebsgesellscha..."
1,OMS Prüfservice GmbH,growth,True,1.0,1543449600000,2018-10-25 11:04:55.334,5bd1a35727eb330001800429,gtmhub-enterprise-v2,1mk51YnR7UCENkTmG,gtmhub-enterprise-oms-1y,...,240.0,7278533788,florian.kern@oms-pruefservice.de,,Electronics,240,5bd1a35727eb330001800428,Germany,True,"OKR Botschafter, O.U Stuttgart_6f5751189cad26d..."
2,OMS Prüfservice GmbH,growth,True,1.0,1543449600000,2018-10-25 11:04:55.334,5bd1a35727eb330001800429,gtmhub-enterprise-v2,1mk51YnR7UCENkTmG,gtmhub-enterprise-oms-1y,...,240.0,7278533788,kathrin.west@oms-pruefservice.de,,Electronics,240,5bd1a35727eb330001800428,Germany,True,"User (Access, Manage OKR), Lorch Sales Team Nü..."
3,OMS Prüfservice GmbH,growth,True,1.0,1543449600000,2018-10-25 11:04:55.334,5bd1a35727eb330001800429,gtmhub-enterprise-v2,1mk51YnR7UCENkTmG,gtmhub-enterprise-oms-1y,...,240.0,7278533788,lars.thanner@oms-pruefservice.de,,Electronics,240,5bd1a35727eb330001800428,Germany,True,"User (Access, Manage OKR)"
4,OMS Prüfservice GmbH,growth,True,1.0,1543449600000,2018-10-25 11:04:55.334,5bd1a35727eb330001800429,gtmhub-enterprise-v2,1mk51YnR7UCENkTmG,gtmhub-enterprise-oms-1y,...,240.0,7278533788,julia.schroedter@oms-pruefservice.de,,Electronics,240,5bd1a35727eb330001800428,Germany,True,"User (Access, Manage OKR), O.U Pforzheim_3d920..."


In [33]:
# Format datetime fields as string for output file
# Both creation dates are rendered as YYYY/MM/DD; after this cell the columns
# hold strings rather than datetimes.
for date_column in ("account.date_created", "user.date_created"):
    df[date_column] = df[date_column].dt.strftime("%Y/%m/%d")

In [34]:
# Final (rows, columns) of the frame that is exported below.
df.shape

(979, 71)

In [35]:
# Save data to csv file
# The file name carries the snapshot date; the row index is not written.
# NOTE(review): the export still contains user e-mail addresses and names.
output_file = f"tag_filtering_users_data_{datetime_string(today, '-')}.csv"
df.to_csv(output_file, index=False)