## Imports

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
%aimport

Modules to reload:
all-except-skipped

Modules to skip:



In [3]:
import json
from snowflake.snowpark.session import Session
import snowflake.snowpark.functions as F
import snowflake.snowpark.types as T
from dotenv import find_dotenv
from pathlib import Path
import sys

project_home1 = Path(find_dotenv()).parent
sys.path.append(str(project_home1))

from constants1 import *


## Data Pipline

### Snowpark Session

In [4]:
with open(project_home1 / 'config/creds.json', 'r') as ff:
    conn_param=json.load(ff)

session = Session.builder.configs(conn_param).create() 


### Database, Schema & resource creation

In [5]:
session.sql(f"create database if not exists {SNOWPATROL_DB}").collect()
session.sql(f"create schema if not exists {SNOWPATROL_DB}.{DEVOPS_SH}").collect()
session.sql(f"create stage if not exists {SNOWPATROL_DB}.{DEVOPS_SH}.{DATA_STAGE}").collect()
session.sql(f"create stage if not exists {SNOWPATROL_DB}.{DEVOPS_SH}.{DEPS_STAGE}").collect()
session.sql(f"create stage if not exists {SNOWPATROL_DB}.{DEVOPS_SH}.{MODELS_STAGE}").collect()
session.sql(f"create stage if not exists {SNOWPATROL_DB}.{DEVOPS_SH}.{OBJECT_STAGE}").collect()

# This File Format uses a special option available for CSV schema inference
session.sql(f"create file format if not exists {SNOWPATROL_DB}.{DEVOPS_SH}.ff_csv_with_header type=CSV parse_header=TRUE field_optionally_enclosed_by='\"'").collect()

[Row(status='File format FF_CSV_WITH_HEADER successfully created.')]

### Sample data upload

In [6]:
data_dir = project_home1 / "data1" 
session.sql(f"PUT file://{project_home1}/data1/obfuscated/* @{SNOWPATROL_DB}.{DEVOPS_SH}.{DATA_STAGE} overwrite=True auto_compress=True").collect()
session.sql(f"PUT file://{project_home1}/data1/generated/whitelisted_users.csv @{SNOWPATROL_DB}.{DEVOPS_SH}.{DATA_STAGE} overwrite=True auto_compress=True").collect()
session.sql(f"PUT file://{project_home1}/data1/generated/sample_okta_logs.csv @{SNOWPATROL_DB}.{DEVOPS_SH}.{DATA_STAGE} overwrite=True auto_compress=True").collect()
session.sql(f"PUT file://{project_home1}/data1/generated/monitored_apps.csv @{SNOWPATROL_DB}.{DEVOPS_SH}.{DATA_STAGE} overwrite=True auto_compress=True").collect()



[Row(source='monitored_apps.csv', target='monitored_apps.csv.gz', source_size=86, target_size=128, source_compression='NONE', target_compression='GZIP', status='UPLOADED', message='')]

In [7]:

session.sql(f"PUT file://{project_home1}/Src1/python/snowpatrol/train1.py @{SNOWPATROL_DB}.{DEVOPS_SH}.{DEPS_STAGE} overwrite=True auto_compress=False").collect()
session.sql(f"PUT file://{project_home1}/Src1/python/snowpatrol/constants1.py @{SNOWPATROL_DB}.{DEVOPS_SH}.{DEPS_STAGE} overwrite=True auto_compress=False").collect()

[Row(source='constants1.py', target='constants1.py', source_size=567, target_size=576, source_compression='NONE', target_compression='NONE', status='UPLOADED', message='')]

### Table creation to hold source data - FP data

> Note: Uses Schema detection on CSV

In [8]:
session.sql(
    f"""
    CREATE TABLE IF NOT EXISTS {SNOWPATROL_DB}.{DEVOPS_SH}.{TBL_MONITORED_APPS}
    USING TEMPLATE (
    SELECT ARRAY_AGG(OBJECT_CONSTRUCT(*))
      FROM TABLE(
        INFER_SCHEMA(
          LOCATION=>'@{SNOWPATROL_DB}.{DEVOPS_SH}.{DATA_STAGE}/monitored_apps.csv',
          FILE_FORMAT=>'snowpatrol.MAIN.ff_csv_with_header',
          IGNORE_CASE => TRUE
        )
      ))
    """
).collect()

session.sql(
    f"""
    CREATE TABLE IF NOT EXISTS {SNOWPATROL_DB}.{DEVOPS_SH}.{TBL_OKTA_USERS}
    USING TEMPLATE (
    SELECT ARRAY_AGG(OBJECT_CONSTRUCT(*))
      FROM TABLE(
        INFER_SCHEMA(
          LOCATION=>'@{SNOWPATROL_DB}.{DEVOPS_SH}.{DATA_STAGE}/sample_okta_logs.csv',
          FILE_FORMAT=>'snowpatrol.MAIN.ff_csv_with_header',
          IGNORE_CASE => TRUE
        )
      ))
    """
).collect()

session.sql(
    f"""
    CREATE TABLE IF NOT EXISTS {SNOWPATROL_DB}.{DEVOPS_SH}.{TBL_APP_LOGS}
    USING TEMPLATE (
    SELECT ARRAY_AGG(OBJECT_CONSTRUCT(*))
      FROM TABLE(
        INFER_SCHEMA(
          LOCATION=>'@{SNOWPATROL_DB}.{DEVOPS_SH}.{DATA_STAGE}/SnowPatrol_validation_data_app1 _.csv',
          FILE_FORMAT=>'snowpatrol.MAIN.ff_csv_with_header',
          IGNORE_CASE => TRUE
        )
      ))
    """
).collect()


session.sql(
    f"""
    CREATE TABLE IF NOT EXISTS {SNOWPATROL_DB}.{DEVOPS_SH}.{TBL_WORK_DAYS}
    USING TEMPLATE (
    SELECT ARRAY_AGG(OBJECT_CONSTRUCT(*))
      FROM TABLE(
        INFER_SCHEMA(
          LOCATION=>'@{SNOWPATROL_DB}.{DEVOPS_SH}.{DATA_STAGE}/SnowPatrol_Working_Days.csv',
          FILE_FORMAT=>'snowpatrol.MAIN.ff_csv_with_header',
          IGNORE_CASE => TRUE
        )
      ))
    """
).collect()

session.sql(
    f"""
    CREATE TABLE IF NOT EXISTS {SNOWPATROL_DB}.{DEVOPS_SH}.{TBL_EMP_METADATA}
    USING TEMPLATE (
    SELECT ARRAY_AGG(OBJECT_CONSTRUCT(*))
      FROM TABLE(
        INFER_SCHEMA(
          LOCATION=>'@{SNOWPATROL_DB}.{DEVOPS_SH}.{DATA_STAGE}/SnowPatrol_validation_emp_details_app1.csv',
          FILE_FORMAT=>'snowpatrol.MAIN.ff_csv_with_header',
          IGNORE_CASE => TRUE
        )
      ))
    """
).collect()

session.sql(
    f"""
    CREATE TABLE IF NOT EXISTS {SNOWPATROL_DB}.{DEVOPS_SH}.{TBL_WHITELISTED_USERS}
    USING TEMPLATE (
    SELECT ARRAY_AGG(OBJECT_CONSTRUCT(*))
      FROM TABLE(
        INFER_SCHEMA(
          LOCATION=>'@{SNOWPATROL_DB}.{DEVOPS_SH}.{DATA_STAGE}/whitelisted_users.csv',
          FILE_FORMAT=>'snowpatrol.MAIN.ff_csv_with_header',
          IGNORE_CASE => TRUE
        )
      ))
    """
).collect()




[Row(status='Table WHITELISTED_USERS successfully created.')]

### Copying data into respective source tables

In [9]:
session.sql(f"TRUNCATE TABLE {SNOWPATROL_DB}.{DEVOPS_SH}.{TBL_MONITORED_APPS}").collect()
session.sql(
    f""" 
    copy into {SNOWPATROL_DB}.{DEVOPS_SH}.{TBL_MONITORED_APPS}
    from @{SNOWPATROL_DB}.{DEVOPS_SH}.{DATA_STAGE}
    files = ('monitored_apps.csv.gz') 
    file_format = (type = CSV skip_header=1 field_optionally_enclosed_by='"')
""").collect()

session.sql(f"TRUNCATE TABLE {SNOWPATROL_DB}.{DEVOPS_SH}.{TBL_OKTA_USERS}").collect()
session.sql(
    f""" 
    copy into {SNOWPATROL_DB}.{DEVOPS_SH}.{TBL_OKTA_USERS}
    from @{SNOWPATROL_DB}.{DEVOPS_SH}.{DATA_STAGE}
    files = ('sample_okta_logs.csv.gz') 
    file_format = (type = CSV skip_header=1 field_optionally_enclosed_by='"')
""").collect()

session.sql(f"TRUNCATE TABLE {SNOWPATROL_DB}.{DEVOPS_SH}.{TBL_APP_LOGS}").collect()

session.sql(
    f""" 
    copy into {SNOWPATROL_DB}.{DEVOPS_SH}.{TBL_APP_LOGS}
    from @{SNOWPATROL_DB}.{DEVOPS_SH}.{DATA_STAGE}
    files = ('SnowPatrol_validation_data_app1 _.csv.gz','SnowPatrol_validation_data_app2 _.csv.gz','SnowPatrol_validation_data_app3 _.csv.gz','SnowPatrol_validation_data_app4 _.csv.gz')
    file_format = (type = CSV skip_header=1 field_optionally_enclosed_by='"')
""").collect()

# Sample work days table - company schedule of whether each day is a work day or not
session.sql(f"TRUNCATE TABLE {SNOWPATROL_DB}.{DEVOPS_SH}.{TBL_WORK_DAYS}").collect()

session.sql(
    f""" 
    copy into {SNOWPATROL_DB}.{DEVOPS_SH}.{TBL_WORK_DAYS}
    from @{SNOWPATROL_DB}.{DEVOPS_SH}.{DATA_STAGE}
    files = ('SnowPatrol_Working_Days.csv.gz')
    file_format = (type = CSV skip_header=1 field_optionally_enclosed_by='"')
""").collect()

session.sql(f"TRUNCATE TABLE {SNOWPATROL_DB}.{DEVOPS_SH}.{TBL_EMP_METADATA}").collect()

session.sql(
    f""" 
    copy into {SNOWPATROL_DB}.{DEVOPS_SH}.{TBL_EMP_METADATA}
    from @{SNOWPATROL_DB}.{DEVOPS_SH}.{DATA_STAGE}
    files = ('SnowPatrol_validation_emp_details_app1.csv.gz','SnowPatrol_validation_emp_details_app2.csv.gz','SnowPatrol_validation_emp_details_app3.csv.gz','SnowPatrol_validation_emp_details_app4.csv.gz')
    file_format = (type = CSV skip_header=1 field_optionally_enclosed_by='"')
    ON_ERROR="CONTINUE"
""").collect()

session.sql(
    f""" 
    copy into {SNOWPATROL_DB}.{DEVOPS_SH}.{TBL_WHITELISTED_USERS}
    from @{SNOWPATROL_DB}.{DEVOPS_SH}.{DATA_STAGE}
    files = ('whitelisted_users.csv.gz') 
    file_format = (type = CSV skip_header=1 field_optionally_enclosed_by='"')
""").collect()

[Row(file='data/whitelisted_users.csv.gz', status='LOADED', rows_parsed=1, rows_loaded=1, error_limit=1, errors_seen=0, first_error=None, first_error_line=None, first_error_character=None, first_error_column_name=None)]

In [10]:
# import pandas as pd
# Data=session.sql(f"""select e.session_user,a.snapshot_datetime from                                     
# {SNOWPATROL_DB}.{DEVOPS_SH}.{TBL_EMP_METADATA} e,{SNOWPATROL_DB}.{DEVOPS_SH}.{TBL_APP_LOGS} a 
# where a.snapshot_datetime <= '08/04/2023' and e.session_user=a.session_user""").to_pandas()
# #Data1=pd.DataFrame(Data)


In [11]:
# Data

In [12]:
#%store Data1 

In [13]:
#my code
import pandas as pd
a=session.sql(f""" select (SESSION_USER) from {SNOWPATROL_DB}.{DEVOPS_SH}.{TBL_EMP_METADATA}""").collect()
a1=pd.DataFrame(a)

In [14]:
a1

Unnamed: 0,SESSION_USER
0,dhamodharan.balamani@anblicks.com
1,nishant.shah@anblicks.com
2,devang.patel@anblicks.com
3,manoj.kumar@anblicks.com
4,gowtham.voruganti@anblicks.com
...,...
700,fuad.nour@anblicks.com
701,ip-service@anblicks.com
702,pradeep.kollu@anblicks.com
703,shani.jangid@anblicks.com


In [15]:
len(a1)

705

In [16]:
email=[]
count=0
for i in range(len(a1)):
    #print (a1['SESSION_USER'][i])
    count=count+1
    email.append(a1['SESSION_USER'][i])
print(count)
    


705


In [17]:
email

['dhamodharan.balamani@anblicks.com',
 'nishant.shah@anblicks.com',
 'devang.patel@anblicks.com',
 'manoj.kumar@anblicks.com',
 'gowtham.voruganti@anblicks.com',
 'pranshu.joshi@anblicks.com',
 'kotapati.saitha@anblicks.com',
 'rasi.konatham@anblicks.com',
 'samir.bhatt@anblicks.com',
 'munwar@anblicks.com',
 'abishek.kunduru@anblicks.com',
 'lalitha.nasina@anblicks.com',
 'sandeep.nidumukkala@anblicks.com',
 'gaurav.patel@anblicks.com',
 'geetanjali.dash@anblicks.com',
 'sajja.ratnasravya@anblicks.com',
 'vijay.diwakar@anblicks.com',
 'ankit.patel@anblicks.com',
 'vinay.gundavaram@anblicks.com',
 'purvesh.kachhiya@anblicks.com',
 'abhilash.p@anblicks.com',
 'raja.seeni@anblicks.com',
 'mitesh.gajjar@anblicks.com',
 'bansilal.keloth@anblicks.com',
 'nikunj.ranpura@anblicks.com',
 'jaimin.patel@anblicks.com',
 'ronak.padhya@anblicks.com',
 'nilesh.jain@anblicks.com',
 'hardik.sanghani@anblicks.com',
 'deepika.mudiyala@anblicks.com',
 'santosh.yadav@anblicks.com',
 'abhishek.makhija@anbl

In [18]:
print(email)

['dhamodharan.balamani@anblicks.com', 'nishant.shah@anblicks.com', 'devang.patel@anblicks.com', 'manoj.kumar@anblicks.com', 'gowtham.voruganti@anblicks.com', 'pranshu.joshi@anblicks.com', 'kotapati.saitha@anblicks.com', 'rasi.konatham@anblicks.com', 'samir.bhatt@anblicks.com', 'munwar@anblicks.com', 'abishek.kunduru@anblicks.com', 'lalitha.nasina@anblicks.com', 'sandeep.nidumukkala@anblicks.com', 'gaurav.patel@anblicks.com', 'geetanjali.dash@anblicks.com', 'sajja.ratnasravya@anblicks.com', 'vijay.diwakar@anblicks.com', 'ankit.patel@anblicks.com', 'vinay.gundavaram@anblicks.com', 'purvesh.kachhiya@anblicks.com', 'abhilash.p@anblicks.com', 'raja.seeni@anblicks.com', 'mitesh.gajjar@anblicks.com', 'bansilal.keloth@anblicks.com', 'nikunj.ranpura@anblicks.com', 'jaimin.patel@anblicks.com', 'ronak.padhya@anblicks.com', 'nilesh.jain@anblicks.com', 'hardik.sanghani@anblicks.com', 'deepika.mudiyala@anblicks.com', 'santosh.yadav@anblicks.com', 'abhishek.makhija@anblicks.com', 'durgavikesh.pulapa@

In [19]:
%store email

Stored 'email' (list)


In [20]:
b=session.sql(f""" select (last_day_of_work) from {SNOWPATROL_DB}.{DEVOPS_SH}.{TBL_EMP_METADATA}""").collect()
b1=pd.DataFrame(b)

In [21]:
b1

Unnamed: 0,LAST_DAY_OF_WORK
0,2040-07-04
1,2040-07-04
2,2040-07-04
3,2040-07-04
4,2040-07-04
...,...
700,2040-07-04
701,2040-07-04
702,2040-07-04
703,2040-07-04


In [22]:
login=[]
count=0
for i in range(len(b1)):
    #print (a1['SESSION_USER'][i])
    count=count+1
    login.append(b1['LAST_DAY_OF_WORK'][i])
print(count)
    


705


In [23]:
login

[datetime.date(2040, 7, 4),
 datetime.date(2040, 7, 4),
 datetime.date(2040, 7, 4),
 datetime.date(2040, 7, 4),
 datetime.date(2040, 7, 4),
 datetime.date(2040, 7, 4),
 datetime.date(2040, 7, 4),
 datetime.date(2040, 7, 4),
 datetime.date(2040, 7, 4),
 datetime.date(2040, 7, 4),
 datetime.date(2040, 7, 4),
 datetime.date(2040, 7, 4),
 datetime.date(2040, 7, 4),
 datetime.date(2040, 7, 4),
 datetime.date(2040, 7, 4),
 datetime.date(2040, 7, 4),
 datetime.date(2040, 7, 4),
 datetime.date(2040, 7, 4),
 datetime.date(2040, 7, 4),
 datetime.date(2040, 7, 4),
 datetime.date(2040, 7, 4),
 datetime.date(2040, 7, 4),
 datetime.date(2040, 7, 4),
 datetime.date(2040, 7, 4),
 datetime.date(2040, 7, 4),
 datetime.date(2040, 7, 4),
 datetime.date(2040, 7, 4),
 datetime.date(2040, 7, 4),
 datetime.date(2040, 7, 4),
 datetime.date(2040, 7, 4),
 datetime.date(2040, 7, 4),
 datetime.date(2040, 7, 4),
 datetime.date(2040, 7, 4),
 datetime.date(2040, 7, 4),
 datetime.date(2040, 7, 4),
 datetime.date(2040,

In [24]:
print(login)

[datetime.date(2040, 7, 4), datetime.date(2040, 7, 4), datetime.date(2040, 7, 4), datetime.date(2040, 7, 4), datetime.date(2040, 7, 4), datetime.date(2040, 7, 4), datetime.date(2040, 7, 4), datetime.date(2040, 7, 4), datetime.date(2040, 7, 4), datetime.date(2040, 7, 4), datetime.date(2040, 7, 4), datetime.date(2040, 7, 4), datetime.date(2040, 7, 4), datetime.date(2040, 7, 4), datetime.date(2040, 7, 4), datetime.date(2040, 7, 4), datetime.date(2040, 7, 4), datetime.date(2040, 7, 4), datetime.date(2040, 7, 4), datetime.date(2040, 7, 4), datetime.date(2040, 7, 4), datetime.date(2040, 7, 4), datetime.date(2040, 7, 4), datetime.date(2040, 7, 4), datetime.date(2040, 7, 4), datetime.date(2040, 7, 4), datetime.date(2040, 7, 4), datetime.date(2040, 7, 4), datetime.date(2040, 7, 4), datetime.date(2040, 7, 4), datetime.date(2040, 7, 4), datetime.date(2040, 7, 4), datetime.date(2040, 7, 4), datetime.date(2040, 7, 4), datetime.date(2040, 7, 4), datetime.date(2040, 7, 4), datetime.date(2040, 7, 4), 

In [25]:
%store login

Stored 'login' (list)


In [26]:
#my code
import pandas as pd

d=session.sql(f""" select SNAPSHOT_DATETIME from {SNOWPATROL_DB}.{DEVOPS_SH}.{TBL_OKTA_USERS}""").collect()
d1=pd.DataFrame(d)

In [27]:
d1

Unnamed: 0,SNAPSHOT_DATETIME
0,2023-06-09
1,2023-08-27
2,2023-08-25
3,2023-08-07
4,2023-08-18
...,...
700,2023-08-10
701,2023-08-17
702,2023-06-22
703,2023-08-22


In [28]:
Order_Date=[]
count=0
for i in range(len(d1)):
    #print (a1['SESSION_USER'][i])
    count=count+1
    Order_Date.append(d1['SNAPSHOT_DATETIME'][i])
print(count)

705


In [29]:
print(Order_Date)

[datetime.date(2023, 6, 9), datetime.date(2023, 8, 27), datetime.date(2023, 8, 25), datetime.date(2023, 8, 7), datetime.date(2023, 8, 18), datetime.date(2023, 8, 17), datetime.date(2023, 3, 26), datetime.date(2023, 8, 23), datetime.date(2023, 5, 10), datetime.date(2023, 8, 10), datetime.date(2023, 7, 16), datetime.date(2023, 8, 1), datetime.date(2021, 12, 31), datetime.date(2023, 7, 2), datetime.date(2022, 8, 17), datetime.date(2023, 8, 23), datetime.date(2023, 2, 24), datetime.date(2023, 8, 16), datetime.date(2023, 8, 21), datetime.date(2023, 8, 25), datetime.date(2023, 8, 24), datetime.date(2022, 2, 7), datetime.date(2023, 7, 3), datetime.date(2023, 7, 3), datetime.date(2023, 8, 22), datetime.date(2023, 8, 24), datetime.date(2023, 8, 17), datetime.date(2023, 7, 27), datetime.date(2023, 8, 21), datetime.date(2023, 6, 15), datetime.date(2023, 8, 16), datetime.date(2021, 12, 31), datetime.date(2023, 3, 7), datetime.date(2023, 8, 23), datetime.date(2023, 7, 21), datetime.date(2023, 8, 26

In [30]:
import pandas as pd

In [31]:
d1["SNAPSHOT_DATETIME"] = pd.to_datetime(d1["SNAPSHOT_DATETIME"])

startDate = pd.to_datetime(d1["SNAPSHOT_DATETIME"]).min()

endDate = pd.to_datetime(d1["SNAPSHOT_DATETIME"]).max()

 

In [32]:
startDate

Timestamp('2021-12-01 00:00:00')

In [33]:
endDate

Timestamp('2023-08-28 00:00:00')

### Local utility functions

In [34]:
session.sql("use database DEVOPS_SH")

<snowflake.snowpark.dataframe.DataFrame at 0x2215acb6b90>

In [36]:
session.udf.register_from_file(
      file_path="train1.py"
      , func_name="contains_anyof"
      , name=f"{SNOWPATROL_DB}.{DEVOPS_SH}.udf_contains_anyof"
      , is_permanent=True
      , packages = ["snowflake-snowpark-python"]
      , imports=[f"@{SNOWPATROL_DB}.{DEVOPS_SH}.{DEPS_STAGE}/constants1.py"]
      , stage_location=f"{SNOWPATROL_DB}.{DEVOPS_SH}.{OBJECT_STAGE}"
      , replace=True
)

<snowflake.snowpark.udf.UserDefinedFunction at 0x2215cbae7a0>

### Model Training

In [37]:
session.sproc.register_from_file(
    file_path = f"@{SNOWPATROL_DB}.{DEVOPS_SH}.{DEPS_STAGE}/train1.py"
    ,func_name = "run_model_today"
    ,name=f"{SNOWPATROL_DB}.{DEVOPS_SH}.run_model_today"
    ,input_types=[T.IntegerType(),T.IntegerType(),T.FloatType(),T.BooleanType(),T.BooleanType(),T.BooleanType(), T.BooleanType()]
    ,return_type=T.VariantType()
    ,is_permanent=True
    ,replace=True
    ,stage_location=f"@{SNOWPATROL_DB}.{DEVOPS_SH}.{OBJECT_STAGE}"
    ,packages=['snowflake-snowpark-python','pandas','scikit-learn==1.2.1','joblib==1.1.1', 'numpy']
    ,imports=[f"@{SNOWPATROL_DB}.{DEVOPS_SH}.{DEPS_STAGE}/constants1.py"]
)

Package 'scikit-learn' is not installed in the local environment. Your UDF might not work when the package is installed on the server but not on your local environment.
Package 'joblib' is not installed in the local environment. Your UDF might not work when the package is installed on the server but not on your local environment.


<snowflake.snowpark.stored_procedure.StoredProcedure at 0x2215cbac6a0>

## License usage probablity prediction with revocation decision
Local trigger

In [None]:
# %%time
# from train1 import run_model_today
# results = session.call('run_model_today', 1,45,0.5,False,False)
# display(results)

In [None]:
# license_prediction_feature_set = session.table(f"{SNOWPATROL_DB}.{SNOWPATROL_SCHEMA}.license_prediction_feature_set")
# license_usage_probability = license_prediction_feature_set.select(
#     F.col("session_user")
#     , F.col("title")
#     , F.col("department")
#     , F.col("division")
#     , F.col("work_days_since_last_login")
#     ,F.col("training_date").alias("model_trained_on")
#     ,F.col("cutoff_date")
#     , F.call_udf(f"{SNOWPATROL_DB}.{SNOWPATROL_SCHEMA}.udf_predict_login_probability", *[F.col(c) for c in ["weighted_authentications_per_day","work_days_since_last_login", 'authentications_per_day', 'log_work_days_since_last_login']]).alias("probability_no_login")
# )
# license_revocation_decision = license_usage_probability.with_column("revoke_access", F.iff(F.col("probability_no_login") > 0.5, 1, 0))

# license_revocation_decision.write.mode("overwrite").save_as_table(f"{SNOWPATROL_DB}.{SNOWPATROL_SCHEMA}.license_revocation_decision")