In [None]:
%%capture

!pip install gspread
!pip install oauth2client
!pip install slackclient
!pip install -U snowflake-connector-python
!pip install -U snowflake-snowpark-python
!pip install --upgrade psycopg2
!pip install -U sqlalchemy
!pip install df2gspread

In [None]:
%%capture

from snowflake.snowpark import Session 
import os
import boto3
import base64
import json
from requests import get
from pathlib import Path
from datetime import datetime, timedelta, date
import time
import zipfile
import io
import sqlalchemy
from pathlib import Path
import gspread
from oauth2client.service_account import ServiceAccountCredentials
from pprint import pprint
import pandas as pd
import numpy as np
import psycopg2
import sys
import requests
from botocore.exceptions import ClientError
import importlib
from io import StringIO
import snowflake.connector
from sqlalchemy import create_engine
import asyncio
import nest_asyncio
import re
import pytz
import pandas as pd
import snowflake.connector

# Defining Functions

In [None]:
import os
import boto3
import base64
from botocore.exceptions import ClientError
import json
from requests import get
from pathlib import Path
from datetime import datetime

def imports():
    """Import os, pandas, numpy and psycopg2 inside the function body.

    The names are bound as locals of this function only, so calling it does
    not make ``os``, ``pd``, ``np`` or ``psycopg2`` available to the rest of
    the notebook; it returns ``None``.
    """
    import os
    import pandas as pd
    import numpy as np
    import psycopg2

import json
import boto3
import os
from pathlib import Path

def get_secret(secret_name, region_name="us-east-1"):
    """Fetch a secret from AWS Secrets Manager.

    Args:
        secret_name: Identifier passed as ``SecretId`` to ``GetSecretValue``.
        region_name: AWS region of the Secrets Manager endpoint.

    Returns:
        The ``SecretString`` value when the response contains one, otherwise
        the base64-decoded ``SecretBinary`` payload as ``bytes``.

    Raises:
        botocore.exceptions.ClientError: for every failure reported by the
            service. Previously only five specific error codes were re-raised
            and any other code (e.g. an access-denied error) was swallowed,
            making the function return ``None``.
    """
    # Create a Secrets Manager client
    session = boto3.session.Session()
    client = session.client(
        service_name='secretsmanager',
        region_name=region_name
    )

    # See https://docs.aws.amazon.com/secretsmanager/latest/apireference/API_GetSecretValue.html
    # No try/except on purpose: every ClientError is propagated to the caller.
    get_secret_value_response = client.get_secret_value(SecretId=secret_name)

    # Depending on whether the secret is a string or binary, one of these
    # fields will be populated.
    if 'SecretString' in get_secret_value_response:
        return get_secret_value_response['SecretString']
    return base64.b64decode(get_secret_value_response['SecretBinary'])
        
def initialize_env():
    """Load service credentials from Secrets Manager into ``os.environ``.

    Reads the Snowflake, Slack, Metabase and data-warehouse secrets via
    ``get_secret`` and exports their fields as environment variables. The
    Google Sheets service-account key is written to
    ``~/service_account_key_sheets.json`` and its path is exported as
    ``GOOGLE_APPLICATION_CREDENTIALS_SHEETS``.

    The key file is written through a context manager (so the handle is closed
    even if the write fails) and is restricted to owner read/write because it
    holds a credential.
    """
    snowflake_sg_secret = json.loads(get_secret("Snowflake-sagemaker"))
    slack_secret = json.loads(get_secret("prod/slack/reports"))
    rowaa_metabase_access = json.loads(get_secret("prod/metabase/rowaa/user"))
    dwh_writer_secret = json.loads(get_secret("prod/db/datawarehouse/sagemaker"))

    os.environ["SNOWFLAKE_USERNAME"] = snowflake_sg_secret["username"]
    os.environ["SNOWFLAKE_PASSWORD"] = snowflake_sg_secret["password"]
    os.environ["SNOWFLAKE_ACCOUNT"] = snowflake_sg_secret["account"]
    os.environ["SNOWFLAKE_DATABASE"] = snowflake_sg_secret["database"]

    os.environ["SLACK_TOKEN"] = slack_secret["token"]

    os.environ["METABASE_USERNAME_ROWAA"] = rowaa_metabase_access["username"]
    os.environ["METABASE_PASSWORD_ROWAA"] = rowaa_metabase_access["password"]

    os.environ["DWH_WRITER_HOST_NEW"] = dwh_writer_secret["host"]
    os.environ["DWH_WRITER_NAME_NEW"] = dwh_writer_secret["dbname"]
    os.environ["DWH_WRITER_USER_NAME_NEW"] = dwh_writer_secret["username"]
    os.environ["DWH_WRITER_PASSWORD_NEW"] = dwh_writer_secret["password"]

    json_path_sheets = str(Path.home()) + "/service_account_key_sheets.json"
    sheets_key = get_secret("prod/maxab-sheets")
    # Create the file owner-only from the start instead of with default permissions.
    key_fd = os.open(json_path_sheets, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(key_fd, "w") as key_file:
        key_file.write(sheets_key)
    # The mode passed to os.open only applies on creation; tighten a file left
    # behind by an earlier run as well.
    os.chmod(json_path_sheets, 0o600)
    os.environ["GOOGLE_APPLICATION_CREDENTIALS_SHEETS"] = json_path_sheets
    

def get_from_gsheet(workbook, sheet):
    """Read one worksheet of a Google Sheets workbook into a DataFrame.

    Args:
        workbook: Title of the spreadsheet, as passed to ``client.open``.
        sheet: Name of the worksheet inside that spreadsheet.

    Returns:
        A ``pandas.DataFrame`` built from ``get_all_records()``.

    Raises:
        Exception: whatever the Sheets client raised while opening or reading
            the worksheet. The failure is still announced with the original
            message, but it is re-raised instead of being swallowed; the old
            code returned the worksheet *name* (a ``str``) on failure, which
            only produced a confusing error further downstream.
    """
    scope = ["https://spreadsheets.google.com/feeds",
         'https://www.googleapis.com/auth/spreadsheets',
         "https://www.googleapis.com/auth/drive.file",
         "https://www.googleapis.com/auth/drive"]
    initialize_env()
    creds = ServiceAccountCredentials.from_json_keyfile_name(os.environ["GOOGLE_APPLICATION_CREDENTIALS_SHEETS"], scope)
    client = gspread.authorize(creds)
    try:
        wks = client.open(workbook).worksheet(sheet)
        return pd.DataFrame(wks.get_all_records())
    except Exception:
        print(sheet,'failed to fetch data')
        raise

#### Defining Functions used in AM Data Assignment

In [None]:
## assigning data in any project
def assign_data(df, list, assigns):
    """Shuffle ``df`` and deal its rows out to the agents in ``list``.

    The shuffled frame is padded with empty rows up to the next multiple of the
    number of agents, each agent is assigned one contiguous, equally sized run
    of rows, and the padding is dropped again.

    Args:
        df: Retailer rows to distribute. When it has data it must contain
            ``main_system_id`` and ``retailer_mobile_number``.
        list: Agent identifiers (the name shadows the builtin but is kept so
            keyword callers keep working).
        assigns: Maximum number of rows returned per agent.

    Returns:
        A DataFrame with an added ``agent_assigned`` column holding at most
        ``assigns`` rows per agent. If there is nothing to assign (no rows, or
        no agents) an empty frame with the standard output columns is returned.

    Notes:
        * Rows with a missing value in any remaining column are removed by the
          ``dropna()`` that strips the padding.
          NOTE(review): confirm that dropping such retailers is intended.
        * The shuffle is unseeded, so the row-to-agent mapping differs per call.
    """
    empty_result_columns = ["index","main_system_id","retailer_name","retailer_mobile_number","description","offer","reward","balance","priority","agent_assigned"]
    agents = list
    agent_count = len(agents)
    if agent_count == 0:
        # Previously this case died with ZeroDivisionError on the modulo below.
        return pd.DataFrame(columns=empty_result_columns)

    # Pad to the next multiple of agent_count so the rows split into equal runs.
    rows_needed = agent_count - len(df) % agent_count
    columns_needed = len(df.axes[1])
    shuffled = df.sample(frac=1)
    padding = pd.DataFrame(index=np.arange(rows_needed), columns=np.arange(columns_needed))
    assgn = pd.concat([shuffled, padding])
    # Removes the placeholder columns contributed by the padding frame (and any
    # real column that is entirely empty).
    assgn = assgn.dropna(axis=1, how='all')
    rows_per_agent = len(assgn) // agent_count
    assgn = assgn.assign(agent_assigned=[agent for agent in agents for _ in range(rows_per_agent)])
    # Removes the padding rows again (see the docstring note about NaN rows).
    assgn = assgn.dropna()

    if assgn.shape[1] != 1:
        # Values rendered from floats look like "123.0" once stringified; strip
        # only that trailing ".0" rather than every ".0" inside the value.
        for id_column in ("main_system_id", "retailer_mobile_number"):
            assgn[id_column] = assgn[id_column].fillna('').astype(str).str.replace(r"\.0$", "", regex=True)
        return assgn.groupby('agent_assigned').head(assigns)

    # Only agent_assigned survived, i.e. df had no usable data: return an empty
    # frame that still exposes the standard output columns.
    empty = assgn.iloc[0:0]
    empty = empty.assign(index="", main_system_id = "",retailer_name = "", retailer_mobile_number="",description="",offer="",reward="",balance="",priority="",agent_assigned="")
    empty = empty[empty_result_columns]
    return empty.groupby('agent_assigned').head(assigns)


## removing previously assigned - recharge
def _previously_assigned_ids(previously_assigned):
    """Return the numeric ids found in column ``0`` of ``previously_assigned``."""
    if previously_assigned is None or 0 not in previously_assigned.columns:
        # e.g. an empty sheet range, which yields a frame without any column
        return np.array([], dtype='int64')
    raw_ids = previously_assigned[0].astype(str).str.replace(" ", "", regex=False)
    numeric_ids = pd.to_numeric(raw_ids, errors='coerce').dropna()
    numeric_ids = numeric_ids[np.isfinite(numeric_ids)]
    return numeric_ids.astype('int64').to_numpy()


def remove_assign(previously_assigned, df, assigns):
    """Drop retailers that were already assigned and cap the rows per agent.

    Args:
        previously_assigned: DataFrame whose column ``0`` holds the ids that
            were already handed out (values may be text and may contain
            spaces). Blank or non-numeric cells are ignored.
        df: Candidate assignments with ``main_system_id`` and
            ``agent_assigned`` columns.
        assigns: Maximum number of rows kept per agent.

    Returns:
        A new DataFrame with ``main_system_id`` cast to int, without the
        previously assigned ids, limited to ``assigns`` rows per agent.

    Changes from the earlier implementation:
        * blank cells no longer raise ``ValueError`` in the float cast;
        * a frame with no rows/columns no longer raises ``KeyError``;
        * a single previously assigned id is excluded too (a one-row frame used
          to be treated as "nothing assigned");
        * neither input frame is modified in place.
    """
    candidates = df.copy()
    candidates['main_system_id'] = candidates['main_system_id'].astype('int')

    assigned_ids = _previously_assigned_ids(previously_assigned)
    if len(assigned_ids):
        candidates = candidates.loc[~candidates['main_system_id'].isin(assigned_ids)]

    return candidates.groupby('agent_assigned').head(assigns)
    

#### Initializing GSheet Credentials

In [None]:
import oauth2client
import gspread
from oauth2client.service_account import ServiceAccountCredentials
from pprint import pprint
from df2gspread import df2gspread as d2g
import pandas as pd
import os
import numpy as np
import psycopg2
import sys
import requests

import importlib

import os
import psycopg2
import numpy as np

from datetime import datetime, timedelta, date
import datetime
import copy
import pandas as pd
from tqdm import tqdm
import time
import datetime as dt

initialize_env()
from oauth2client.service_account import ServiceAccountCredentials

# OAuth scopes requested for the service account
scope = [
    'https://spreadsheets.google.com/feeds',
    'https://www.googleapis.com/auth/drive',
]

# Build the credentials from the key file whose path initialize_env() exported,
# then authorize the module-level gspread client with them.
creds = ServiceAccountCredentials.from_json_keyfile_name(
    os.environ["GOOGLE_APPLICATION_CREDENTIALS_SHEETS"],
    scope,
)
client = gspread.authorize(creds)

#### Defining ret_metabase Question

In [None]:
import os
import requests
import json
import pandas as pd
import numpy as np
import re
import datetime
import seaborn as sns
from io import StringIO
import sys
from datetime import datetime

def ret_metabase(question, timeout=None):
    """Run a saved Metabase question and return its result as a DataFrame.

    Opens a Metabase session with the ``METABASE_USERNAME_ROWAA`` /
    ``METABASE_PASSWORD_ROWAA`` credentials from the environment, requests the
    CSV export of the card and parses it with ``pandas.read_csv``.

    Args:
        question: Id of the saved question (card); converted with ``str``.
        timeout: Optional timeout forwarded to both ``requests.post`` calls.
            ``None`` (the default) keeps the previous behaviour of waiting
            indefinitely.

    Returns:
        The query result as a ``pandas.DataFrame``. If an HTTP/connection
        error occurs, the ``requests`` exception instance is *returned*, not
        raised, so callers that need a DataFrame should check
        ``isinstance(result, Exception)``.
    """
    question_id = str(question)
    # Do not modify below this
    base_url = 'https://bi.maxab.info/api'
    base_headers = {'Content-Type' : 'application/json'}

    try:
        credentials = {
            'username': os.environ["METABASE_USERNAME_ROWAA"],
            'password': os.environ["METABASE_PASSWORD_ROWAA"],
        }
        s_response = requests.post(base_url + '/session',
                                   data=json.dumps(credentials),
                                   headers=base_headers,
                                   timeout=timeout)
        s_response.raise_for_status()

        # The session id authenticates the follow-up query request.
        query_headers = dict(base_headers)
        query_headers['X-Metabase-Session'] = s_response.json()['id']

        p_response = requests.post(base_url + '/card/' + question_id + '/query/csv',
                                   headers=query_headers,
                                   timeout=timeout)
        p_response.raise_for_status()

        csv_text = p_response.content.decode('utf-8')
        return pd.read_csv(StringIO(csv_text))

    # HTTPError, ConnectionError and Timeout all derive from RequestException,
    # so a single handler covers what the four separate clauses used to.
    except requests.exceptions.RequestException as err:
        return err
    
# Run a query saved in Metabase by passing its question ID.
# For example, question 1606 is saved in my personal collection and can be run with the line below:
#ret_metabase(1606)


### AM Attendance [Table]

In [None]:
# database connection:
import os
import psycopg2
import numpy as np
import sqlalchemy
import os
import pandas as pd 
from datetime import datetime

## setup the environment:
initialize_env()

## get secrets:
host = os.environ["DWH_WRITER_HOST_NEW"]
database = os.environ["DWH_WRITER_NAME_NEW"]
user = os.environ["DWH_WRITER_USER_NAME_NEW"]
password = os.environ["DWH_WRITER_PASSWORD_NEW"]

## connectivity check: fail fast if the warehouse is unreachable.
## The raw connection is not used for the upload, so close it right away
## instead of leaving it open for the lifetime of the kernel.
conn = psycopg2.connect(host=host, database=database, user=user, password=password)
conn.close()
print("Successfully connected to DB")

engine = None
try:
    ## get the df:
    df_3 = get_from_gsheet("daily_automatic_assignment", "attendance_table")

    ## db connection:
    ## URL.create escapes special characters in the credentials, which the
    ## hand-built f-string URL did not.
    engine = sqlalchemy.create_engine(
        sqlalchemy.engine.URL.create(
            "postgresql+psycopg2",
            username=user,
            password=password,
            host=host,
            database=database,
        )
    )
    print(bool(engine))

    # data cleaning in am_attendance: cast only the non-blank cells
    for col in ['id', 'agent_id']:
        df_3.loc[df_3[col] != '', col] = df_3.loc[df_3[col] != '', col].astype("int")

    for col in ['date']:
        df_3.loc[df_3[col] != '', col] = df_3.loc[df_3[col] != '', col].astype("datetime64[ns]")

    # add last_updated_at:
    df_3["last_updated_at"] = datetime.now()

    ## update the table on db (the whole table is replaced):
    print("start")
    df_3.to_sql(name='am_attendance', schema='fintech', con=engine, if_exists='replace', chunksize=1000, method='multi')
    print("end")
except Exception as e:
    # Best effort: an attendance refresh failure is reported but does not stop
    # the rest of the notebook.
    print(f'attendance table issue: {e}')
finally:
    ## release pooled connections:
    if engine is not None:
        engine.dispose()

### Shift Schedule

In [None]:
# Shift Schedule
from datetime import datetime, timedelta
# Current hour after shifting the kernel clock forward by three hours.
# NOTE(review): presumably converts a UTC server clock to local time - confirm.
now = datetime.now() + timedelta(hours=3)
hour = now.hour
attendance = ret_metabase(13502)

# Work on a copy with the shift boundaries as integers (hours).
attendance_copy = attendance.copy().astype({'start_time': int, 'end_time': int})

# Data is assigned from one hour before the shift starts until two hours
# before it ends.
attendance_copy['assignment_start_time'] = attendance_copy['start_time'] - 1
attendance_copy['assignment_end_time'] = attendance_copy['end_time'] - 2

in_assignment_window = (
    (attendance_copy['assignment_start_time'] <= hour)
    & (attendance_copy['assignment_end_time'] >= hour)
)
attendance_copy['assign_data'] = np.where(in_assignment_window, 'yes', 'no')

# Task-based agents that should receive data this hour.
is_task_based = attendance_copy['project'] == 'task_based'
task_based_agents = attendance_copy.loc[is_task_based & (attendance_copy['assign_data'] == 'yes')]

task_based_list = task_based_agents['agent_id'].values.tolist()
number_of_task_based_agents = len(task_based_list)

### Dispatching the Data

In [None]:
if number_of_task_based_agents != 0:
    # setup the environment:
    initialize_env()

    # Metabase questions whose results are unioned into the candidate pool.
    task_question_ids = [18915, 36299, 35981, 23764, 38188, 40154, 40541, 35135]
    dataframes = []
    for question_id in task_question_ids:
        question_df = ret_metabase(question_id)
        if isinstance(question_df, Exception):
            # ret_metabase returns request errors instead of raising them;
            # surface the real failure here rather than an AttributeError below.
            raise question_df
        question_df.columns = [column.lower() for column in question_df.columns]
        dataframes.append(question_df)

    # Concatenate the dataframes along the rows (union all)
    df_unfiltered = pd.concat(dataframes, ignore_index=True)

    # Excluding blacklisted retailers.
    # The ids are cleaned with .str.replace below, i.e. they arrive as text, so
    # the numeric comparison alone never matches a text id. It is kept for
    # numeric values and repeated as a text comparison after the cleaning.
    blacklisted_id = 214101
    df_raw = df_unfiltered[df_unfiltered['main_system_id'] != blacklisted_id].copy()
    df_raw['main_system_id'] = df_raw['main_system_id'].str.replace(',', '')
    df_raw['main_system_id'] = df_raw['main_system_id'].astype('Int64', errors='ignore')
    df_raw = df_raw[~df_raw['main_system_id'].astype(str).isin({str(blacklisted_id)})].copy()

    previous_calls = ret_metabase(35299)
    if isinstance(previous_calls, Exception):
        raise previous_calls
    previous_calls['main_system_id'] = previous_calls['main_system_id'].str.replace(',', '')
    previous_calls['main_system_id'] = previous_calls['main_system_id'].astype('Int64', errors='ignore')

    # Split the pool: retailers that already have a previous call keep the
    # assignment stored in previous_calls; the rest are distributed afresh.
    exclude_same_assigns = previous_calls[['main_system_id']].astype('Int64')
    previously_called_ids = exclude_same_assigns['main_system_id'].astype(int).values
    already_called = df_raw['main_system_id'].isin(previously_called_ids)
    task_based = df_raw.loc[~already_called].copy()
    old_assgns = df_raw.loc[already_called].copy()
    old_assgns['main_system_id'] = old_assgns['main_system_id'].astype('Int64', errors='ignore')
    df_2 = old_assgns.merge(previous_calls, on='main_system_id', how='left')
    final_old_assign = df_2[[
        'main_system_id',
        'retailer_mobile_number',
        'retailer_name',
        'description',
        'balance',
        'offer',
        'reward',
        'agent_assigned',
        'priority'
        ]]

    # Distribute every priority level (1..15) across the agents on shift.
    prioritized_assignments = []
    for priority_level in range(1, 16):
        priority_rows = pd.DataFrame(task_based[task_based["priority"] == priority_level].reset_index())
        prioritized_assignments.append(assign_data(priority_rows, task_based_list, 2000))

    # concatenating priorities (kept assignments first, then priority 1..15)
    main_list = [final_old_assign] + prioritized_assignments

    main_data_total = pd.concat(main_list, axis=0, ignore_index=True)
    main_data = main_data_total[["main_system_id", "retailer_mobile_number","retailer_name","description","reward","balance","offer","agent_assigned"]]
    main_data = main_data.drop_duplicates(subset=['main_system_id'])

    # removing previously assigned
    scope = [
        "https://spreadsheets.google.com/feeds",
        'https://www.googleapis.com/auth/spreadsheets',
        "https://www.googleapis.com/auth/drive.file",
        "https://www.googleapis.com/auth/drive"
    ]

    creds = ServiceAccountCredentials.from_json_keyfile_name(os.environ["GOOGLE_APPLICATION_CREDENTIALS_SHEETS"], scope)
    client = gspread.authorize(creds)

    sheet = client.open('[HOURLY] TASK-BASED Data')
    sheet_instance = sheet.worksheet('Data')
    assignments = sheet_instance.get('G5:G')
    assignments_df = pd.DataFrame.from_dict(assignments)

    df = main_data.copy()

    # Per-agent cap: 70 rows in the 18:00 run, 50 in every other run.
    assigns_per_agent = 70 if hour == 18 else 50
    main_data_to_assign = remove_assign(assignments_df, df, assigns_per_agent)
    filtered_df = main_data_to_assign[main_data_to_assign['agent_assigned'].isin(task_based_list)]

    # importing data in agents' sheet
    final_data_to_assign = filtered_df.copy()
    final_data_to_assign = final_data_to_assign.drop_duplicates(subset='main_system_id', keep="first")

    final_data_to_assign['added_at'] = dt.datetime.now() + timedelta(hours=3)
    if final_data_to_assign.columns[0] == 'index':
        final_data_to_assign = final_data_to_assign.drop(labels='index', axis=1)

    final_data_to_assign = final_data_to_assign.astype(str)
    google_sh = client.open('[HOURLY] TASK-BASED Data')
    sheet = google_sh.worksheet('raw_data')
    # sheet.clear()
    # NOTE(review): the header row is appended together with the data on every
    # run because the sheet is not cleared first - confirm this is intended.
    sheet.append_rows([final_data_to_assign.columns.values.tolist()] + final_data_to_assign.values.tolist(), value_input_option="USER_ENTERED")

    # Writing historical data on the dwh
    # get secrets:
    host = os.environ["DWH_WRITER_HOST_NEW"]
    database = os.environ["DWH_WRITER_NAME_NEW"]
    user = os.environ["DWH_WRITER_USER_NAME_NEW"]
    password = os.environ["DWH_WRITER_PASSWORD_NEW"]

    # Connectivity check only: the upload goes through the SQLAlchemy engine,
    # so close the raw connection immediately instead of leaking it.
    conn = psycopg2.connect(host=host, database=database, user=user, password=password)
    conn.close()
    print("Successfully connected to DB")

    df = final_data_to_assign[['main_system_id', 'description','offer', 'reward','agent_assigned']].copy()
    # NOTE(review): added_at above uses now + 3h while dispatched_at uses
    # now - 3h; the sign is kept as it was - confirm which offset is intended.
    df['dispatched_at'] = dt.datetime.now() - timedelta(hours=3)

    # Every column is text at this point (astype(str) above): strip thousands
    # separators, then restore numeric types for the warehouse table.
    for text_column in ('main_system_id', 'agent_assigned', 'reward'):
        df[text_column] = df[text_column].str.replace(',', '')

    df['main_system_id'] = pd.to_numeric(df['main_system_id'], errors='coerce').astype('Int64')
    df['reward'] = pd.to_numeric(df['reward'], errors='coerce').astype(float)
    df['agent_assigned'] = df['agent_assigned'].astype('Int64', errors='ignore')

    # db connection:
    # URL.create escapes special characters in the credentials, which the
    # hand-built f-string URL did not.
    engine = sqlalchemy.create_engine(
        sqlalchemy.engine.URL.create(
            "postgresql+psycopg2",
            username=user,
            password=password,
            host=host,
            database=database,
        )
    )
    print(bool(engine))

    # update the table on db:
    try:
        print("start")
        df.to_sql(name='task_based_am_projects', schema='fintech', con=engine, if_exists='append', chunksize=1000, method='multi', index=False)
        print("end")
    finally:
        # release pooled connections:
        engine.dispose()

else:
    print("No task-based agents are inside their assignment window; nothing to dispatch")

In [None]:
# Number of rows read from the previously-assigned range of the sheet.
# assignments_df only exists when the dispatch cell ran its main branch, so
# mirror that condition instead of raising NameError on a fresh kernel.
assignments_df.shape[0] if number_of_task_based_agents != 0 else None