In [1]:
from common_functions import google_sheets, get_secret, ret_metabase
from datetime import datetime, timedelta
import os
import sqlalchemy
import requests
import psycopg2
import json
from pathlib import Path
import pandas as pd
import numpy as np
import pytz
import logging
logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)


  from pandas.core.computation.check import NUMEXPR_INSTALLED


In [2]:
def initialize_env():
    """
    Initialize environment variables and credentials for Snowflake, Slack, Metabase, and Google Sheets.
    Reads secrets and sets them as environment variables for use in other functions.
    """
    snowflake_sg_secret = json.loads(get_secret("Snowflake-sagemaker"))
    slack_secret = json.loads(get_secret("prod/slack/reports"))
    fintech_service_account = json.loads(get_secret("prod/fintechServiceEmail/credentials"))
    dwh_writer_secret = json.loads(get_secret("prod/db/datawarehouse/sagemaker"))

    os.environ["SNOWFLAKE_USERNAME"] = snowflake_sg_secret["username"]
    os.environ["SNOWFLAKE_PASSWORD"] = snowflake_sg_secret["password"]
    os.environ["SNOWFLAKE_ACCOUNT"] = snowflake_sg_secret["account"]
    os.environ["SNOWFLAKE_DATABASE"] = snowflake_sg_secret["database"]

    os.environ["SLACK_TOKEN"] = slack_secret["token"]

    os.environ["FINTECH_EMONEY_EMAIL"] = fintech_service_account["email_name"]
    os.environ["FINTECH_EMONEY_PASSWORD"] = fintech_service_account["email_password"]

    metabase_secret = json.loads(get_secret("prod/metabase/maxab_config"))
    os.environ["EGYPT_METABASE_USERNAME"] = metabase_secret["metabase_user"]
    os.environ["EGYPT_METABASE_PASSWORD"] = metabase_secret["metabase_password"]

    os.environ["DWH_WRITER_HOST_NEW"] = dwh_writer_secret["host"]
    os.environ["DWH_WRITER_NAME_NEW"] = dwh_writer_secret["dbname"]
    os.environ["DWH_WRITER_USER_NAME_NEW"] = dwh_writer_secret["username"]
    os.environ["DWH_WRITER_PASSWORD_NEW"] = dwh_writer_secret["password"] 

    json_path_sheets = str(Path.home()) + "/service_account_key_sheets.json"
    sheets_key = get_secret("prod/maxab-sheets")
    f = open(json_path_sheets, "w")
    f.write(sheets_key)
    f.close()
    os.environ["GOOGLE_APPLICATION_CREDENTIALS_SHEETS"] = json_path_sheets

In [3]:
def get_available_agents(attendance_df, current_hour):
    """
    Get a list of available task-based agents for the current hour based on attendance DataFrame.
    Args:
        attendance_df (pd.DataFrame): DataFrame with agent attendance info
        current_hour (int): Current hour (24-hour format)
    Returns:
        list: List of available agent IDs
    """
    attendance_copy = attendance_df.copy()
    
    attendance_copy['start_time'] = attendance_copy['start_time'].astype(int)
    attendance_copy['end_time'] = attendance_copy['end_time'].astype(int)
    
    attendance_copy['assignment_start_time'] = attendance_copy['start_time'] - 1
    attendance_copy['assignment_end_time'] = attendance_copy['end_time'] - 1
    
    attendance_copy['assign_data'] = np.where(
        (current_hour >= attendance_copy['assignment_start_time']) & 
        (current_hour <= attendance_copy['assignment_end_time']),
        'yes', 'no')
    
    task_based_agents = attendance_copy.loc[
        (attendance_copy['project'] == 'task_based') & 
        (attendance_copy['assign_data'] == 'yes')]
    
    task_based_list = task_based_agents['agent_id'].values.tolist()
    print(f"Number of available agents: {len(task_based_list)}")
    return task_based_list


In [4]:
def assign_data_equal_projects(df, list):
    """
    Distribute rows of a DataFrame equally among a list of agents, grouped by project name.
    Args:
        df (pd.DataFrame): DataFrame with 'project_name'
        list (list): List of agent IDs or names
    Returns:
        pd.DataFrame: DataFrame with 'agent_assigned' column
    """
    df = df.sample(frac=1)  # Shuffle the data
    project_types = df['project_name'].unique()
    
    assigned_data = pd.DataFrame()
    
    for project in project_types:
        project_df = df[df['project_name'] == project]
        project_df = project_df.reset_index(drop=True)
        rows_per_agent = len(project_df) // len(list)
        remainder = len(project_df) % len(list)
        
        # Distribute rows equally
        for i, agent in enumerate(list):
            start_idx = i * rows_per_agent
            end_idx = start_idx + rows_per_agent
            agent_data = project_df.iloc[start_idx:end_idx].copy()
            agent_data['agent_assigned'] = agent
            
            # Handle remainder
            if i < remainder:
                extra_row = project_df.iloc[end_idx:end_idx+1].copy()
                extra_row['agent_assigned'] = agent
                agent_data = pd.concat([agent_data, extra_row])
            
            assigned_data = pd.concat([assigned_data, agent_data])
    
    assigned_data = assigned_data.reset_index(drop=True)
    print("Assignment by project complete.")
    return assigned_data

In [16]:
    morocco_tz = pytz.timezone('Africa/Casablanca')
    now = datetime.now(morocco_tz)
    current_time = now.time()
    current_day = now.strftime('%A')
    print(current_day)
    hour = int(str(now.time())[0:2])
    hour

Monday


9

In [154]:
dataframes = ret_metabase("Kenya",15328)
dataframes

/home/ec2-user/service_account_key.json


Unnamed: 0,RETAILER_ID,POLYGON_NAME_EN,MOBILE,NAME,CITY,LAST_ACTIVATION_DATE,LAST_ACTIVATION_DAY,PREV_ACTIVATION,PREV_ACT_DAY
0,94278,Iten_3,728107696,Prisca Koech,Eldoret,2025-07-01,2025-07-01,2025-06-01,2025-06-02
1,22608,Jamhuri/Ayani/Karanja_2,710759483,shop Masai logwana,Nairobi,2025-07-01,2025-07-03,2025-06-01,2025-06-06
2,106789,Kiganjo_1,711142128,Benson,Nairobi,2025-07-01,2025-07-01,2025-06-01,2025-06-26
3,50976,Kamulu/Joska_4,724407810,hanna,Nairobi,2025-07-01,2025-07-10,2025-06-01,2025-06-04
4,31616,Rimpa/Kiserian_1,722568784,Abdi Galgalo,Nairobi,2025-07-01,2025-07-11,2025-06-01,2025-06-06
...,...,...,...,...,...,...,...,...,...
11942,178919,Kiriri/Kahawa wendani_3,743193353,Restoration shop,Nairobi,2025-07-01,2025-07-01,2025-06-01,2025-06-11
11943,217271,Bul/Kerarapon/Karen_4,748361266,Reuben Gichana,Nairobi,2025-07-01,2025-07-01,2025-06-01,2025-06-17
11944,193038,Lindi/Laini Saba/Uperhill_2,791526454,CATCHERS,Nairobi,2025-07-01,2025-07-06,2025-06-01,2025-06-01
11945,134384,Bukura_1,725792435,joe,Kisumu,2025-07-01,2025-07-02,2025-06-01,2025-06-02


In [5]:
now = datetime.now() + timedelta(hours=3)
hour = int(str(now.time())[0:2])



In [3]:
Data = google_sheets("POS Campaign | July & August 2025", "Main", "Get")
# Convert Date column
# Data["Date"] = pd.to_datetime(Data["Date"], format="%m/%d/%Y")
# # Get today's date (normalized to midnight)
# today = pd.Timestamp.today().normalize()
# # Filter for today's rows
# Data_filtered = Data[Data["Date"] == today]

/home/ec2-user/service_account_key.json


In [8]:
sheet_Data = Data.copy()
sheet_Data.rename(columns={'mobile': 'main_system_id','name': 'retailer_name'},inplace=True)

sheet_Data["main_system_id"] = sheet_Data["main_system_id"].astype('Int64').astype(str)
sheet_Data["description"] = (
    "POS Acquisition Marketing - location: "
    + sheet_Data["location"].astype(str)
    + " - campaign:"
    + sheet_Data["campaign"].astype(str)
)


sheet_Data.drop(columns=["location", "campaign", "Date"], inplace=True)
sheet_Data['Added_at'] = now
sheet_Data['project_name'] = "POS Leads"

In [105]:
# Convert Date column
Data["Date"] = pd.to_datetime(Data["Date"], format="%m/%d/%Y")
# Get today's date (normalized to midnight)
today = pd.Timestamp.today().normalize()
# Filter for today's rows
filtered = Data[Data["Date"] == today]

In [9]:
name_list = [
 'Somaya Alrab',
 'Esraa Mohamed',
 'Marina Riyad',
 'Raneen Ali',
 'Abdelrahman Merghany',
 'Israa Abdelhamid',
 'Nada Saber',
 'Ebthal Saber']

In [10]:
Pos_leads = assign_data_equal_projects(sheet_Data , name_list)

Assignment by project complete.


In [11]:
agent_group = google_sheets("Agents - Retailers", "Agent-to-group", "get")
col_list = agent_group.columns.tolist()

/home/ec2-user/service_account_key.json


In [12]:
google_sheets("Agents - Retailers", "Task_based", "append", df=Pos_leads)

/home/ec2-user/service_account_key.json


'Data is appended to the sheet successfully'

In [14]:
for i in col_list:
    if i == "Benchmark":
        continue 
    filtered_df = Pos_leads[Pos_leads["agent_assigned"] == i]
    filtered_df.drop(columns=["agent_assigned"], inplace=True)
    print(f"Assigned to {i},{len(filtered_df)} Task-based Tasks")
    google_sheets(i, "Task_based", "append", df=filtered_df)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.drop(columns=["agent_assigned"], inplace=True)


Assigned to Somaya Alrab,51 Task-based Tasks
/home/ec2-user/service_account_key.json


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.drop(columns=["agent_assigned"], inplace=True)


Assigned to Esraa Mohamed,51 Task-based Tasks
/home/ec2-user/service_account_key.json


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.drop(columns=["agent_assigned"], inplace=True)


Assigned to Marina Riyad,51 Task-based Tasks
/home/ec2-user/service_account_key.json


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.drop(columns=["agent_assigned"], inplace=True)


Assigned to Raneen Ali,51 Task-based Tasks
/home/ec2-user/service_account_key.json


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.drop(columns=["agent_assigned"], inplace=True)


Assigned to Abdelrahman Merghany,51 Task-based Tasks
/home/ec2-user/service_account_key.json


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.drop(columns=["agent_assigned"], inplace=True)


Assigned to Israa Abdelhamid,51 Task-based Tasks
/home/ec2-user/service_account_key.json


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.drop(columns=["agent_assigned"], inplace=True)


Assigned to Nada Saber,50 Task-based Tasks
/home/ec2-user/service_account_key.json


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.drop(columns=["agent_assigned"], inplace=True)


Assigned to Ebthal Saber,50 Task-based Tasks
/home/ec2-user/service_account_key.json


In [6]:
attendance = ret_metabase("EGYPT", 13502)

/home/ec2-user/service_account_key.json


In [46]:
Data = google_sheets("POS Campaign | July & August 2025", "Main", "Get")

task_based_list = get_available_agents(attendance, hour)
# Convert Date column
Data["Date"] = pd.to_datetime(Data["Date"], format="%m/%d/%Y")
# Get today's date (normalized to midnight)
today = pd.Timestamp.today().normalize()
# Filter for today's rows
Data_filtered = Data[Data["Date"] == today]

sheet_Data = Data_filtered.copy()
sheet_Data.rename(columns={'mobile': 'main_system_id','name': 'retailer_name'},inplace=True)

sheet_Data["main_system_id"] = sheet_Data["main_system_id"].astype('Int64').astype(str)

sheet_Data["description"] = (
    "POS Acquisition Marketing - location: "
    + sheet_Data["location"].astype(str)
    + " - campaign:"
    + sheet_Data["campaign"].astype(str)
)

sheet_Data.drop(columns=["location", "campaign", "Date"], inplace=True)
sheet_Data['Added_at'] = now
sheet_Data['project_name'] = "POS Leads"
name_list = ['Somaya Alrab','Esraa Mohamed','Marina Riyad','Raneen Ali','Abdelrahman Merghany','Israa Abdelhamid','Nada Saber','Ebthal Saber']
Pos_leads = assign_data_equal_projects(sheet_Data , name_list)
data_for_sql = assign_data_equal_projects(sheet_Data, task_based_list)
data_for_sql.rename(columns={'main_system_id': 'offer'},inplace=True)
data_for_sql['main_system_id'] = 11111
data_for_sql.drop(columns=["description"], inplace=True)
data_for_sql["description"] = "POS Acquisition Marketing - campaign: " + Data["campaign"]
data_for_sql = data_for_sql[['main_system_id', 'description','offer','agent_assigned']]
data_for_sql['dispatched_at'] = now
data_for_sql['agent_assigned'] = data_for_sql['agent_assigned'].astype(int) 

/home/ec2-user/service_account_key.json
Number of available agents: 8
Assignment by project complete.
Assignment by project complete.


In [50]:
data_for_sql

Unnamed: 0,main_system_id,description,offer,agent_assigned,dispatched_at
0,11111,POS Acquisition Marketing - campaign: Delta,201278771787,2484,2025-07-16 13:37:21.712284
1,11111,POS Acquisition Marketing - campaign: Delta,201116101363,2484,2025-07-16 13:37:21.712284
2,11111,POS Acquisition Marketing - campaign: Delta,201227725768,2484,2025-07-16 13:37:21.712284
3,11111,POS Acquisition Marketing - campaign: Delta,201224045696,2484,2025-07-16 13:37:21.712284
4,11111,POS Acquisition Marketing - campaign: Delta,201015495276,2484,2025-07-16 13:37:21.712284
...,...,...,...,...,...
401,11111,POS Acquisition Marketing - campaign: Delta,201011302096,5565,2025-07-16 13:37:21.712284
402,11111,POS Acquisition Marketing - campaign: Delta,201062284116,5565,2025-07-16 13:37:21.712284
403,11111,POS Acquisition Marketing - campaign: Delta,201100131329,5565,2025-07-16 13:37:21.712284
404,11111,POS Acquisition Marketing - campaign: Delta,201118588132,5565,2025-07-16 13:37:21.712284


In [7]:
def initialize_env():
    """
    Initialize environment variables and credentials for Snowflake, Slack, Metabase, and Google Sheets.
    Reads secrets and sets them as environment variables for use in other functions.
    """
    snowflake_sg_secret = json.loads(get_secret("Snowflake-sagemaker"))
    slack_secret = json.loads(get_secret("prod/slack/reports"))
    fintech_service_account = json.loads(get_secret("prod/fintechServiceEmail/credentials"))
    dwh_writer_secret = json.loads(get_secret("prod/db/datawarehouse/sagemaker"))

    os.environ["SNOWFLAKE_USERNAME"] = snowflake_sg_secret["username"]
    os.environ["SNOWFLAKE_PASSWORD"] = snowflake_sg_secret["password"]
    os.environ["SNOWFLAKE_ACCOUNT"] = snowflake_sg_secret["account"]
    os.environ["SNOWFLAKE_DATABASE"] = snowflake_sg_secret["database"]

    os.environ["SLACK_TOKEN"] = slack_secret["token"]

    os.environ["FINTECH_EMONEY_EMAIL"] = fintech_service_account["email_name"]
    os.environ["FINTECH_EMONEY_PASSWORD"] = fintech_service_account["email_password"]

    metabase_secret = json.loads(get_secret("prod/metabase/maxab_config"))
    os.environ["EGYPT_METABASE_USERNAME"] = metabase_secret["metabase_user"]
    os.environ["EGYPT_METABASE_PASSWORD"] = metabase_secret["metabase_password"]

    os.environ["DWH_WRITER_HOST_NEW"] = dwh_writer_secret["host"]
    os.environ["DWH_WRITER_NAME_NEW"] = dwh_writer_secret["dbname"]
    os.environ["DWH_WRITER_USER_NAME_NEW"] = dwh_writer_secret["username"]
    os.environ["DWH_WRITER_PASSWORD_NEW"] = dwh_writer_secret["password"] 

    json_path_sheets = str(Path.home()) + "/service_account_key_sheets.json"
    sheets_key = get_secret("prod/maxab-sheets")
    f = open(json_path_sheets, "w")
    f.write(sheets_key)
    f.close()
    os.environ["GOOGLE_APPLICATION_CREDENTIALS_SHEETS"] = json_path_sheets

def send_text_slack(channel, text):
    import slack
    import os

    initialize_env()

    client = slack.WebClient(token=os.environ["SLACK_TOKEN"])
    try:
        client.chat_postMessage(
        channel=channel,
        text=text
      )
        print('Message Sent')
    except Exception as e:
        raise e

def task_fail_slack_alert(context):
    slack_msg = """
        :red_circle: Task Failed.
        *Task*: {task}  
        *Dag*: {dag} 
        *Execution Time*: {exec_date}  
        *Reason*: {exception}
    """.format(
        task=context.get('task_instance').task_id,
        dag=context.get('task_instance').dag_id,
        exec_date=context.get('execution_date'),
        exception=context.get('exception')
    )

    send_text_slack(channel='account_mgmt_alerts', text=slack_msg)



def clean_column_id(df, column_name):
    """
    Clean a DataFrame column by removing commas and converting to integer type if possible.
    Args:
        df (pd.DataFrame): DataFrame to clean
        column_name (str): Name of the column to clean
    Returns:
        pd.DataFrame: DataFrame with cleaned column
    """
    # Ensure the column is treated as a string
    df[column_name] = df[column_name].astype(str)
    
    # Replace commas in the string
    df[column_name] = df[column_name].str.replace(',', '')
    
    # Convert back to an integer, if appropriate
    df[column_name] = df[column_name].astype('Int64', errors='ignore')
    
    return df

# ----------------------------------------
# Mapping distribution (Segment-based)
# ----------------------------------------
def assign_data_by_mapping(df, mapping_df):
    """
    Assign agents to retailers based on a mapping DataFrame.
    Args:
        df (pd.DataFrame): DataFrame with 'main_system_id'
        mapping_df (pd.DataFrame): DataFrame with 'MAIN_SYSTEM_ID' and 'AGENT_ID'
    Returns:
        pd.DataFrame: DataFrame with 'agent_assigned' column
    """
    # store retail-agent mapping in a dictionary 
    mapping_dict = dict(zip(mapping_df['MAIN_SYSTEM_ID'], mapping_df['AGENT_ID']))
        
    assigned_agents = []

    for retailer_id in df['main_system_id']:
        if retailer_id in mapping_dict:
            assigned_agents.append(mapping_dict[retailer_id])
        else:
            assigned_agents.append(None)
   
    df['agent_assigned'] = assigned_agents
    df = df.reset_index(drop=True)

    return df

def assign_data_equal_projects(df, list):
    """
    Distribute rows of a DataFrame equally among a list of agents, grouped by project name.
    Args:
        df (pd.DataFrame): DataFrame with 'project_name'
        list (list): List of agent IDs or names
    Returns:
        pd.DataFrame: DataFrame with 'agent_assigned' column
    """
    df = df.sample(frac=1)  # Shuffle the data
    project_types = df['project_name'].unique()
    
    assigned_data = pd.DataFrame()
    
    for project in project_types:
        project_df = df[df['project_name'] == project]
        project_df = project_df.reset_index(drop=True)
        rows_per_agent = len(project_df) // len(list)
        remainder = len(project_df) % len(list)
        
        # Distribute rows equally
        for i, agent in enumerate(list):
            start_idx = i * rows_per_agent
            end_idx = start_idx + rows_per_agent
            agent_data = project_df.iloc[start_idx:end_idx].copy()
            agent_data['agent_assigned'] = agent
            
            # Handle remainder
            if i < remainder:
                extra_row = project_df.iloc[end_idx:end_idx+1].copy()
                extra_row['agent_assigned'] = agent
                agent_data = pd.concat([agent_data, extra_row])
            
            assigned_data = pd.concat([assigned_data, agent_data])
    
    assigned_data = assigned_data.reset_index(drop=True)
    print("Assignment by project complete.")
    return assigned_data


def get_available_agents(attendance_df, current_hour):
    """
    Get a list of available task-based agents for the current hour based on attendance DataFrame.
    Args:
        attendance_df (pd.DataFrame): DataFrame with agent attendance info
        current_hour (int): Current hour (24-hour format)
    Returns:
        list: List of available agent IDs
    """
    attendance_copy = attendance_df.copy()
    
    attendance_copy['start_time'] = attendance_copy['start_time'].astype(int)
    attendance_copy['end_time'] = attendance_copy['end_time'].astype(int)
    
    attendance_copy['assignment_start_time'] = attendance_copy['start_time'] - 1
    attendance_copy['assignment_end_time'] = attendance_copy['end_time'] - 1
    
    attendance_copy['assign_data'] = np.where(
        (current_hour >= attendance_copy['assignment_start_time']) & 
        (current_hour <= attendance_copy['assignment_end_time']),
        'yes', 'no')
    
    task_based_agents = attendance_copy.loc[
        (attendance_copy['project'] == 'task_based') & 
        (attendance_copy['assign_data'] == 'yes')]
    
    task_based_list = task_based_agents['agent_id'].values.tolist()
    print(f"Number of available agents: {len(task_based_list)}")
    return task_based_list


def assign_offers(query_1, query_2, query_3=None):
    """
    Assign offers from query_2 and query_3 to query_1 based on specific rules.
    If a MAIN_SYSTEM_ID appears in both query_2 and query_3, assign one to OFFER_1 and one to OFFER_2.
    
    Args:
        query_1 (pd.DataFrame): Main dataframe containing MAIN_SYSTEM_ID
        query_2 (pd.DataFrame): Dataframe containing OFFER to be mapped as OFFER_1
        query_3 (pd.DataFrame): Dataframe containing OFFER to be mapped as OFFER_2 or OFFER_1
        
    Returns:
        pd.DataFrame: Updated query_1 with assigned offers
    """
    # Create a copy to avoid modifying the original
    result = query_1.copy()
    
    # Ensure required columns exist in input dataframes
    if 'MAIN_SYSTEM_ID' not in result.columns:
        raise ValueError("query_1 must contain 'MAIN_SYSTEM_ID' column")
    if 'MAIN_SYSTEM_ID' not in query_2.columns or 'OFFER' not in query_2.columns:
        raise ValueError("query_2 must contain 'MAIN_SYSTEM_ID' and 'OFFER' columns")

    
    # Convert MAIN_SYSTEM_ID to string type in all dataframes
    result['MAIN_SYSTEM_ID'] = result['MAIN_SYSTEM_ID'].astype(str)
    query_2['MAIN_SYSTEM_ID'] = query_2['MAIN_SYSTEM_ID'].astype(str)

    
    # Step 1: Map query_2 offers to OFFER_1
    # Create a new DataFrame with just the columns we need
    offer1_df = pd.DataFrame({
        'MAIN_SYSTEM_ID': query_2['MAIN_SYSTEM_ID'],
        'OFFER_1_new': query_2['OFFER']  # Use a different name to avoid conflicts
    })
    
    # Merge with result
    result = pd.merge(result, offer1_df, on='MAIN_SYSTEM_ID', how='left')
    print("\nAfter first merge, columns:", result.columns.tolist())

    
    if query_3 is not None:
        if 'MAIN_SYSTEM_ID' not in query_3.columns or 'OFFER' not in query_3.columns:
            raise ValueError("query_3 must contain 'MAIN_SYSTEM_ID' and 'OFFER' columns")
        query_3['MAIN_SYSTEM_ID'] = query_3['MAIN_SYSTEM_ID'].astype(str)
        offer_temp_df = pd.DataFrame({
            'MAIN_SYSTEM_ID': query_3['MAIN_SYSTEM_ID'],
            'OFFER_temp': query_3['OFFER']
        })
        result = pd.merge(result, offer_temp_df, on='MAIN_SYSTEM_ID', how='left')
        print("After second merge, columns:", result.columns.tolist())

        has_offer1 = result['OFFER_1'].notna() if 'OFFER_1' in result.columns else pd.Series([False]*len(result))
        has_offer1_new = result['OFFER_1_new'].notna()
        has_temp = result['OFFER_temp'].notna()

        both_offers = has_offer1_new & has_temp
        result.loc[both_offers, 'OFFER_1'] = result.loc[both_offers, 'OFFER_1_new']
        result.loc[both_offers, 'OFFER_2'] = result.loc[both_offers, 'OFFER_temp']

        result.loc[~has_offer1 & has_offer1_new & ~both_offers, 'OFFER_1'] = result.loc[~has_offer1 & has_offer1_new & ~both_offers, 'OFFER_1_new']
        result.loc[~has_offer1 & has_temp & ~both_offers, 'OFFER_1'] = result.loc[~has_offer1 & has_temp & ~both_offers, 'OFFER_temp']
        result.loc[has_offer1 & ~has_offer1_new & has_temp, 'OFFER_2'] = result.loc[has_offer1 & ~has_offer1_new & has_temp, 'OFFER_temp']

        result.drop(columns=['OFFER_1_new', 'OFFER_temp'], inplace=True)
    else:
        # Only assign OFFER_1 from query_2
        result['OFFER_1'] = result['OFFER_1_new']
        if 'OFFER_1_new' in result.columns:
            result.drop(columns=['OFFER_1_new'], inplace=True)
    print("Final columns:", result.columns.tolist())
    return result

if __name__ == "__main__":
    # Example usage
    # query_1 = pd.DataFrame({'MAIN_SYSTEM_ID': [1, 2, 3]})
    # query_2 = pd.DataFrame({'MAIN_SYSTEM_ID': [1, 2], 'OFFER': ['A', 'B']})
    # query_3 = pd.DataFrame({'MAIN_SYSTEM_ID': [2, 3], 'OFFER': ['C', 'D']})
    # result = assign_offers(query_1, query_2, query_3)
    pass


def assign_agents(new_data):
    """
    Assign available agents to new data rows in a round-robin fashion.
    Args:
        new_data (pd.DataFrame): DataFrame of new leads or retailers
    Returns:
        np.ndarray: Array of assigned agent names
    """
    Agents = google_sheets('Marketing test', 'Sheet2', 'get')
    av_agents = Agents[Agents['Available'] == 'yes'].copy()
    av_agents = av_agents[["Agent_assigned"]]
    agent_names = av_agents['Agent_assigned'].values
    num_retailers = len(new_data)
    repeated_agents = np.tile(agent_names, int(np.ceil(num_retailers / len(agent_names))))[:num_retailers]
    np.random.shuffle(repeated_agents)
    return repeated_agents

def Add_marketing_leads():
    """
    Process and add new marketing leads to the Google Sheet, assigning agents and cleaning data.
    """
    pos_sheet = google_sheets('POS Leads', 'Master data', 'get')
    pos_sheet_2 = google_sheets('POS Leads', 'Insurance data', 'get')


    pos_sheet= pos_sheet[~pos_sheet['Mobile Number'].duplicated(keep=False)]

    filtered_sheet = pos_sheet[
        (~pos_sheet['Comment2'].str.lower().eq('done')) &
        (~pos_sheet['Comment'].str.contains('done', case=False, na=False)) &
        ((pos_sheet['المحافظة'].isin(['القاهره', 'الجيزة', 'giza'])) | (pos_sheet['المحافظة'].isna()))
    ]


    filtered_sheet = filtered_sheet.drop(['Call Status', 'Comment2', 'market type', 'Unnamed: 11', 'Unnamed: 12'], axis=1)

    numbers_called = pos_sheet_2["Mobile Number"].astype(str).str.extract(r'(\d+)')[0].apply(lambda x: x.zfill(11))

    new_data = filtered_sheet[~filtered_sheet['Mobile Number'].isin(numbers_called)]
    new_data[['Description', 'Project Name']] = ['Insurance POS العميل طلب ماكينه تامين', 'Insurance POS']

    new_data['Mobile Number'] = new_data['Mobile Number'].astype(str).apply(lambda x: '0' + x if not x.startswith('0') else x)

    new_data[['Comment', 'Name']] = ''
    agents = assign_agents(new_data)
    new_data['Name'] = agents
    # google_sheets('POS Leads', 'Insurance data', 'append', df = new_data)

    print(f"added {len(new_data)} new rows to the 'Insurance data' Sheet")
    
    import re

    # Remove any non-digit characters
    new_data['Mobile Number'] = new_data['Mobile Number'].astype(str).apply(lambda x: re.sub(r'\D', '', x))
    
    new_data_for_main = new_data.rename(columns={
    'Name': 'agent_assigned',
    'full_name': 'retailer_name',
    'Project Name': 'project_name', 
    'Description' : 'description'
    })[["agent_assigned", "retailer_name", "Mobile Number", "project_name", 'description']]
    
    google_sheets("Agents - Retailers", "Task_based", "append", df=new_data_for_main)
    
    
def task_based():
    """
    Main function to process and assign task-based projects to agents, update Google Sheets, and return main data.
    Returns:
        pd.DataFrame: DataFrame of assigned tasks
    """
    now = datetime.now() + timedelta(hours=3)
    hour = int(str(now.time())[0:2])
    
    
    attendance = ret_metabase("EGYPT", 13502)

    task_based_list = get_available_agents(attendance, hour)
    
    query_ids = google_sheets('Agents - Retailers', 'Query ID', 'get')
    blacklisted_retailers = query_ids['Blacklisted_retailers'].dropna().astype(int).tolist()
    data = fetch_and_process_queries(query_ids, blacklisted_retailers)
    main_data = assign_data_equal_projects(data , task_based_list)

    data_to_sql = main_data.copy()
    
    agents = google_sheets('Agents - Retailers', 'Data', 'get')

    # Merge on agent ID
    main_data = main_data.merge(agents, left_on='agent_assigned', right_on='Agent_id', how='left')

    # Replace the agent_assigned column with the Agent name
    main_data['agent_assigned'] = main_data['Agent']
    
    
    # Optionally drop the now-redundant columns
    main_data.drop(columns=['Agent', 'Agent_id'], inplace=True)
    
    main_data.drop(columns=["retailer_mobile_number"], inplace=True, errors='ignore')
    
    main_data['Added_at'] = now
    main_data = main_data[['agent_assigned', 'main_system_id', 'retailer_name', 'project_name', 'description', 'Added_at']]
    print(f" data before filtering {len(main_data)}")
    
    data_dispatched = google_sheets("Agents - Retailers", "Task_based", "get")
    already_assigned_retailers = set(data_dispatched['main_system_id'].astype(int).unique())
    main_data = main_data[~main_data['main_system_id'].isin(already_assigned_retailers)]
    print(f" data after filtering {len(main_data)}")
    main_data = main_data.groupby('agent_assigned').head(20)

    # google_sheets("Agents - Retailers", "Task_based", "append", df=main_data)
    
    print("tasks added to Task_based sheet")
    return main_data, data_to_sql
    
    
def fetch_and_process_queries(query_ids, blacklisted_retailers):
    """
    Fetch and process data from queries, removing blacklisted retailers and updating Google Sheets.
    Args:
        query_ids (pd.DataFrame): DataFrame with query IDs
        blacklisted_retailers (list): List of blacklisted retailer IDs
    Returns:
        pd.DataFrame: Cleaned DataFrame of query results
    """
    queries = query_ids['Task_based'].dropna().astype(int).tolist()
    print(f"Fetching data from {len(queries)} queries...")
    
    # Process queries
    dataframes = [ret_metabase("EGYPT", query) for query in queries]
    # print(dataframes)
    empty_queries = []
    for i, df in enumerate(dataframes):
        if df.empty:
            empty_queries.append(queries[i])
        else:
            print(f"Query {queries[i]} returned {len(df)} records")
        df.columns = map(str.lower, df.columns)
    if empty_queries:
        print(f"WARNING: Queries {empty_queries} returned empty dataframe!")
    
    # ----------------------------------------
    # write in google sheet available data
    # ----------------------------------------
    # check for empty queries and get project names
    for idx, row in query_ids.iterrows():
        query_id = row['Task_based']
        if pd.notna(query_id):
            df = ret_metabase("EGYPT", int(query_id))
            query_ids.at[idx, 'Available_data'] = 'Empty' if df.empty else str(len(df))
            # Add project name if available
            if not df.empty and 'PROJECT_NAME' in df.columns:
                query_ids.at[idx, 'Project_Name'] = df['PROJECT_NAME'].iloc[0]

    # Overwrite the sheet with updated full data
    google_sheets('Agents - Retailers', 'Query ID', 'overwrite', df=query_ids)
    
    # Combine and clean dataframes
    df_unfiltered = pd.concat(dataframes, ignore_index=True)
    print(f"Total tasks available: {df_unfiltered.shape[0]}")
    
    # Remove blacklisted retailers
    df_raw = df_unfiltered[~df_unfiltered['main_system_id'].isin(blacklisted_retailers)]
    df_raw = clean_column_id(df_raw, 'main_system_id')
    print(f"Removed {len(df_unfiltered) - len(df_raw)} blacklisted retailers")
    
    return df_raw


def ranking_system(query):
    """
    Reorder a DataFrame of calls by recency and rank, prioritizing older calls and those with offers.
    Args:
        query (pd.DataFrame): DataFrame with call and offer info
    Returns:
        pd.DataFrame: Reordered DataFrame
    """
    # Convert the LAST_REACHABLE_CALL column to datetime
    query['LAST_REACHABLE_CALL'] = pd.to_datetime(query['LAST_REACHABLE_CALL'])

    # Calculate the cutoff date for recent calls
    now = datetime.now()
    five_days_ago = now - timedelta(days=5)

    # Separate into recent and older calls
    recent_calls = query[query['LAST_REACHABLE_CALL'] >= five_days_ago].copy()
    older_calls = query[(query['LAST_REACHABLE_CALL'] < five_days_ago) | (query["LAST_REACHABLE_CALL"].isnull())].copy()

    offer = older_calls[older_calls['OFFER_1'].notnull()].copy()
    no_offer = older_calls[older_calls['OFFER_1'].isnull()].copy()
    # Sort both by RANK
    offer.sort_values(by="RANK", ascending=True, inplace=True)
    no_offer.sort_values(by="RANK", ascending=True, inplace=True)
    recent_calls.sort_values(by="RANK", ascending=True, inplace=True)

    # Combine: older calls first, then recent ones
    reordered_df = pd.concat([offer, no_offer, recent_calls], ignore_index=True)


    
    return reordered_df
initialize_env()

In [9]:
print("Starting agent dispatching process...")
query_1 = ret_metabase("Egypt", 60949)
query_2 = ret_metabase("Egypt", 61071)
query_3 = ret_metabase("Egypt", 61188)
sheet_data = google_sheets("Agents - Retailers", "raw_data", "get")
agent_group = google_sheets("Agents - Retailers", "Agent-to-group", "get")
col_list = agent_group.columns.tolist()

Starting agent dispatching process...
/home/ec2-user/service_account_key.json
/home/ec2-user/service_account_key.json
/home/ec2-user/service_account_key.json
/home/ec2-user/service_account_key.json
/home/ec2-user/service_account_key.json


In [8]:
attendance = ret_metabase("EGYPT", 13502)

/home/ec2-user/service_account_key.json


In [26]:
Data = google_sheets("POS Campaign | July & August 2025", "Main", "Get")

task_based_list = get_available_agents(attendance, hour)
# Convert Date column
Data["Date"] = pd.to_datetime(Data["Date"], format="%m/%d/%Y")
# Get today's date (normalized to midnight)
today = pd.Timestamp.today().normalize()
# Filter for today's rows
Data_filtered = Data[Data["Date"] == today]

sheet_Data = Data_filtered.copy()
sheet_Data.rename(columns={'mobile': 'main_system_id','name': 'retailer_name'},inplace=True)

sheet_Data["main_system_id"] = sheet_Data["main_system_id"].astype('Int64').astype(str)
# Remove leading '2' if present
sheet_Data["main_system_id"] = sheet_Data["main_system_id"].str.replace(r"^2", "", regex=True)

# Drop duplicate main_system_id values (keeping first occurrence)
sheet_Data = sheet_Data.drop_duplicates(subset="main_system_id", keep="first")

sheet_Data["description"] = (
    "POS Acquisition Marketing - location: "
    + sheet_Data["location"].astype(str)
    + " - campaign:"
    + sheet_Data["campaign"].astype(str)
)

sheet_Data.drop(columns=["location", "campaign", "Date"], inplace=True)
sheet_Data['Added_at'] = now
sheet_Data['project_name'] = "POS Leads"
name_list = ['Somaya Alrab','Esraa Mohamed','Marina Riyad','Raneen Ali','Abdelrahman Merghany','Israa Abdelhamid','Nada Saber','Ebthal Saber']
Pos_leads = assign_data_equal_projects(sheet_Data , name_list)
data_for_sql = assign_data_equal_projects(sheet_Data, task_based_list)
data_for_sql.rename(columns={'main_system_id': 'offer'},inplace=True)
data_for_sql['main_system_id'] = 11111
data_for_sql.drop(columns=["description"], inplace=True)
data_for_sql["description"] = "POS Acquisition Marketing - campaign: " + Data["campaign"]
data_for_sql = data_for_sql[['main_system_id', 'description','offer','agent_assigned']]
data_for_sql['dispatched_at'] = now
data_for_sql['agent_assigned'] = data_for_sql['agent_assigned'].astype(int) 
google_sheets("Agents - Retailers", "Task_based", "append", df=Pos_leads)

for i in col_list:
    if i == "Benchmark":
        continue 
    filtered_df = Pos_leads[Pos_leads["agent_assigned"] == i]
    filtered_df.drop(columns=["agent_assigned"], inplace=True)
    print(f"Assigned to {i},{len(filtered_df)} Task-based Tasks")
    google_sheets(i, "Task_based", "append", df=filtered_df)

with engine.connect() as conn:
    data_for_sql.to_sql('task_based_am_projects', schema='fintech', con=engine, if_exists='append', chunksize=1000, method='multi', index=False)


/home/ec2-user/service_account_key.json
Number of available agents: 8
Assignment by project complete.
Assignment by project complete.
/home/ec2-user/service_account_key.json


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.drop(columns=["agent_assigned"], inplace=True)


Assigned to Somaya Alrab,9 Task-based Tasks
/home/ec2-user/service_account_key.json


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.drop(columns=["agent_assigned"], inplace=True)


Assigned to Esraa Mohamed,9 Task-based Tasks
/home/ec2-user/service_account_key.json


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.drop(columns=["agent_assigned"], inplace=True)


Assigned to Marina Riyad,9 Task-based Tasks
/home/ec2-user/service_account_key.json


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.drop(columns=["agent_assigned"], inplace=True)


Assigned to Raneen Ali,8 Task-based Tasks
/home/ec2-user/service_account_key.json


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.drop(columns=["agent_assigned"], inplace=True)


Assigned to Abdelrahman Merghany,8 Task-based Tasks
/home/ec2-user/service_account_key.json


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.drop(columns=["agent_assigned"], inplace=True)


Assigned to Israa Abdelhamid,8 Task-based Tasks
/home/ec2-user/service_account_key.json


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.drop(columns=["agent_assigned"], inplace=True)


Assigned to Nada Saber,8 Task-based Tasks
/home/ec2-user/service_account_key.json


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.drop(columns=["agent_assigned"], inplace=True)


Assigned to Ebthal Saber,8 Task-based Tasks
/home/ec2-user/service_account_key.json


In [17]:
query_1 = ret_metabase("Egypt", 60949)

/home/ec2-user/service_account_key.json


In [24]:
unique_group_types = query_1['GROUP_TYPE'].unique()
print(unique_group_types)


['T5' 'C1' 'T1' 'T4' 'T3' 'T6' 'T2' 'T7']


In [21]:
    initialize_env()

    host = os.environ["DWH_WRITER_HOST_NEW"]
    database = os.environ["DWH_WRITER_NAME_NEW"]
    user = os.environ["DWH_WRITER_USER_NAME_NEW"]
    password = os.environ["DWH_WRITER_PASSWORD_NEW"]

    conn = psycopg2.connect(host=host, database=database, user=user, password=password)
    print("Successfully connected to DB")
    
    # db connection:
    engine = sqlalchemy.create_engine(f"postgresql+psycopg2://{user}:{password}@{host}/{database}")
    print(bool(engine))

Successfully connected to DB
True


In [22]:
with engine.connect() as conn:
    data_for_sql.to_sql('task_based_am_projects', schema='fintech', con=engine, if_exists='append', chunksize=1000, method='multi', index=False)


In [34]:
for i in col_list:
    if i == "Benchmark":
        continue 
    filtered_df = Pos_leads[Pos_leads["agent_assigned"] == i]
    filtered_df.drop(columns=["agent_assigned"], inplace=True)
    print(f"Assigned to {i},{len(filtered_df)} Task-based Tasks")
    google_sheets(i, "Task_based", "append", df=filtered_df)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.drop(columns=["agent_assigned"], inplace=True)


Assigned to Somaya Alrab,18 Task-based Tasks
/home/ec2-user/service_account_key.json


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.drop(columns=["agent_assigned"], inplace=True)


Assigned to Esraa Mohamed,17 Task-based Tasks
/home/ec2-user/service_account_key.json


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.drop(columns=["agent_assigned"], inplace=True)


Assigned to Marina Riyad,17 Task-based Tasks
/home/ec2-user/service_account_key.json


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.drop(columns=["agent_assigned"], inplace=True)


Assigned to Raneen Ali,17 Task-based Tasks
/home/ec2-user/service_account_key.json


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.drop(columns=["agent_assigned"], inplace=True)


Assigned to Abdelrahman Merghany,16 Task-based Tasks
/home/ec2-user/service_account_key.json


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.drop(columns=["agent_assigned"], inplace=True)


Assigned to Israa Abdelhamid,17 Task-based Tasks
/home/ec2-user/service_account_key.json


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.drop(columns=["agent_assigned"], inplace=True)


Assigned to Nada Saber,17 Task-based Tasks
/home/ec2-user/service_account_key.json


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.drop(columns=["agent_assigned"], inplace=True)


Assigned to Ebthal Saber,17 Task-based Tasks
/home/ec2-user/service_account_key.json
