In [1]:
from common_functions import ret_metabase, google_sheets, dwh_query, upload_dataframe_to_pg
import pandas as pd
from datetime import datetime, timedelta
import datetime as dt
import time
import os
import boto3
import base64
from botocore.exceptions import ClientError
import json
import requests
from pathlib import Path
from io import StringIO
import pandas as pd
import sqlalchemy
import psycopg2
import numpy as np
import gspread
import pytz

  from pandas.core.computation.check import NUMEXPR_INSTALLED


ModuleNotFoundError: No module named 'gspread'

In [3]:
def get_available_agents(attendance_df, current_hour):
    """
    Get a list of available task-based agents for the current hour based on attendance DataFrame.
    Args:
        attendance_df (pd.DataFrame): DataFrame with agent attendance info
        current_hour (int): Current hour (24-hour format)
    Returns:
        list: List of available agent IDs
    """
    attendance_copy = attendance_df.copy()
    
    attendance_copy['start_time'] = attendance_copy['start_time'].astype(int)
    attendance_copy['end_time'] = attendance_copy['end_time'].astype(int)
    
    attendance_copy['assignment_start_time'] = attendance_copy['start_time'] - 1
    attendance_copy['assignment_end_time'] = attendance_copy['end_time'] - 1
    
    attendance_copy['assign_data'] = np.where(
        (current_hour >= attendance_copy['assignment_start_time']) & 
        (current_hour <= attendance_copy['assignment_end_time']),
        'yes', 'no')
    
    task_based_agents = attendance_copy.loc[
        (attendance_copy['project'] == 'task_based') & 
        (attendance_copy['assign_data'] == 'yes')]
    
    task_based_list = task_based_agents['agent_id'].values.tolist()
    print(f"Number of available agents: {len(task_based_list)}")
    return task_based_list
def fetch_and_process_queries(query_ids, blacklisted_retailers):
    """
    Fetch and process data from queries, removing blacklisted retailers and updating Google Sheets.
    Args:
        query_ids (pd.DataFrame): DataFrame with query IDs
        blacklisted_retailers (list): List of blacklisted retailer IDs
    Returns:
        pd.DataFrame: Cleaned DataFrame of query results
    """
    queries = query_ids['Task_based'].dropna().astype(int).tolist()
    print(f"Fetching data from {len(queries)} queries...")
    
    # Process queries
    dataframes = [ret_metabase("EGYPT", query) for query in queries]
    # print(dataframes)
    empty_queries = []
    for i, df in enumerate(dataframes):
        if df.empty:
            empty_queries.append(queries[i])
        else:
            print(f"Query {queries[i]} returned {len(df)} records")
        df.columns = map(str.lower, df.columns)
    if empty_queries:
        print(f"WARNING: Queries {empty_queries} returned empty dataframe!")
    
    # ----------------------------------------
    # write in google sheet available data
    # ----------------------------------------
    # check for empty queries and get project names
    for idx, row in query_ids.iterrows():
        query_id = row['Task_based']
        if pd.notna(query_id):
            df = ret_metabase("EGYPT", int(query_id))
            query_ids.at[idx, 'Available_data'] = 'Empty' if df.empty else str(len(df))
            # Add project name if available
            if not df.empty and 'PROJECT_NAME' in df.columns:
                query_ids.at[idx, 'Project_Name'] = df['PROJECT_NAME'].iloc[0]

    # Overwrite the sheet with updated full data
    google_sheets('Agents - Retailers', 'Query ID', 'overwrite', df=query_ids)
    
    # Combine and clean dataframes
    df_unfiltered = pd.concat(dataframes, ignore_index=True)
    print(f"Total tasks available: {df_unfiltered.shape[0]}")
    
    # Remove blacklisted retailers
    df_raw = df_unfiltered[~df_unfiltered['main_system_id'].isin(blacklisted_retailers)]
    df_raw = clean_column_id(df_raw, 'main_system_id')
    print(f"Removed {len(df_unfiltered) - len(df_raw)} blacklisted retailers")
    
    return df_raw

def assign_data_equal_projects(df, list):
    """
    Distribute rows of a DataFrame equally among a list of agents, grouped by project name.
    Special handling for credit projects: only agents 9191 and 5565 can be assigned to projects with 'credit' in the name.
    Args:
        df (pd.DataFrame): DataFrame with 'project_name'
        list (list): List of agent IDs or names
    Returns:
        pd.DataFrame: DataFrame with 'agent_assigned' column
    """
    df = df.sample(frac=1)  # Shuffle the data
    project_types = df['project_name'].unique()
    
    # Define credit agents and other agents
    credit_agents = [9191, 5565]
    other_agents = [agent for agent in list if agent not in credit_agents]
    
    assigned_data = pd.DataFrame()
    
    for project in project_types:
        project_df = df[df['project_name'] == project]
        project_df = project_df.reset_index(drop=True)
        
        # Check if project contains 'credit' in the name (case insensitive)
        is_credit_project = 'credit' in project.lower()
        
        if is_credit_project:
            # For credit projects, only assign to credit agents
            if len(credit_agents) > 0:
                rows_per_agent = len(project_df) // len(credit_agents)
                remainder = len(project_df) % len(credit_agents)
                
                # Distribute rows equally among credit agents
                for i, agent in enumerate(credit_agents):
                    start_idx = i * rows_per_agent
                    end_idx = start_idx + rows_per_agent
                    agent_data = project_df.iloc[start_idx:end_idx].copy()
                    agent_data['agent_assigned'] = agent
                    
                    # Handle remainder
                    if i < remainder:
                        extra_row = project_df.iloc[end_idx:end_idx+1].copy()
                        extra_row['agent_assigned'] = agent
                        agent_data = pd.concat([agent_data, extra_row])
                    
                    assigned_data = pd.concat([assigned_data, agent_data])
            else:
                print(f"Warning: No credit agents available for credit project '{project}'")
        else:
            # For non-credit projects, assign to other agents only
            if len(other_agents) > 0:
                rows_per_agent = len(project_df) // len(other_agents)
                remainder = len(project_df) % len(other_agents)
                
                # Distribute rows equally among other agents
                for i, agent in enumerate(other_agents):
                    start_idx = i * rows_per_agent
                    end_idx = start_idx + rows_per_agent
                    agent_data = project_df.iloc[start_idx:end_idx].copy()
                    agent_data['agent_assigned'] = agent
                    
                    # Handle remainder
                    if i < remainder:
                        extra_row = project_df.iloc[end_idx:end_idx+1].copy()
                        extra_row['agent_assigned'] = agent
                        agent_data = pd.concat([agent_data, extra_row])
                    
                    assigned_data = pd.concat([assigned_data, agent_data])
            else:
                print(f"Warning: No other agents available for non-credit project '{project}'")
    
    assigned_data = assigned_data.reset_index(drop=True)
    print("Assignment by project complete with credit project restrictions.")
    return assigned_data
def clean_column_id(df, column_name):
    """
    Clean a DataFrame column by removing commas and converting to integer type if possible.
    Args:
        df (pd.DataFrame): DataFrame to clean
        column_name (str): Name of the column to clean
    Returns:
        pd.DataFrame: DataFrame with cleaned column
    """
    # Ensure the column is treated as a string
    df[column_name] = df[column_name].astype(str)
    
    # Replace commas in the string
    df[column_name] = df[column_name].str.replace(',', '')
    
    # Convert back to an integer, if appropriate
    df[column_name] = df[column_name].astype('Int64', errors='ignore')
    
    return df

In [13]:
now = datetime.now() + timedelta(hours=3)
hour = int(str(now.time())[0:2])
attendance = ret_metabase("EGYPT", 13502)

task_based_list = get_available_agents(attendance, hour)

query_ids = google_sheets('Agents - Retailers', 'Query ID', 'get')
blacklisted_retailers = query_ids['Blacklisted_retailers'].dropna().astype(int).tolist()
data = fetch_and_process_queries(query_ids, blacklisted_retailers)

unique_projects = data["project_name"].unique()
# Filter out projects with "credit" in the name for calculation
projects_for_calculation = [proj for proj in unique_projects if 'credit' not in proj.lower()]
no_task = int(((50*(len(task_based_list)-2))/len(projects_for_calculation))+2)
final_data = []
for i in unique_projects: 
    un_data = data[data['project_name'] == i]
    # Skip the head filter for projects with "credit" in the name
    if 'credit' not in i.lower():
        un_data = un_data.head(no_task)
    as_data = assign_data_equal_projects(un_data,task_based_list) 
    final_data.append(as_data)
final_data = pd.concat(final_data, ignore_index=True)
final_data = final_data.drop_duplicates(subset=["main_system_id"], keep="first")

main_data = final_data.groupby('agent_assigned').head(50)

/home/ec2-user/service_account_key.json
Number of available agents: 7
/home/ec2-user/service_account_key.json
Fetching data from 2 queries...
/home/ec2-user/service_account_key.json
/home/ec2-user/service_account_key.json
Query 59703 returned 19227 records
Query 62555 returned 4681 records
/home/ec2-user/service_account_key.json


  query_ids.at[idx, 'Available_data'] = 'Empty' if df.empty else str(len(df))


/home/ec2-user/service_account_key.json
/home/ec2-user/service_account_key.json
Total tasks available: 23908
Removed 0 blacklisted retailers
Assignment by project complete with credit project restrictions.
Assignment by project complete with credit project restrictions.


In [14]:
pivot = final_data.pivot_table(
    index="agent_assigned",
    columns="project_name",
    values="main_system_id",   # or any column
    aggfunc="count",
    fill_value=0
)
# Calculate the difference (max - min) for each project
diff_row = pivot.max() - pivot.min()

# Add it as a new row called "Difference"
pivot.loc["Difference"] = diff_row

pivot

project_name,app acquisition,credit_project
agent_assigned,Unnamed: 1_level_1,Unnamed: 2_level_1
2484.0,51,0
2648.0,50,0
5280.0,50,0
5565.0,0,2339
8302.0,50,0
9191.0,0,2341
9785.0,49,0
Difference,51,2341
