In [1]:
"""This script is used to disaggregate the volume given either by our commercial intelligence or by our own forecast model.
There are 3 levels of disaggregation, selected by abs_type. abs_type = 1 takes in the general volume of the month. abs_type = 0
gives us a dataframe from excel with the volume of each city. abs_type = -1 gives us a dataframe from excel with 
the volume of each factory.
"""
#import libraries

import pandas as pd # dataframe library
import numpy as np #mathematical library
import datetime #date library
from dateutil.relativedelta import relativedelta #subtract periods to a date
import sys #system exit
import time

#import our sql script to connect to the engine and return a dataframe. In this case,
#the %run magic is used to "import" our sql connection notebook

%run ..\sql\connect_sql_server.ipynb
#from ipynb.fs.full.connect_sql_server import querySQL --> this is another option to import another notebook when in the same folder

#import warnings library to then avoid the warnings given by jupyter
import warnings
warnings.filterwarnings('ignore')



In [2]:
#function to send dataframe to excel
def send_excel(df_stational_factor_hour,df_hour_general,df_stational_factor_day,df_weekday_general,country):
    """Write the four seasonal-factor dataframes to a timestamped Excel workbook.

    The workbook is created at ``../data/<country>/forecast_<timestamp>.xlsx``
    (relative to the current working directory) using the ``xlsxwriter``
    engine, with one sheet per dataframe and no index column.

    Parameters
    ----------
    df_stational_factor_hour : DataFrame written to sheet "FE Hora Dia Planta".
    df_hour_general : DataFrame written to sheet "FE Hora Dia".
    df_stational_factor_day : DataFrame written to sheet "FE Dia Semana Planta".
    df_weekday_general : DataFrame written to sheet "FE Dia Semana".
    country : str
        Name of the sub-folder of ``../data/`` that receives the workbook.

    Returns
    -------
    None
    """
    #timestamp that makes every exported workbook name unique
    now = pd.to_datetime("now").strftime("%Y-%m-%d-%H-%M-%S")
    workbook_path = "../data/"+country+"/forecast_" + now + ".xlsx"

    #sheet name -> dataframe, in the order the sheets appear in the workbook
    sheets = (
        ("FE Hora Dia Planta", df_stational_factor_hour),
        ("FE Hora Dia", df_hour_general),
        ("FE Dia Semana Planta", df_stational_factor_day),
        ("FE Dia Semana", df_weekday_general),
    )

    #the context manager saves and closes the workbook on exit, even if a
    #to_excel call raises; ExcelWriter.save() no longer exists in pandas >= 2.0
    with pd.ExcelWriter(workbook_path, engine='xlsxwriter') as writer:
        for sheet_name, frame in sheets:
            frame.to_excel(writer, sheet_name=sheet_name, index=False)
    
#helpers shared by the hour and weekday seasonal-factor calculations
def _seasonal_factors_by_factory(df, period_col, factor_col):
    """Return, per factory, the share of its delivered volume in each period.

    Groups ``df`` by ``period_col`` and ``Planta``, sums ``totalEntregado``,
    adds each factory's total as ``vol_total_planta`` and stores
    ``totalEntregado / vol_total_planta`` in ``factor_col``. The result is
    sorted by ``Planta`` then ``period_col``.
    """
    factors = df.groupby([period_col, 'Planta'])['totalEntregado'].sum().reset_index()

    #convert columns to the corresponding datatype and replace nan volumes with 0
    factors['totalEntregado'] = factors['totalEntregado'].astype(float).fillna(0)
    factors[period_col] = factors[period_col].astype(int)

    #total volume of each factory, repeated on every row of that factory
    factors['vol_total_planta'] = factors.groupby('Planta')['totalEntregado'].transform('sum')

    #a factory whose total is 0 yields NaN here (0/0), it is not filtered out
    factors[factor_col] = factors['totalEntregado'] / factors['vol_total_planta']
    return factors.sort_values(["Planta", period_col])


def _seasonal_factors_general(factors, period_col):
    """Return the share of the overall delivered volume in each period (all factories together)."""
    general = factors.groupby(period_col)['totalEntregado'].sum().reset_index()
    general['%FE_general'] = general['totalEntregado'] / general['totalEntregado'].sum()
    return general


#we need to create a dataframe to get the stational factors for each hour, day of the week and factory
def get_stational_factors(df_history,country,year_disaggregate,month_disaggregate,inactivate_temp,calendar_sql,volume,abs_type,active_factories):
    """Compute hour-of-day and day-of-week seasonal factors from delivery history.

    Parameters
    ----------
    df_history : DataFrame
        Delivery history. The columns ``FechaEntrega``, ``Planta``,
        ``DiaSemana``, ``hora_entrega`` and ``totalEntregado`` are read.
        The frame passed in is not modified.
    country, year_disaggregate, month_disaggregate, calendar_sql, volume, abs_type
        Accepted for interface compatibility; not used by this function.
    inactivate_temp : list-like or None
        Factory codes to exclude in addition to the inactive ones.
    active_factories : DataFrame
        Must expose a ``Centro`` column; only factories listed there are kept.

    Returns
    -------
    list of DataFrame
        ``[df_stational_factor_hour, df_hour_general,
        df_stational_factor_day, df_weekday_general]``:
        per-factory hour factors (``%FE_hour``), overall hour factors
        (``%FE_general``), per-factory weekday factors (``%FE_day``) and
        overall weekday factors (``%FE_general``).

    Side effects
    ------------
    Prints the four resulting dataframes and a final "exitoso" message.
    """
    #convert string date to datetime on a NEW frame so the caller's dataframe
    #is left untouched, then organize the history by date
    df_history = df_history.assign(FechaEntrega=pd.to_datetime(df_history['FechaEntrega']))
    df_history = df_history.sort_values(by="FechaEntrega")

    #filter out inactive factories
    df_history = df_history[df_history.Planta.isin(active_factories.Centro)]
    if inactivate_temp is not None and len(inactivate_temp) > 0:
        df_history = df_history[~df_history.Planta.isin(inactivate_temp)]

    #-------------stational HOUR factors-------------------
    df_stational_factor_hour = _seasonal_factors_by_factory(df_history, 'hora_entrega', '%FE_hour')

    print("FACTORES ESTACIONALES HORA POR PLANTA")
    print(df_stational_factor_hour)

    #general factors, for factories that do not have stational factors of their own
    df_hour_general = _seasonal_factors_general(df_stational_factor_hour, 'hora_entrega')

    print("FACTORES ESTACIONALES HORA GENERAL")

    print(df_hour_general)

    #-------------stational DAY factors-------------------
    #day 1 is left out of the weekday factors. In the sample history 2023-01-04
    #(a Wednesday) has DiaSemana 4, so day 1 is Sunday.
    #NOTE(review): presumably Sundays are not regular operating days - confirm.
    #The comparison is done numerically so that a DiaSemana column delivered as
    #text ('1') is excluded as well; unparsable values are kept, as before.
    is_day_one = pd.to_numeric(df_history['DiaSemana'], errors='coerce') == 1
    df_stational_factor_day = _seasonal_factors_by_factory(df_history[~is_day_one], 'DiaSemana', '%FE_day')

    print("FACTORES ESTACIONALES DIA SEMANA POR PLANTA")

    print(df_stational_factor_day)

    df_weekday_general = _seasonal_factors_general(df_stational_factor_day, 'DiaSemana')

    print("FACTORES ESTACIONALES DIA SEMANA EN GENERAL")

    print(df_weekday_general)

    #NOTE: a week-level variant (grouping by Semana_Relativa, restricted to the
    #weeks in calendar_sql.Semanas_mes, factor column '%FE_week') used to sit
    #here inside a disabled string literal and was never executed. If it is
    #needed again it can be built with _seasonal_factors_by_factory and
    #_seasonal_factors_general on the week-filtered history.

    print("exitoso")
    return [df_stational_factor_hour,df_hour_general,df_stational_factor_day,df_weekday_general]


In [3]:
# --- parameters of the history extraction ---
country = 'Colombia'  # country passed to the stored procedure and reused by later cells
cliente = '50117983'  # client identifier, second argument of the stored procedure
start_date_history = datetime.datetime(2023, 1, 1)  # start date of the history to analyze
end_date_history = datetime.datetime(2023, 4 , 3)  # end date of the history to analyze

# run the stored procedure; both dates are sent as YYYY-MM-DD strings
df_history = querySQL(
    "{CALL SCAC_AP20_BaseDesagregacion_cliente_horas_V2 (?,?,?,?)}",
    (
        country,
        cliente,
        start_date_history.strftime("%Y-%m-%d"),
        end_date_history.strftime("%Y-%m-%d"),
    ),
)
# quick look at the first rows of the history
print(df_history.head())

    Año Mes Planta TipoPlanta totalEntregado Semana_Relativa DiaSemana  \
0  2023   1   F003    Central           17.0               1         4   
1  2023   1   F003    Central          12.25               1         5   
2  2023   1   F003    Central           6.25               1         5   
3  2023   1   F003    Central            8.0               1         6   
4  2023   1   F003    Central            3.0               1         6   

  FechaEntrega DiasOperativos  Ciudad hora_entrega  
0   2023-01-04              1  Bogotá           12  
1   2023-01-05              6  Bogotá           16  
2   2023-01-05              7  Bogotá            9  
3   2023-01-06              9  Bogotá           12  
4   2023-01-06             10  Bogotá           15  


In [4]:
# --- parameters of the disaggregation ---
year_disaggregate = 2023 # year to disaggregate
month_disaggregate = 4 # month to disaggregate
volume = 134566 #only for abs_type = 1
inactivate_temp = ['T001','T002','T003','T004','F014','F020','F031'] #place inactive factories if there are any
abs_type = 1
# PARAMETERS (abs_type):
#   abs_type = 1  -> get general volume
#   abs_type = 0  -> get volume by city
#   abs_type = -1 -> get volume by factory

#get weeks corresponding to the desired month
calendar_sql = querySQL(
    "select * from SCAC_AT3_DiasHabilesFuente where pais = ? and año = ? and mes = ? order by [Fecha de entrega]",
    (country, year_disaggregate, month_disaggregate),
)
#get active factories to filter out inactive
#the single bind parameter must be a one-element tuple: (country) is just the
#string itself, (country,) is the tuple the other querySQL calls also pass
active_factories = querySQL(
    "select Centro, [Planta Unica] as Planta, [Desc Cluster] as Cluster, Ciudad_Cluster as Ciudad  from SCAC_AT1_NombreCluster where pais = ? and activo = 1 order by Centro",
    (country,),
)
#compute the seasonal factors
df_stational_factor_hour,df_hour_general,df_stational_factor_day,df_weekday_general = get_stational_factors(
    df_history,
    country,
    year_disaggregate,
    month_disaggregate,
    inactivate_temp,
    calendar_sql,
    volume,
    abs_type,
    active_factories,
)
#export the four resulting dataframes to excel
send_excel(df_stational_factor_hour,df_hour_general,df_stational_factor_day,df_weekday_general,country)

FACTORES ESTACIONALES HORA POR PLANTA
     hora_entrega Planta  totalEntregado  vol_total_planta  %FE_hour
1               7   F001          189.50            6634.0  0.028565
12              8   F001          934.50            6634.0  0.140865
24              9   F001          785.50            6634.0  0.118405
36             10   F001          764.50            6634.0  0.115240
48             11   F001          745.25            6634.0  0.112338
..            ...    ...             ...               ...       ...
110            15   FA04           58.50             782.5  0.074760
122            16   FA04           90.25             782.5  0.115335
134            17   FA04           47.00             782.5  0.060064
146            18   FA04           29.00             782.5  0.037061
153            19   FA04            5.00             782.5  0.006390

[167 rows x 5 columns]
FACTORES ESTACIONALES HORA GENERAL
    hora_entrega  totalEntregado  %FE_general
0              6           16