# Libraries

In [1]:
from datetime import datetime
import pandas as pd
import calendar
import locale
#locale.setlocale(locale.LC_TIME, 'es_ES.UTF-8')
import pyodbc

pd.io.formats.excel.ExcelFormatter.header_style = None

# Parameters

In [2]:
year = 2021
week = 33

path = r'\\NASPRO.infovisiontv.com\DWH01\Cognos\Genomma lab Colombia\Informacion de punto de venta\Archivos de carga\UNIDROGAS\{0}\S {1}'
path_prices = r'C:\Users\jshernandezm\OneDrive - genommalabinternacional\01Code\02PriceUpdate\COL\1Data\2Catalogue\1Current\Precios'

filename_stock = 'INVENTARIO GENOMMA 18 AGOSTO 2021.xlsx'
filename_sales_amount = 'VENTA DISTRIBUCION GENOMMA DEL 09 AL 15 AGOSTO 2021.xlsx'
filename_sales_units = 'VENTA DISTRIBUCION EN UNIDADES GENOMMA DEL 09 AL 15 AGOSTO 2021.xlsx'

filename_prices = '\\Catalogo_Precios-202108.xlsx'

# Import data

In [3]:
# Ignore rows with no data
def get_data(data, column):
    df = data.copy()
    not_null = df[df[column].notnull()].index.min()
    df.columns = df.loc[not_null].tolist()
    df = df[not_null + 1:].copy()
    df.dropna(axis=0, inplace=True)
    df.reset_index(drop=True, inplace=True)
    return df

In [4]:
df_stock = pd.read_excel(path.format(year, week) + '\\' + filename_stock)
df_stock = get_data(df_stock, 'Unnamed: 6')
#df_unit = pd.read_excel(path.format(year, week) + '\\' + filename_sales_units, sheet_name='BUCARAMANGA')
#df_amount = pd.read_excel(path.format(year, week) + '\\' + filename_sales_amount, sheet_name='BUCARAMANGA')

In [5]:
# Connection to DWH
conx = pyodbc.connect('Driver={SQL Server};'
                      'Server=SFEDWH01;'
                      'Database=Gnm_DWH;'
                      'Trusted_Connection=yes;')

In [6]:
# Data dates from DWH
query_dates = 'SELECT TmpFecha, TmpSemanaAnioGenomma FROM Gnm_DWH.dbo.Dim_Tiempo WHERE TmpAnioSemanaGenomma = {0} AND TmpSemanaAnioGenomma = {1}'
df_weeksGL = pd.read_sql(query_dates.format(year, week), conx)
# Data weeks from DWH
df_weeksGL.rename({'TmpFecha':'Fecha', 'TmpSemanaAnioGenomma':'Semana'}, axis=1, inplace=True)
df_weeksGL.shape

(7, 2)

# Transform

In [7]:
# SucCod
loc_inv = {'Inventario BQUILLA':1001, 'Inventario BOGOTA':1002, 'Inventario BMANGA':1003,
           'Inventario MEDELLIN':1004, 'Inventario VDUPAR':1006, 'BARRANQUILLA':1001, 'BOGOTA':1002,
           'BUCARAMANGA':1003, 'MEDELLIN':1004, 'VALLEDUPAR':1006, 'SANTAMARTA':1005}

In [8]:
# month abbreviations
month_abbr = [m.capitalize() for m in list(calendar.month_abbr)]
nmonth_abbr = dict(zip(month_abbr, range(13)))

In [9]:
# Clean dates and transform to datetime
def clean_date(str_date):
    year = int(str_date.split('/')[1])
    month = nmonth_abbr[str_date[:3]]
    day = int(str_date.split('/')[0][-2:])
    return datetime(year, month, day)

In [10]:
def clean_name_sku(name):
    try:
        return name.split('  COMERCIAL ')[0]
    except:
        return name

In [11]:
# Merge between units sales & amount sales
def clean_join(sales_units, sales_amount, weeks_GL, city):
    sales_units['ID'] = sales_units['Fecha'] + sales_units['Sku']
    sales_amount['ID'] = sales_amount['Fecha'] + sales_amount['Sku']
    total_sales = sales_units.merge(sales_amount[['ID', 'Ven Monto']], on='ID', how='outer')[['Fecha', 'Sku', 'Ven Und', 'Ven Monto']]
    total_sales['Fecha'] = total_sales['Fecha'].map(clean_date)
    total_sales = total_sales.merge(weeks_GL, on='Fecha', how='left')
    final = total_sales.pivot_table(index=['Semana', 'Sku'], values=['Ven Und', 'Ven Monto'], aggfunc='sum').reset_index()
    final['Loc'] = city
    return final[['Semana', 'Loc', 'Sku', 'Ven Und', 'Ven Monto']]

## Sales

### Import all PDV

In [12]:
%%time
# Import units sales & amount sales and tranform data
data_sales = pd.DataFrame()
for city in ['BARRANQUILLA', 'BOGOTA', 'BUCARAMANGA', 'MEDELLIN', 'VALLEDUPAR']:
    df_unit = pd.read_excel(path.format(year, week) + '\\' + filename_sales_units, sheet_name=city)
    df_unit = get_data(df_unit, 'Unnamed: 7')
    df_amount = pd.read_excel(path.format(year, week) + '\\' + filename_sales_amount, sheet_name=city)
    df_amount = get_data(df_amount, 'Unnamed: 7')
    cols_unit = df_unit.columns
    cols_amount = df_amount.columns
    data_unit = df_unit.melt(id_vars='Fecha', value_vars=cols_unit[1:], var_name='Sku', value_name='Ven Und')
    data_amount = df_amount.melt(id_vars='Fecha', value_vars=cols_amount[1:], var_name='Sku', value_name='Ven Monto')
    data = clean_join(data_unit, data_amount, df_weeksGL, city)
    data_sales = pd.concat([data_sales, data], axis=0)
# Create columns "CodSuc"
data_sales['CodSuc'] = data_sales['Loc'].map(loc_inv)
# Remove last whitespace from "Sku"
data_sales['Sku'] = data_sales['Sku'].str.rstrip()
# Remove CEDIS, this make match with the names of stocks
data_sales['Sku'] = data_sales['Sku'].map(clean_name_sku)
data_sales.reset_index(drop=True, inplace=True)

Wall time: 4.02 s


In [34]:
data_unit

Unnamed: 0,Fecha,Sku,Ven Und,ID
0,Aug 09/2021,LOMECAN V 2% CREMA VAGINAL TUBO X 20 GR (2) MA...,3,Aug 09/2021LOMECAN V 2% CREMA VAGINAL TUBO X 2...
1,Aug 10/2021,LOMECAN V 2% CREMA VAGINAL TUBO X 20 GR (2) MA...,1,Aug 10/2021LOMECAN V 2% CREMA VAGINAL TUBO X 2...
2,Aug 11/2021,LOMECAN V 2% CREMA VAGINAL TUBO X 20 GR (2) MA...,1,Aug 11/2021LOMECAN V 2% CREMA VAGINAL TUBO X 2...
3,Aug 12/2021,LOMECAN V 2% CREMA VAGINAL TUBO X 20 GR (2) MA...,0,Aug 12/2021LOMECAN V 2% CREMA VAGINAL TUBO X 2...
4,Aug 13/2021,LOMECAN V 2% CREMA VAGINAL TUBO X 20 GR (2) MA...,0,Aug 13/2021LOMECAN V 2% CREMA VAGINAL TUBO X 2...
...,...,...,...,...
107,Aug 11/2021,TUKOL-D SUSPENSION X 125 ML COMERCIAL VALLEDUPAR,5,Aug 11/2021TUKOL-D SUSPENSION X 125 ML COMERC...
108,Aug 12/2021,TUKOL-D SUSPENSION X 125 ML COMERCIAL VALLEDUPAR,2,Aug 12/2021TUKOL-D SUSPENSION X 125 ML COMERC...
109,Aug 13/2021,TUKOL-D SUSPENSION X 125 ML COMERCIAL VALLEDUPAR,0,Aug 13/2021TUKOL-D SUSPENSION X 125 ML COMERC...
110,Aug 14/2021,TUKOL-D SUSPENSION X 125 ML COMERCIAL VALLEDUPAR,0,Aug 14/2021TUKOL-D SUSPENSION X 125 ML COMERC...


In [13]:
data_sales.isnull().sum()

Semana       0
Loc          0
Sku          0
Ven Und      0
Ven Monto    0
CodSuc       0
dtype: int64

## Stock

### Clean

In [14]:
df_stock.columns

Index(['Codigo', 'Nombre comercial', 'Laboratorio', 'Linea', 'Precio',
       'Inventario BQUILLA', 'Inventario BOGOTA', 'Inventario BMANGA',
       'Inventario MEDELLIN', 'Inventario STMARTA', 'Inventario VDUPAR'],
      dtype='object')

In [15]:
# Unpivoting
data_stock = df_stock.melt(id_vars=['Codigo', 'Nombre comercial', 'Laboratorio', 'Linea', 'Precio'],
                           value_vars=['Inventario BQUILLA', 'Inventario BOGOTA', 'Inventario BMANGA',
                                       'Inventario MEDELLIN', 'Inventario STMARTA', 'Inventario VDUPAR'], 
                           var_name='Loc', value_name='Inv Und')

In [16]:
data_stock.isnull().sum()

Codigo              0
Nombre comercial    0
Laboratorio         0
Linea               0
Precio              0
Loc                 0
Inv Und             0
dtype: int64

In [17]:
# Create column "Inv Monto"
# data_stock['Inv Monto'] = data_stock['Inv Und']*data_stock['Precio']
# Create column "CodSuc"
data_stock['CodSuc'] = data_stock['Loc'].map(loc_inv)
# Remove STMARTA
data_stock.dropna(axis=0, inplace=True)
data_stock.reset_index(drop=True, inplace=True)
# Create column "CodSuc"
data_stock['CodSuc'] = data_stock['CodSuc'].map(int)

In [18]:
data_stock.isnull().sum()

Codigo              0
Nombre comercial    0
Laboratorio         0
Linea               0
Precio              0
Loc                 0
Inv Und             0
Inv Monto           0
CodSuc              0
dtype: int64

# Merge Sales & Stock

In [19]:
# Create ID
data_sales['ID'] = data_sales['CodSuc'].map(str) + data_sales['Sku'].map(str)
data_stock['ID'] = data_stock['CodSuc'].map(str) + data_stock['Nombre comercial']

In [20]:
(data_sales['ID'].value_counts() > 1).sum()

0

In [21]:
(data_stock['ID'].value_counts() > 1).sum()

0

In [22]:
# Merge
sales_stock = pd.merge(data_sales, data_stock, on='ID', how='outer')

In [25]:
# Clean some columns
sales_stock['Semana'].fillna(value=data_sales['Semana'].unique()[0], inplace=True)
#sales_stock.loc[:, ['Ven Und', 'Ven Monto']] = sales_stock[['Ven Und', 'Ven Monto']].fillna(0, axis=1)

# Correction some skus
skus_correct = {650240004650:650240007828, 650240004285:650240009563, 
                650240037474:650240037177, 6502400030840:650240030840, 
                65024002819988:650240038129, 6502400030840:650240030840}

final = sales_stock[['Semana', 'Codigo', 'Nombre comercial', 'CodSuc_y', 'Ven Und', 'Ven Monto', 'Inv Und', 'Inv Monto']].copy()
final['Codigo'] = final['Codigo'].map(int)
final['Codigo'] = final['Codigo'].replace(skus_correct)

# Remove skus DURACELL
final = final[~final['Nombre comercial'].str.contains('DURA')].copy()
final.reset_index(drop=True, inplace=True)

In [27]:
# Filter and rename columns
final['Semana'] = final['Semana'].map(int)
final.rename({'Codigo':'EAN', 'CodSuc_y':'CodSuc', 'Nombre comercial':'Descripción'}, axis=1, inplace=True)

In [28]:
final['Año'] = year

In [29]:
final['Ven Und'].sum()

5866.0

In [30]:
filename = '{1}{0}_UNIDROGAS.xlsx'
sheet_name = 'S{0}'.format(str(week).zfill(2))
final[['Año', 'Semana', 'EAN', 'Descripción', 'CodSuc', 'Ven Und', 'Ven Monto', 'Inv Und', 'Inv Monto']].to_excel(path.format(year, week) + '\\' + filename.format(year, week), sheet_name=sheet_name, index=False)