# Libraries

In [1]:
from datetime import datetime
import pandas as pd
import calendar
import locale
import pyodbc
## Remove pandas' default header style used when writing Excel files
pd.io.formats.excel.ExcelFormatter.header_style = None
## Customized float formatting: thousands separator and two decimals when displaying floats
pd.options.display.float_format = '{:,.2f}'.format

# Parameters

In [2]:
## Reporting period: used to fill the folder template, to filter the DWH calendar
## and to name the exported workbook and sheet
year = 2021
week = 50

## Parameters of Unidrogas Distribución
## `path` is a template: {0} is replaced by the year and {1} by the week
path = r'\\NASPRO.infovisiontv.com\DWH01\Cognos\Genomma lab Colombia\Informacion de punto de venta\Archivos de carga\UNIDROGAS\{0}\S {1}'
## Workbook names inside that folder (stock snapshot and unit sales)
## NOTE(review): the names embed dates, so they presumably must be edited for every run
filename_stock = 'INVENTARIO GENNOMMA 15 DICIEMBRE 2021.xlsx'
filename_sales_units = 'VENTA DISTRIBUCION EN UNIDADES GENOMMA DEL 06 AL 12 DICIEMBRE 2021.xlsx'

## Parameters of Price Data
## NOTE(review): absolute path under one user's profile; must be changed to run elsewhere
path_prices = r'C:\Users\jshernandezm\OneDrive - genommalabinternacional\01Code\02PriceUpdate\COL\1Data\2Catalogue\1Current\Precios'
## The leading backslash is required: this name is appended directly to `path_prices`
filename_prices = '\\Catalogo_Precios-202112.xlsx'

# Import data

In [3]:
# Ignore rows with no data
def get_data(data, column):
    """Promote the first populated row of ``column`` to the header and keep complete rows.

    The header row is located by position, so the result does not depend on
    the index labels of ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw sheet as read from the workbook; it is not modified.
    column : label
        Column whose first non-null cell marks the header row.

    Returns
    -------
    pandas.DataFrame
        The rows below the header row that contain no missing value, with the
        header row's values as column names and a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If ``column`` contains no non-null value.
    """
    populated = data[column].notnull().to_numpy()
    if not populated.any():
        raise ValueError(
            'Column {0!r} has no non-null values, so no header row can be located'.format(column)
        )
    # argmax on a boolean array gives the position of the first True
    header_pos = int(populated.argmax())
    df = data.iloc[header_pos + 1:].copy()
    df.columns = data.iloc[header_pos].tolist()
    # Keep only fully populated rows and renumber them
    return df.dropna(axis=0).reset_index(drop=True)

## Import Stocks

In [4]:
## Read the stock workbook for the selected year/week and promote its real header row
stock_file = path.format(year, week) + '\\' + filename_stock
df_stock = get_data(pd.read_excel(stock_file), 'Unnamed: 6')

# Connect to DWH

In [5]:
## Connection to DWH
conx = pyodbc.connect('Driver={SQL Server};'
                      'Server=SFEDWH01;'
                      'Database=Gnm_DWH;'
                      'Trusted_Connection=yes;')

## Dates from DWH
## The year and week are sent as bound parameters ('?' placeholders) instead of
## being formatted into the SQL text
query_dates = ('SELECT TmpFecha AS Fecha, TmpSemanaAnioGenomma AS Semana '
               'FROM Gnm_DWH.dbo.Dim_Tiempo '
               'WHERE TmpAnioSemanaGenomma = ? AND TmpSemanaAnioGenomma = ?')
try:
    df_weeksGL = pd.read_sql(query_dates, conx, params=[year, week])
finally:
    ## This is the only query in the notebook, so release the connection right away
    conx.close()

# Transform

In [6]:
# SucCod: branch code for each location label.
# The 'Inventario ...' keys are the column names of the stock file and the
# plain city names are the sheet names of the sales file.
loc_inv = {
    'Inventario BQUILLA': 1001,
    'Inventario BOGOTA': 1002,
    'Inventario BMANGA': 1003,
    'Inventario MEDELLIN': 1004,
    'Inventario VDUPAR': 1006,
    'BARRANQUILLA': 1001,
    'BOGOTA': 1002,
    'BUCARAMANGA': 1003,
    'MEDELLIN': 1004,
    'VALLEDUPAR': 1006,
    'SANTAMARTA': 1005,
}

In [7]:
# Capitalised month abbreviations as provided by `calendar`; position 0 is the empty string
month_abbr = list(map(str.capitalize, calendar.month_abbr))
# Lookup from abbreviation to month number (position in the list above)
nmonth_abbr = {abbr: number for number, abbr in enumerate(month_abbr)}

In [8]:
# Clean dates and transform to datetime
def clean_date(str_date):
    """Convert a raw date string into a ``datetime``.

    The string is split on '/': the text after the slash is the year, the last
    two characters before it are the day, and the first three characters of
    the whole string are a month abbreviation looked up in ``nmonth_abbr``.
    """
    before_slash, after_slash = str_date.split('/')[:2]
    return datetime(
        int(after_slash),
        nmonth_abbr[str_date[:3]],
        int(before_slash[-2:]),
    )

In [9]:
def clean_name_sku(name):
    """Return ``name`` truncated at the first '  COMERCIAL ' marker.

    Strings without the marker are returned unchanged. Values that are not
    strings (for example missing values) are returned as they are, without
    relying on a catch-all ``except``.
    """
    if isinstance(name, str):
        return name.split('  COMERCIAL ')[0]
    return name

This function merges unit sales and GL weeks:

In [10]:
def clean_join(sales_units, weeks_GL, city):
    """Aggregate one city's unit sales by GL week and product.

    Parameters
    ----------
    sales_units : pandas.DataFrame
        Long-format sales with columns 'Fecha' (raw date string), 'Sku' and 'Ven Und'.
    weeks_GL : pandas.DataFrame
        Calendar with columns 'Fecha' and 'Semana', joined on 'Fecha'.
    city : str
        Value written to the 'Loc' column of every returned row.

    Returns
    -------
    pandas.DataFrame
        Columns 'Semana', 'Loc', 'Sku', 'Ven Und', one row per week and product,
        with 'Ven Und' summed.
    """
    # Work on an explicit copy so the column assignment below neither touches
    # the caller's frame nor triggers SettingWithCopyWarning
    total_sales = sales_units[['Fecha', 'Sku', 'Ven Und']].copy()
    total_sales['Fecha'] = total_sales['Fecha'].map(clean_date)
    # NOTE(review): with a left join, dates missing from weeks_GL get no 'Semana';
    # those rows are presumably dropped by the pivot below -- confirm this is intended
    total_sales = total_sales.merge(weeks_GL, on='Fecha', how='left')
    weekly = total_sales.pivot_table(index=['Semana', 'Sku'], values=['Ven Und'], aggfunc='sum').reset_index()
    weekly['Loc'] = city
    return weekly[['Semana', 'Loc', 'Sku', 'Ven Und']]

## Sales

### Import and Clean

In [11]:
%%time
## Import units sales and tranform data
data_sales = pd.DataFrame()
for city in ['BARRANQUILLA', 'BOGOTA', 'BUCARAMANGA', 'MEDELLIN', 'VALLEDUPAR']:
    df_unit = pd.read_excel(path.format(year, week) + '\\' + filename_sales_units, sheet_name=city)
    df_unit = get_data(df_unit, 'Unnamed: 7')
    cols_unit = df_unit.columns    
    data_unit = df_unit.melt(id_vars='Fecha', value_vars=cols_unit[1:], var_name='Sku', value_name='Ven Und')
    data = clean_join(data_unit, df_weeksGL, city)
    data_sales = pd.concat([data_sales, data], axis=0)
## Create columns "CodSuc"
data_sales['CodSuc'] = data_sales['Loc'].map(loc_inv)
## Remove last whitespace from "Sku"
data_sales['Sku'] = data_sales['Sku'].str.rstrip()
## Remove CEDIS, this make match with the names of stocks
data_sales['Sku'] = data_sales['Sku'].map(clean_name_sku)
data_sales.reset_index(drop=True, inplace=True)

Wall time: 2.04 s


In [12]:
## Number of missing values per column of the sales table
data_sales.isna().sum()

Semana     0
Loc        0
Sku        0
Ven Und    0
CodSuc     0
dtype: int64

## Stock

### Clean

In [13]:
## Unpivot the stock table: one row per product and inventory column
id_cols = ['Codigo', 'Nombre comercial', 'Laboratorio', 'Linea', 'Precio']
inventory_cols = ['Inventario BQUILLA', 'Inventario BOGOTA', 'Inventario BMANGA',
                  'Inventario MEDELLIN', 'Inventario STMARTA', 'Inventario VDUPAR']
data_stock = pd.melt(df_stock, id_vars=id_cols, value_vars=inventory_cols,
                     var_name='Loc', value_name='Inv Und')

Some EANs are wrong, so we correct them as discussed with Cristian Yepez:

In [14]:
## Mapping {wrong code: corrected code}
skus_correct = {650240004650:650240007828, 650240004285:650240009563,
                650240037474:650240037177, 6502400030840:650240030840,
                65024002819988:650240038129}

## Name-based overrides are applied first so that the mapping below also corrects
## the codes assigned here (the LOMECAN code is itself a key of `skus_correct`)
data_stock.loc[data_stock['Nombre comercial']=='GOICOECHEA EFECTO ANTI-CELULITIS FRASCO X 400 ML', 'Codigo']=650240004605
data_stock.loc[data_stock['Nombre comercial']=='LOMECAN 200 MG CAJA X 3 OVULOS', 'Codigo']=6502400030840

data_stock['Codigo'] = data_stock['Codigo'].replace(skus_correct)

In [15]:
## Map each inventory column name to its branch code ("CodSuc")
data_stock['CodSuc'] = data_stock['Loc'].map(loc_inv)

## Drop rows with any missing value and renumber; this removes STMARTA,
## whose inventory column has no entry in `loc_inv`
data_stock = data_stock.dropna(axis=0).reset_index(drop=True)

## Convert "CodSuc" to int now that no missing codes remain
data_stock['CodSuc'] = data_stock['CodSuc'].map(int)

Check if there is any column with missing data

In [16]:
## Number of missing values per column of the stock table
data_stock.isna().sum()

Codigo              0
Nombre comercial    0
Laboratorio         0
Linea               0
Precio              0
Loc                 0
Inv Und             0
CodSuc              0
dtype: int64

Correcting the barcodes can leave several rows with the same code, so we aggregate them with a pivot table to remove the duplicates:

In [17]:
## Sum "Inv Und" over rows that share every descriptive column
stock_keys = ['Codigo', 'Nombre comercial', 'Laboratorio', 'Linea', 'Precio', 'Loc', 'CodSuc']
data_stock = data_stock.pivot_table(index=stock_keys, values=['Inv Und'], aggfunc='sum')
## Bring the grouping keys back as ordinary columns
data_stock = data_stock.reset_index().copy()

# Merge Sales & Stock

## Preparation

In [18]:
## Create ID: branch code followed by the product name, on both tables
sales_branch = data_sales['CodSuc'].map(str)
stock_branch = data_stock['CodSuc'].map(str)
data_sales['ID'] = sales_branch + data_sales['Sku'].map(str)
data_stock['ID'] = stock_branch + data_stock['Nombre comercial']

We validate that there are no repeated IDs:

In [19]:
## Number of distinct IDs that occur more than once in the sales table
data_sales['ID'].value_counts().gt(1).sum()

0

In [20]:
## Number of distinct IDs that occur more than once in the stock table
data_stock['ID'].value_counts().gt(1).sum()

0

## Merge data

In [21]:
## Outer join keeps IDs that appear in only one of the two tables;
## the column present in both ('CodSuc') gets the suffixes _x (sales) and _y (stock)
sales_stock = data_sales.merge(data_stock, how='outer', on='ID')

## Cleaning the merge

In [22]:
## Clean some columns
## Assign the filled columns back: fillna(inplace=True) on a selected column is
## chained assignment and is not guaranteed to update `sales_stock`
## Rows without a week take the first week found in the sales data
sales_stock['Semana'] = sales_stock['Semana'].fillna(value=data_sales['Semana'].unique()[0])
## Rows without sales count as zero units sold
sales_stock['Ven Und'] = sales_stock['Ven Und'].fillna(0)

## Correction of some skus: {wrong code: corrected code}
skus_correct = {650240004650:650240007828, 650240004285:650240009563,
                650240037474:650240037177, 6502400030840:650240030840,
                65024002819988:650240038129}

final = sales_stock[['Semana', 'Codigo', 'Nombre comercial', 'CodSuc_y', 'Ven Und', 'Inv Und']].copy()
## NOTE(review): int() fails on missing values, so this assumes every merged row
## has a stock match (no sales-only IDs) -- confirm
final['Codigo'] = final['Codigo'].map(int)
final['Codigo'] = final['Codigo'].replace(skus_correct)

## Remove skus DURACELL (currently disabled)
#final = final[~final['Nombre comercial'].str.contains('DURA')].copy()
#final.reset_index(drop=True, inplace=True)

In [23]:
## Control totals: units sold and units in stock after the merge
final.loc[:, ['Ven Und', 'Inv Und']].sum(axis=0)

Ven Und     12939.0
Inv Und    136370.0
dtype: float64

In [24]:
## Add the reporting year, store the week as an integer and rename columns for the export
final['Año'] = year
final['Semana'] = final['Semana'].map(int)
final = final.rename(columns={'Codigo':'EAN', 'CodSuc_y':'CodSuc', 'Nombre comercial':'Descripción'})

## Merge the prices

In [25]:
## Import and cleaning
df_prices = pd.read_excel(path_prices + filename_prices)
## Keep only this client's list prices
data_prices = df_prices.loc[df_prices['Cliente']=='UNIDROGAS S.A.', ['EAN', 'LISTA']]
data_final = final.merge(data_prices, on='EAN', how='left')
## EANs without a list price get a price of 1; the result is assigned back because
## fillna(inplace=True) on a selected column is not guaranteed to update the frame
data_final['LISTA'] = data_final['LISTA'].fillna(1)

## Amount of sales and stock
data_final['Ven Monto'] = data_final['Ven Und']*data_final['LISTA']
data_final['Inv Monto'] = data_final['Inv Und']*data_final['LISTA']

# Export Data

In [26]:
## Output file name is "<week><year>_UNIDROGAS.xlsx"; the sheet is "S" plus the two-digit week
filename = '{1}{0}_UNIDROGAS.xlsx'.format(year, week)
sheet_name = 'S{0}'.format(str(week).zfill(2))

## Columns written to the workbook, in this order
cols = ['Año', 'Semana', 'EAN', 'Descripción', 'CodSuc', 'Ven Und', 'Ven Monto', 'Inv Und', 'Inv Monto']
## The workbook is written to the same year/week folder the inputs were read from
output_file = path.format(year, week) + '\\' + filename
data_final[cols].to_excel(output_file, sheet_name=sheet_name, index=False)

Total units sold and total units in stock:

In [27]:
## Control totals after the price merge
data_final.loc[:, ['Ven Und', 'Inv Und']].sum(axis=0)

Ven Und     12939.0
Inv Und    136370.0
dtype: float64