# Libraries

In [1]:
import pandas as pd
from datetime import datetime
import pyodbc
import shutil
import os
from openpyxl import load_workbook

# Parameters

In [2]:
# Reporting period: the year and week number are used both to select the
# matching dates from the time dimension and to build the input file paths.
year, week = 2021, 30
# Stock file name template: {0} = year, {1} = week (zero-padded by the callers).
file_name = '{0}_{1}_Disval_INV.txt'
# Folder template for the weekly stock files: {0} = year, {1} = week.
path = r'C:\Users\jshernandezm\genommalabinternacional\Francisco Jose Delfino - Stocks_canal\{0}\Semana {1}'

# Connections to DWH

In [3]:
# Connection to the Gnm_DWH database on server SFEDWH01 (Trusted_Connection=yes,
# so no credentials are embedded in the notebook).
conn1 = pyodbc.connect('Driver={SQL Server};'
                     'Server=SFEDWH01;'
                     'Database=Gnm_DWH;'
                     'Trusted_Connection=yes;')

# Connection to the Gnm_MasterOp database on the same server.
# NOTE(review): conn2 is not referenced in the cells visible here - confirm it is used further down.
conn2 = pyodbc.connect('Driver={SQL Server};'
                     'Server=SFEDWH01;'
                     'Database=Gnm_MasterOp;'
                     'Trusted_Connection=yes;')

# Template returning the dates (TmpFecha) of one week of the time dimension.
# Placeholders are filled with str.format: {0} = year, {1} = week number.
query_days = '''SELECT TmpFecha
                FROM Gnm_DWH.dbo.Dim_Tiempo
                WHERE TmpAnioSemanaGenomma = {0} AND TmpSemanaAnioGenomma = {1}'''

# Distinct (SucCodCliente, SucId) pairs for the chains whose PaisNombre is 'Argentina'.
# The LEFT JOIN keeps chains without any branch (their Suc* columns come back NULL).
# NOTE(review): query_sucs is not referenced in the cells visible here.
query_sucs = '''SELECT DISTINCT SUC.SucCodCliente, SUC.SucId
                    FROM (SELECT DISTINCT PaisNombre, CadID FROM Gnm_MasterOp.dbo.vw_EstructuraClientesSegPTVTotal
                    WHERE PaisNombre = 'Argentina') AS CLIE
                LEFT JOIN (
                    SELECT DISTINCT SucId, SucCodCliente, CadId FROM Gnm_MasterOp.dbo.vw_EstructuraSucursalesTotal) AS SUC
                ON CLIE.CadID = SUC.CadID 
                '''

# Stocks

## Load data

In [4]:
# Load the weekly Disval stock file and stamp every row with the last date of
# the selected week.
df_stock = pd.DataFrame()
# Reset the folder template here so this cell gives the same result when it is
# re-run after later cells have reassigned `path`.
path = r'C:\Users\jshernandezm\genommalabinternacional\Francisco Jose Delfino - Stocks_canal\{0}\Semana {1}'
# Dates of the selected week, taken from the time dimension
df_days = pd.read_sql(query_days.format(year, week), conn1)
# Search the file in different locations: the default weekly folder first,
# then the same folder under "Stocks".
try:
    df = pd.read_csv(path.format(year, str(week).zfill(2)) + '\\' + file_name.format(year, str(week).zfill(2)), header=None)
except OSError:
    # Only a file-access failure (missing file, permissions, ...) triggers the
    # fallback. A parse error in a file that does exist is a real problem and
    # is allowed to propagate instead of being masked by the second attempt.
    path = r'C:\Users\jshernandezm\genommalabinternacional\Francisco Jose Delfino - Stocks_canal\Stocks\{0}\Semana {1}'
    df = pd.read_csv(path.format(year, str(week).zfill(2)) + '\\' + file_name.format(year, str(week).zfill(2)), header=None)
# Assign date into the dataframe (latest date of the week)
df['Fecha_Stock'] = df_days['TmpFecha'].max()
df_stock = pd.concat([df_stock, df])

## Clean Data

In [5]:
# Formatting data stocks
df_stock.rename(dict(zip(range(5), ['Cod_Cliente', 'EAN o UPC Genomma', 'Descripcion_Prod', 'Descripcion_Local', 'Unidades'])), axis=1, inplace=True)
df_stock['Cadena'] = 'Disval'
df_stock['Cod_Prod'] = df_stock['EAN o UPC Genomma']
df_stock['Cod_Local'] = '89310'
data_stock = df_stock[['Fecha_Stock', 'Cadena', 'Cod_Prod', 'EAN o UPC Genomma', 'Descripcion_Prod', 'Cod_Local', 'Descripcion_Local', 'Unidades']].copy()
data_stock = data_stock[~data_stock['EAN o UPC Genomma'].isin([999999, 9999999999999])].copy()
# Order columns
final_stock = data_stock[['Fecha_Stock', 'Cadena', 'Cod_Prod', 'EAN o UPC Genomma', 'Descripcion_Prod', 'Cod_Local', 'Descripcion_Local', 'Unidades']].copy()

In [6]:
# Missing values per column of the final stock frame
final_stock.isna().sum()

Fecha_Stock          0
Cadena               0
Cod_Prod             0
EAN o UPC Genomma    0
Descripcion_Prod     0
Cod_Local            0
Descripcion_Local    0
Unidades             0
dtype: int64

# Sell Out

In [7]:
# Select elements in intersection
def dates_repeted(dates, new_dates):
    """Return the items of ``new_dates`` that also appear in ``dates``.

    Parameters
    ----------
    dates : iterable of hashable
        Values already loaded. Consumed once to build a lookup set, so
        one-shot iterables (generators, iterators) are supported.
    new_dates : iterable
        Candidate values.

    Returns
    -------
    list
        Matching items in the order (and with the multiplicity) in which
        they occur in ``new_dates``.
    """
    # Build the set once and test membership against it; the original built
    # the set but then searched `dates` itself for every candidate.
    pattern = set(dates)
    return [x for x in new_dates if x in pattern]

In [8]:
# Convert date to str
def date_to_str(date):
    """Format a date-like value as ``'YYYY-MM-DD'``; return anything else unchanged.

    Parameters
    ----------
    date : object
        Any value. If it exposes a working ``strftime`` it is formatted,
        otherwise it is passed through as-is (best-effort conversion, so
        values that are already strings survive untouched).

    Returns
    -------
    str or object
        The formatted date, or the original value when formatting fails.
    """
    try:
        return date.strftime('%Y-%m-%d')
    except Exception:
        # Keep the best-effort pass-through, but no longer swallow
        # KeyboardInterrupt / SystemExit as the bare `except:` did.
        return date

## Load data

In [9]:
%%time
# The SO data is accumulated, so we import several dates
df_so = pd.DataFrame(columns=range(9))
# Walk the six weeks ending at the selected week. The list is built before the
# loop rebinds `year`/`week`, and the last pair is the selected week itself, so
# both names hold their original values again once the loop finishes.
for year, week in [(year, w) for w in range(week - 5, week + 1)]:
    path = r'C:\Users\jshernandezm\genommalabinternacional\Francisco Jose Delfino - Stocks_canal\Sell Out - No B2b\Disval'
    try:
        # Search the file in its two possible formats: text first, then Excel
        try:
            file_name = '{0}_{1}_Disval_SO.txt'
            df = pd.read_csv(path + '\\' + file_name.format(year, str(week).zfill(2)), header=None)
        except Exception:
            # `Exception` instead of a bare except: a kernel interrupt must
            # stop the cell rather than silently trigger the Excel fallback.
            file_name = '{0}_{1}_Disval_SO.xlsx'
            df = pd.read_excel(path + '\\' + file_name.format(year, str(week).zfill(2)), header=None)
        # Clean date (column 5 holds the date)
        df[5] = df[5].map(date_to_str)
        # Dates already loaded from earlier files that this file also contains
        dr = dates_repeted(df_so[5].unique(), df[5].unique())
        if len(dr) > 0:
            # The newer file wins: drop the previously loaded rows for those dates
            df_so = df_so[~df_so[5].isin(dr)].copy()
        df_so = pd.concat([df_so, df], axis=0)
    except Exception:
        # Best effort: a week that cannot be loaded is reported and skipped.
        print('Ojo con el archivo ' + str(year) + ' - ' + str(week))

Wall time: 663 ms


## Clean data

In [10]:
# Formatting sell out columns
df_so['Fecha'] = df_so[5].apply(lambda x: datetime.strptime(x, '%Y-%m-%d'))
# Filter the specific week
df_so = df_so[df_so['Fecha'].isin(df_days['TmpFecha'].tolist())].copy()
df_so.rename({6:'EAN o UPC Genomma', 7:'Descripcion_Local', 8:'Unidades'}, axis=1, inplace=True)
df_so['Cadena'] = 'Disval'
df_so['Cod_Prod'] = df_so['EAN o UPC Genomma']
df_so['Cod_Local'] = '89310'
data_so = df_so[['Fecha', 'Cadena', 'Cod_Prod', 'EAN o UPC Genomma', 'Cod_Local', 'Descripcion_Local', 'Unidades']].copy()
# Filter EAN
data_so = data_so[~data_so['EAN o UPC Genomma'].isin([999999, 9999999999999])].copy()
# Merge with stock's Descripcion
aux = data_stock[['EAN o UPC Genomma', 'Descripcion_Prod']]
data_so = data_so.merge(aux.drop_duplicates(), on='EAN o UPC Genomma', how='left')
# Order columns
final_so = data_so[['Fecha', 'Cadena', 'Cod_Prod', 'EAN o UPC Genomma', 'Descripcion_Prod', 'Cod_Local', 'Descripcion_Local', 'Unidades']].copy()

In [11]:
# Missing values per column of the final sell-out frame
final_so.isna().sum()

Fecha                0
Cadena               0
Cod_Prod             0
EAN o UPC Genomma    0
Descripcion_Prod     0
Cod_Local            0
Descripcion_Local    0
Unidades             0
dtype: int64

In [12]:
# Distinct sell-out dates left after filtering to the selected week
final_so['Fecha'].unique()

array(['2021-07-19T00:00:00.000000000', '2021-07-20T00:00:00.000000000',
       '2021-07-21T00:00:00.000000000', '2021-07-22T00:00:00.000000000',
       '2021-07-23T00:00:00.000000000', '2021-07-24T00:00:00.000000000'],
      dtype='datetime64[ns]')

# Export file

In [13]:
# Export to layout file
def write_layout(filename, df, sellout=True):
    """Write ``df`` into an existing layout workbook, starting at row 19, column B.

    Parameters
    ----------
    filename : str
        Path of an existing .xlsx workbook; it is opened in append mode and
        saved in place.
    df : pandas.DataFrame
        Data to write. Neither the index nor the header row is written.
    sellout : bool, default True
        ``True`` writes to the 'Ventas' sheet, anything else to 'Stock'.
    """
    writer = pd.ExcelWriter(filename, engine='openpyxl', mode='a')
    # Register the workbook's existing sheets on the writer so that to_excel
    # writes into the named sheet.
    # NOTE(review): assigning `writer.book` / `writer.sheets` is reported as
    # unsupported by recent pandas releases - confirm against the installed version.
    writer.book = load_workbook(filename)
    writer.sheets = {ws.title: ws for ws in writer.book.worksheets}
    sheet_name = 'Ventas' if sellout == True else 'Stock'
    # startrow/startcol are zero-based: data lands at row 19, column B
    df.to_excel(writer, sheet_name=sheet_name, startcol=1, startrow=18, index=False, header=None)
    # close() saves the workbook and also releases the file handle, which
    # save() alone leaves open (and save() is deprecated in newer pandas).
    writer.close()

In [14]:
# Missing values per column of the final sell-out frame
# NOTE(review): this repeats the null check already run after the sell-out cleaning step.
final_so.isna().sum()

Fecha                0
Cadena               0
Cod_Prod             0
EAN o UPC Genomma    0
Descripcion_Prod     0
Cod_Local            0
Descripcion_Local    0
Unidades             0
dtype: int64