In [1]:
!pip list

Package                            Version

You should consider upgrading via the 'c:\users\jshernandezm\anaconda3\python.exe -m pip install --upgrade pip' command.



---------------------------------- -------------------
alabaster                          0.7.12
altair                             4.2.0
altair-data-server                 0.4.1
altair-saver                       0.5.0
altair-viewer                      0.4.0
anaconda-client                    1.7.2
anaconda-navigator                 2.0.3
anaconda-project                   0.10.0
anyio                              2.2.0
appdirs                            1.4.4
argh                               0.26.2
argon2-cffi                        20.1.0
asn1crypto                         1.4.0
astroid                            2.4.0
astropy                            4.2.1
async-generator                    1.10
atomicwrites                       1.4.0
attrs                              19.3.0
autopep8                           1.5.6
Babel                              2.9.1
backcall                           0.1.0
backports.functools-lru-cache      1.6.4
backports.shutil-get-terminal-size 1.0

# Libraries

In [2]:
import os
import shutil
import pandas as pd
from zipfile import ZipFile
from datetime import datetime, timedelta
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides pandas deprecation warnings — confirm that is intended.
warnings.filterwarnings('ignore')
# Clear the class-level header style of pandas' Excel formatter
# (presumably so exported header cells carry no default styling — TODO confirm
# for the installed pandas version).
pd.io.formats.excel.ExcelFormatter.header_style = None

# Parameters

In [2]:
# Weekly archive that is looked up inside the year folder.
filename = 'Walmart_Data.zip'
# PR workbooks that are read later from the PR week folder.
pr_files = [
    'PR GL PULL 03-07-22(13983).xlsx',
    'Genomma Labs Week 202205(13956).xlsx',
]

In [3]:
# Date tag exactly as it appears in the delivered workbook names
# (it is interpolated into the file names that get moved and renamed below).
today = '3.7.22'
# Read the ISO calendar once so that year and week always describe the same
# instant, even when the cell runs right at a week or year boundary.
iso_today = datetime.today().isocalendar()
year = iso_today[0]
week = iso_today[1]
# Year folder template on the share; the single placeholder receives `year`.
path = r'\\NASPRO.infovisiontv.com\DWH01\Cognos\Genomma lab USA\US Data\Walmart\{}'

# Unzipping

In [4]:
%%time
# Extract every archive member into the year folder. The handle keeps the
# name `zip_ref` because the DotCom cell reads its member list afterwards.
year_dir = path.format(year)
with ZipFile(year_dir + '\\' + filename, mode='r') as zip_ref:
    zip_ref.extractall(path=year_dir)

Wall time: 36.7 s


# Moving Files

## DotCom

In [5]:
# Weekly destination for the Walmart.com files: <year>\DotCom\W <week> <year>
new_folder = path.format(year) + '\\DotCom\\' + 'W {0} {1}'.format(week, year)
# makedirs also creates a missing "DotCom" parent and does nothing when the
# folder already exists, so there is no check-then-create race.
os.makedirs(new_folder, exist_ok=True)
# Uses `zip_ref` left over from the unzip cell: every archive member whose
# name contains 'com' is moved into the weekly DotCom folder.
for file in [f for f in zip_ref.namelist() if 'com' in f]:
    shutil.move(path.format(year) + '\\' + file, new_folder)
# Rename the Walmart.com data pull to <year>_<week, zero-padded>_WalmarteCommerce.xlsx
old_name = new_folder + '\\' + 'Walmart.com Data Pull {}.xlsx'.format(today)
new_name = new_folder + '\\' + '{0}_{1}_WalmarteCommerce.xlsx'.format(year, str(week).zfill(2))
os.rename(old_name, new_name)

## WalMart USA

In [6]:
# Weekly destination for the Walmart USA workbooks: <year>\W <week> <year>
new_folder = path.format(year) + '\\' + 'W {0} {1}'.format(week, year)
# Creates the folder (and any missing parent) and is a no-op if it exists.
os.makedirs(new_folder, exist_ok=True)
# The three store-level workbooks carry the `today` tag in their names.
for file in ['New OTC 1 {0}.xlsx', 'New OTC 2 {0}.xlsx', 'New Beauty {0}.xlsx']:
    shutil.move(path.format(year) + '\\' + file.format(today), new_folder)

## Not Used

In [7]:
# Files whose name contains 'PG GL Pull' are set aside in the year's "Not Used" folder.
nu_folder = r'\\NASPRO.infovisiontv.com\DWH01\Cognos\Genomma lab USA\US Data\Walmart\{0}\Not Used'
nu_folder = nu_folder.format(year)

# Creates the folder (and any missing parent) and is a no-op if it exists.
os.makedirs(nu_folder, exist_ok=True)

# NOTE(review): the PR workbook listed in `pr_files` starts with 'PR GL PULL';
# confirm that 'PG GL Pull' (different letters and case) is the intended filter.
for file in [f for f in os.listdir(path.format(year)) if 'PG GL Pull' in f]:
    shutil.move(path.format(year) + '\\' + file, nu_folder)

## PR

In [8]:
# Weekly destination for the PR workbooks: <year>\PR\W <week> <year>.
# `new_folder` keeps this value afterwards; the PR loading cell reads from it.
new_folder = path.format(year) + '\\PR\\' + 'W {0} {1}'.format(week, year)
# makedirs also creates a missing "PR" parent and does nothing when the
# folder already exists.
os.makedirs(new_folder, exist_ok=True)
# Everything still left in the year folder with 'xlsx' in its name goes to PR.
for file in [f for f in os.listdir(path.format(year)) if 'xlsx' in f]:
    shutil.move(path.format(year) + '\\' + file, new_folder)

# Remove Zip File

In [9]:
# Delete the archive from the year folder.
zip_path = path.format(year) + '\\' + filename
os.remove(zip_path)

# Transform Data

In [10]:
def reshape_data(df, probe_col=8):
    """Promote the first populated row of a raw sheet to the column header.

    The header row is located as the first row whose value in column
    position ``probe_col`` is not null. That row supplies the column names
    and every row below it becomes the data.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw frame as read from the workbook; it is not modified.
    probe_col : int, default 8
        Position of the column used to detect the header row.

    Returns
    -------
    pandas.DataFrame
        Rows below the header row, re-indexed from 0 and labelled with the
        header row's values.

    Raises
    ------
    IndexError
        If ``probe_col`` is out of range or the probe column is entirely null.
    """
    # Work with positions throughout: the original mixed an index *label*
    # with ``iloc``, which only worked for a default 0..n-1 index.
    populated = df.iloc[:, probe_col].notnull().to_numpy()
    if not populated.any():
        raise IndexError(
            'no header row found: column position {} is entirely null'.format(probe_col)
        )
    header_pos = int(populated.argmax())
    # Values of the header row become the new column names
    cols = df.iloc[header_pos, :].tolist()
    # Everything below the header row is data; reset_index returns a new frame
    data = df.iloc[header_pos + 1:].reset_index(drop=True)
    # Assign by position so duplicate labels in the raw frame cannot collapse
    data.columns = cols
    return data

In [11]:
# Weekly Walmart USA folder template: {0} receives the year, {1} the week.
path_server = (
    r'\\NASPRO.infovisiontv.com\DWH01\Cognos\Genomma lab USA\US Data\Walmart'
    r'\{0}\W {1} {0}'
)

In [12]:
%%time
# Read every file in this week's Walmart USA folder, reshape each one and
# stack them. The frames are collected first and concatenated once: growing
# a DataFrame with concat inside the loop re-copies all earlier rows on
# every iteration.
week_dir = path_server.format(year, week)
frames = [
    reshape_data(pd.read_excel(week_dir + '\\' + file))
    for file in os.listdir(week_dir)
]
# An empty folder still yields an empty frame, as before.
df = pd.concat(frames) if frames else pd.DataFrame()

Wall time: 1min 5s


In [13]:
# Quantity columns (also reused by later cells under the name `num_cols`).
num_cols = ['POS Qty', 'Curr Str On Hand Qty', 'Curr Str On Order Qty', 'Curr Str In Transit Qty']
# Quantities are cast element-wise to int, sales to float.
usa_casts = {col: int for col in num_cols}
usa_casts['POS Sales'] = float
for col, caster in usa_casts.items():
    df[col] = df[col].map(caster)

In [14]:
%%time
# Read the PR workbooks listed in `pr_files` from `new_folder` (at this point
# the PR week folder assigned in the "PR" cell), reshape each one and stack
# them. Frames are collected first and concatenated once instead of growing
# a DataFrame inside the loop.
pr_frames = [
    reshape_data(pd.read_excel(new_folder + '\\' + file))
    for file in pr_files
]
# An empty file list still yields an empty frame, as before.
df_pr = pd.concat(pr_frames) if pr_frames else pd.DataFrame()

Wall time: 3.59 s


In [15]:
# PR quantities go through float before int; sales are cast to float only.
pr_casts = [(col, (float, int)) for col in num_cols] + [('POS Sales', (float,))]
for col, steps in pr_casts:
    converted = df_pr[col]
    for step in steps:
        converted = converted.map(step)
    df_pr[col] = converted

In [16]:
# Stack the Walmart USA rows and the PR rows into one frame.
data = pd.concat([df, df_pr], axis=0)

In [17]:
# Per-column totals of the quantity columns in the Walmart USA frame.
df.loc[:, num_cols].sum(axis=0)

POS Qty                     67676
Curr Str On Hand Qty       503006
Curr Str On Order Qty       92970
Curr Str In Transit Qty      8545
dtype: int64

In [18]:
# Per-column totals of the quantity columns in the PR frame.
df_pr.loc[:, num_cols].sum(axis=0)

POS Qty                     4596
Curr Str On Hand Qty       16922
Curr Str On Order Qty       4411
Curr Str In Transit Qty        0
dtype: int64

In [19]:
def last_clean(data_final, year=None, week=None):
    """Clean the combined rows and aggregate them per item and store.

    Steps: fill missing 'POS Qty' / 'POS Sales' with 0, stamp the 'AÑO' and
    'SEMANA' columns, cast 'Item Nbr', 'UPC' and 'Store Nbr' to int, replace
    two known UPC values with their corrections, then sum the numeric
    columns per (AÑO, SEMANA, item, store, address) combination.

    Parameters
    ----------
    data_final : pandas.DataFrame
        Combined rows. The frame is not modified; all work happens on a copy.
    year, week : int, optional
        Values written to 'AÑO' and 'SEMANA'. When omitted, the notebook-level
        ``year`` and ``week`` variables are used, as before.

    Returns
    -------
    pandas.DataFrame
        One row per key combination with the summed numeric columns.
    """
    # Fall back to the notebook-level values when the caller passes nothing
    if year is None:
        year = globals()['year']
    if week is None:
        week = globals()['week']

    correct_ean = {65024001847: 65006600048, 65024001848: 65006700046}
    key_cols = ['AÑO', 'SEMANA', 'Item Nbr', 'UPC', 'Item Desc 1', 'Store Nbr',
                'Street Address', 'City', 'State', 'Zip Code']
    value_cols = ['POS Qty', 'POS Sales', 'Curr Str On Hand Qty',
                  'Curr Str On Order Qty', 'Curr Str In Transit Qty']

    # Copy first so the caller's frame is left untouched and re-running is safe
    cleaned = data_final.copy()
    # Assign the results back: `frame[col].fillna(..., inplace=True)` works on a
    # temporary selection and does not update the frame under Copy-on-Write.
    cleaned['POS Qty'] = cleaned['POS Qty'].fillna(0)
    cleaned['POS Sales'] = cleaned['POS Sales'].fillna(0)
    cleaned['AÑO'] = year
    cleaned['SEMANA'] = week
    for col in ('Item Nbr', 'UPC', 'Store Nbr'):
        cleaned[col] = cleaned[col].map(int)
    cleaned['UPC'] = cleaned['UPC'].replace(correct_ean)
    return cleaned.pivot_table(index=key_cols, values=value_cols,
                               aggfunc='sum').reset_index()

In [20]:
def export_layout(data_final, year, week):
    """Clean the combined data and write the weekly Walmart USA workbook.

    Parameters
    ----------
    data_final : pandas.DataFrame
        Combined rows, passed to ``last_clean``.
    year, week : int
        Used for the output folder, the file name
        (``<year>_<week, zero-padded>_Walmart_USA.xlsx``) and the 'AÑO' /
        'SEMANA' columns.

    Returns
    -------
    None
        The result is written to sheet 'Hoja1' of the output workbook.
    """
    # Clean and aggregate the rows
    final = last_clean(data_final)
    # last_clean stamps the notebook-level year/week when called this way;
    # overwrite them so the sheet content always agrees with the arguments
    # that also name the output file.
    final['AÑO'] = year
    final['SEMANA'] = week
    final['Item Flags'] = None
    # Column order of the output layout
    cols = ['AÑO', 'SEMANA', 'Item Nbr', 'Item Flags', 'UPC', 'Item Desc 1', 'Store Nbr', 'Street Address', 'City', 'State', 'Zip Code', 'POS Qty', 'POS Sales','Curr Str On Hand Qty', 'Curr Str On Order Qty', 'Curr Str In Transit Qty']
    path_export = r'\\NASPRO.infovisiontv.com\DWH01\Cognos\Genomma lab USA\US Data\Walmart\Output\{0}'.format(year)
    file_export = '{0}_{1}_Walmart_USA.xlsx'.format(year, str(week).zfill(2))
    # Make sure the year's output folder exists (first export of a new year)
    os.makedirs(path_export, exist_ok=True)
    # Write the workbook
    final[cols].to_excel(path_export + '\\' + file_export, sheet_name='Hoja1', index=False)

In [21]:
%%time
# Write this week's Walmart USA workbook from the combined frame.
export_layout(data_final=data, year=year, week=week)

Wall time: 1min 10s
