In [1]:
import numpy as np
import pandas as pd
from datetime import datetime
import os
from tqdm.notebook import tqdm

In [2]:
#Esta función devuelve una tupla con la ruta del fichero y luego nos quedamos con la 'fecha' del fichero
def file_iterator(path):
    """
    DESCRIPTION
      This function retreives all files in a specific folder.
    ARGUMENTS
      path: Where we want to get all files
    RETURN
      A list of tuples, first element is the path and the second is the file name without any extension
    """
    return [(os.path.join(path,sub,file),file.split('.')[0]) for sub in os.listdir(path) 
            for file in os.listdir(os.path.join(path,sub))]

In [214]:
def file_extractor(files):
    """
    DESCRIPTION
      This function return a concatenated DataFrame with all desired columns
    ARGUMENTS
      files: This is list of tuples, first element is the path and the second is the file name without any extension
    RETURN
      A concatenated DataFrame with 10 columns based on the files contained in the argument
    """
    
    #A list of DataFrames, where then we will concatenated
    stock = []
    #This is the returned DataFrame
    pd_stock = pd.DataFrame()
    
    #Progression bar
    pbar = tqdm()
    pbar.reset(len(files))
    
    #Files is a Tuple made up of (path,name without extension)
    for path, name in files:
        #Read the file
        print(name)
        aux_stock = pd.read_excel(path, sheet_name='Daily Stock Report')        
        
        #Get the desired columns and this is difficult because the format has changed across years
        #Format 2 has this condition aux_stock.columns[0] == 'Country' else Format 1
        if aux_stock.columns[0] != 'Country':
            #First of all pick the Tablets from the old version - Condition NaN Column 2 and Tablet Column 5
            aux_stock_tablet = aux_stock[(aux_stock.iloc[:,4].str.lower() == 'tablet') 
                                         & (aux_stock.iloc[:,1].isna())].iloc[:,[1,2,3,4,5,9]]
            #Need to add Spain in one column
            aux_stock_tablet.fillna('Spain', inplace=True)

            #Secondly, get the rest of the products from Spain
            aux_stock = aux_stock[aux_stock.iloc[:,1] == 'Spain'].iloc[:,[1,2,3,4,5,9]]
            #Merge both DataFrame
            aux_stock = pd.concat([aux_stock_tablet, aux_stock], ignore_index=True)
        else:
            #There are a lot of conditions which has changed across years
            aux_stock = aux_stock[(aux_stock.iloc[:,0] == 'Shared') | (aux_stock.iloc[:,0] == 'Shared - Group II') 
                                  | (aux_stock.iloc[:,0].str.contains('ES')) 
                                  | (aux_stock.iloc[:,0] == 'Spain')].iloc[:,[0,1,2,3,4,8]]
        
        #We will use the file name for creating the Date, Year, Month, Day and Week Number
        year, month, day = name.split('-')
        week = pd.to_datetime(name).isocalendar()[1]
        #Add the Date columns
        aux_stock['Year'] = int(year)
        aux_stock['Month'] = int(month)
        aux_stock['Day'] = int(day)
        aux_stock['Week'] = week
        
        #We add this NUMPY ARRAY because the columns have different names
        stock.append(aux_stock.values)
        #Progress bar
        pbar.update()
    
    #Progress bar
    pbar.refresh()
    #Concatenated all NUMPY arrays and convert to DataFrame  
    pd_stock = pd.DataFrame(np.concatenate(stock))
    
    #New the columns name because they are unknow, just 0, 1, 2...
    columns = ['Country', 'PartNumber', 'Description / Family', 
               'Brand', 'Stock', 'Open to Sell', 'Year', 'Month', 'Day', 'Week']
    pd_stock.columns = columns
    #Return the final DataFrame in the desired order
    return pd_stock[['Year', 'Month', 'Day', 'Week','Country', 'PartNumber', 'Description / Family', 
               'Brand', 'Stock', 'Open to Sell']].sort_values(by=['Year', 'Month', 'Day'])

In [105]:
path = '/home/dsc/Repos/TFM/Stock'

In [216]:
files = file_iterator(path)

In [197]:
files

[('/home/dsc/Repos/TFM/Stock/2018/2018-04-03.xlsx', '2018-04-03'),
 ('/home/dsc/Repos/TFM/Stock/2015/2015-03-11.xlsx', '2015-03-11')]

In [217]:
stock = file_extractor(files)

HBox(children=(HTML(value=''), FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0…

2020-09-14

2020-08-10
2020-03-23
2020-03-30
2020-01-13
2020-12-31
2020-08-17
2020-11-09
2020-10-12
2020-06-29
2020-12-07
2020-06-15
2020-07-13
2020-08-03
2020-05-18
2020-03-09
2020-07-27
2020-10-19
2020-04-27
2020-05-04
2020-11-23
2020-04-06
2020-08-24
2020-12-14
2020-03-16
2020-01-27
2020-07-06
2020-06-01
2020-06-22
2020-06-08
2020-01-06
2020-02-03
2020-12-21
2020-01-20
2020-11-30
2020-04-13
2020-02-10
2020-03-02
2020-09-07
2020-07-20
2020-05-25
2020-11-02
2020-11-16
2020-09-21
2020-05-11
2020-08-31
2020-02-24
2020-01-03
2020-10-26
2020-04-20
2020-09-28
2020-02-17
2020-10-05
2018-07-02
2018-01-08
2018-12-17
2018-07-09
2018-01-15
2018-09-17
2018-10-29
2018-04-10
2018-06-04
2018-02-26
2018-01-25
2018-08-06
2018-04-03
2018-04-30
2018-11-19
2018-05-28
2018-02-20
2018-09-03
2018-03-12
2018-11-26
2018-08-20
2018-06-18
2018-11-12
2018-07-23
2018-03-05
2018-05-14
2018-08-13
2018-06-25
2018-09-10
2018-01-29
2018-10-16
2018-12-24
2018-02-05
2018-01-02
2018-05-21
2018-12-31
2018-10-08
2018-04-1

In [218]:
stock

Unnamed: 0,Year,Month,Day,Week,Country,PartNumber,Description / Family,Brand,Stock,Open to Sell
30134,2015,3,11,11,Spain,59428120,YOGA Tablet 2 Pro–1380F 32GPT-DE,TABLET,145,145
30135,2015,3,11,11,Spain,59426282,YOGA Tablet 2-1050F 16GPT-DE,TABLET,100,99
30136,2015,3,11,11,Spain,59427831,YOGA Tablet 2-1050L 16GPT-DE,TABLET,62,60
30137,2015,3,11,11,Spain,59429485,YOGA Tablet 2 Pro-1380L 32GPT-DE,Tablet,74,74
30138,2015,3,11,11,Spain,59426288,YOGA Tablet 2-1050F 32GPT-DE,Tablet,67,67
...,...,...,...,...,...,...,...,...,...,...
26342,2021,3,15,11,Spain,82MS0028SP,NB YG Slim 7 Pro 14ACH5 R5 16G 512G 10H,Notebook,0,0
26343,2021,3,15,11,Spain,90Q7003TES,DT IC Mini 5 01IMH05 I510400T 512G 16G,AIO,0,0
26344,2021,3,15,11,Spain,90RE003TSP,DT IC Gaming5 14IOB6 I511400 512 16G,AIO,0,0
26345,2021,3,15,11,Spain,F0FN001CSP,DT YG AIO 7 27ARH6 A74800H 512G 16G,AIO,0,0


In [219]:
stock.to_excel('stock.xlsx', index=False)




In [88]:
files[:1]

[('/home/dsc/Repos/TFM/Stock/2018/2018-07-02.xlsx', '2018-07-02')]

In [80]:
aux_stock = pd.read_excel(files[0][0], sheet_name='Daily Stock Report')





In [None]:
columns = ['Country', 'PartNumber', 'Description / Family', 
               'Brand', 'Stock', 'Open to Sell', 'Year', 'Month', 'Day', 'Week']

In [117]:
aux_stock[(aux_stock.iloc[:,0] == 'Shared') 
          | (aux_stock.iloc[:,0] == 'Shared - Group II') 
          | (aux_stock.iloc[:,0].str.contains('ES')) 
          | (aux_stock.iloc[:,0] == 'Spain')].iloc[:,[0,1,2,3,4,8]]

Unnamed: 0,Country,PartNumber,Description / Family,Brand,Stock,Open to Sell
181,ESIT,PA9W0011ES,MOTO Phone XT1922-3 ES 3+32 BE DS,,0,0
182,ESIT,PAAL0056ES,MOTO Phone XT1925-5 ES 3+32 BE DS,,0,0
183,ESIT,PAAV0018ES,MOTO Phone XT1926-3 ES 4+64G BE DS,,0,0
483,Shared,G0A10170CE,Thunderbolt3 Graphics Dock(CE),Option,100,100
484,Shared,GX30N73442,KB MICE_BO L500 controller,Option,100,100
...,...,...,...,...,...,...
560,Spain,F0DE0027SP,Desktop IC AIO 520-27ICB I58400T 1TB 16G,AIO,0,0
561,Spain,ZA0V0078ES,Lenovo YB1-X90F TAB 4G+64GGR-ES,Notebook,2,2
562,Spain,ZA0W0028ES,Lenovo YB1-X90L TAB 4G+64GGR-ES,Notebook,10,10
563,Spain,ZA150044ES,Lenovo YB1-X91F TAB 4G+64GBL-ES,Notebook,5,4


In [116]:
aux_stock[(aux_stock.iloc[:,0] == 'Shared') 
          | (aux_stock.iloc[:,0] == 'Shared - Group II') 
          | (aux_stock.iloc[:,0].str.contains('ES')) 
          | (aux_stock.iloc[:,0] == 'Spain')]

Unnamed: 0,Country,PartNumber,Description / Family,Brand,Stock,Released Orders,Clean to Ship,Order Shorts,Open to Sell,Comments,Replenishment ETA,Current Status,Estimated Delivery
181,ESIT,PA9W0011ES,MOTO Phone XT1922-3 ES 3+32 BE DS,,0,0,0,0,0,,,,
182,ESIT,PAAL0056ES,MOTO Phone XT1925-5 ES 3+32 BE DS,,0,0,0,0,0,,,,
183,ESIT,PAAV0018ES,MOTO Phone XT1926-3 ES 4+64G BE DS,,0,0,0,0,0,,,,
483,Shared,G0A10170CE,Thunderbolt3 Graphics Dock(CE),Option,100,0,0,0,100,,,,
484,Shared,GX30N73442,KB MICE_BO L500 controller,Option,100,0,0,0,100,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
560,Spain,F0DE0027SP,Desktop IC AIO 520-27ICB I58400T 1TB 16G,AIO,0,0,0,0,0,,,,
561,Spain,ZA0V0078ES,Lenovo YB1-X90F TAB 4G+64GGR-ES,Notebook,2,0,0,0,2,,,,
562,Spain,ZA0W0028ES,Lenovo YB1-X90L TAB 4G+64GGR-ES,Notebook,10,0,0,0,10,,,,
563,Spain,ZA150044ES,Lenovo YB1-X91F TAB 4G+64GBL-ES,Notebook,5,1,1,0,4,,,,


In [70]:
 aux_stock = aux_stock[(aux_stock.iloc[:,0] == 'Shared') | (aux_stock.iloc[:,0] == 'Shared - Group II') 
                                  | (aux_stock.iloc[:,0].str.contains('ES')) 
                                  | (aux_stock.iloc[:,0] == 'Spain')].iloc[:,[1,2,3,4,5,9]]

Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 9
5,Spain,59428120,YOGA Tablet 2 Pro–1380F 32GPT-DE,Tablet,87,85
9,Spain,ZA0H0009SE,Lenovo YT3–X50,Tablet,0,0
10,Spain,59426289,YOGA Tablet 2-1050F 16GPT-SE,TABLET,9,8
11,Spain,59428122,YOGA Tablet 2 Pro–1380F 32GPT-SE,Tablet,8,8


In [71]:
aux_stock_tablet

Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 9
5,,59428120,YOGA Tablet 2 Pro–1380F 32GPT-DE,Tablet,87,85
9,,ZA0H0009SE,Lenovo YT3–X50,Tablet,0,0
10,,59426289,YOGA Tablet 2-1050F 16GPT-SE,TABLET,9,8
11,,59428122,YOGA Tablet 2 Pro–1380F 32GPT-SE,Tablet,8,8


In [79]:
stock = file_extractor(files[:1])

HBox(children=(HTML(value=''), FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0…

AttributeError: Can only use .str accessor with string values!

In [74]:
stock

Unnamed: 0,Year,Month,Day,Week,Country,PartNumber,Description / Family,Brand,Stock,Open to Sell
0,2015,10,19,43,Spain,59428120,YOGA Tablet 2 Pro–1380F 32GPT-DE,Tablet,87,85
1,2015,10,19,43,Spain,ZA0H0009SE,Lenovo YT3–X50,Tablet,0,0
2,2015,10,19,43,Spain,59426289,YOGA Tablet 2-1050F 16GPT-SE,TABLET,9,8
3,2015,10,19,43,Spain,59428122,YOGA Tablet 2 Pro–1380F 32GPT-SE,Tablet,8,8
4,2015,10,19,43,Spain,F0AN000SSP,A540,Desktop,6,6
5,2015,10,19,43,Spain,F0AN004ASP,A540,Desktop,10,10
6,2015,10,19,43,Spain,F0AM001USP,A740,Desktop,0,0
7,2015,10,19,43,Spain,F0AM009BSP,A740,Desktop,10,10
8,2015,10,19,43,Spain,F0AU00CPSP,B50-30,Desktop,6,6
9,2015,10,19,43,Spain,F0B4007USP,C40-30,Desktop,1,0


In [89]:
cadena = 'Shared - group'
'Shared' in cadena

True