In [1]:
# Initial imports
import pandas as pd
import numpy as np
from datetime import datetime

In [2]:
# Load the ventas_linea.csv dataset.
stock_move_df = pd.read_csv('./stock_move.csv')
stock_move_df

Unnamed: 0,id,date,location_id,location_dest_id,product_id,qty_done,state
0,87634,4/13/2022 16:46,"[13, 'PL/Traspaso desde Alm Central']","[8, 'WCENT/Existencias']","[25761, 'SAM A22 4/64 NEGRO']",1.0,done
1,87635,4/13/2022 16:46,"[13, 'PL/Traspaso desde Alm Central']","[8, 'WCENT/Existencias']","[25550, 'MOTO G20 4/64 ROSA']",1.0,done
2,87636,4/13/2022 16:46,"[13, 'PL/Traspaso desde Alm Central']","[8, 'WCENT/Existencias']","[24903, 'LANIX X860 2/32 VDE']",1.0,done
3,87637,4/13/2022 16:46,"[13, 'PL/Traspaso desde Alm Central']","[8, 'WCENT/Existencias']","[25549, 'MOTO G20 4/64 AZUL']",1.0,done
4,87638,4/13/2022 16:46,"[13, 'PL/Traspaso desde Alm Central']","[8, 'WCENT/Existencias']","[25926, 'USB FLASH DRIVE 16GB']",2.0,done
...,...,...,...,...,...,...,...
516989,534205,2/23/2023 18:53,"[82, 'WVRTA/Existencias']","[5, 'Partner Locations/Clientes']","[28980, '[AUT119] AUDIFONOS INAL 1HORA AUT119 ...",1.0,done
516990,534428,2/23/2023 23:39,"[82, 'WVRTA/Existencias']","[5, 'Partner Locations/Clientes']","[19210, 'AUDIFONO GAMER STF BEAST-MUSPELL NEGRO']",1.0,done
516991,534429,2/23/2023 23:39,"[82, 'WVRTA/Existencias']","[5, 'Partner Locations/Clientes']","[25596, 'MOUSE ALAMBRICO MOREKA MU-01']",1.0,done
516992,75441,4/5/2022 18:56,"[82, 'WVRTA/Existencias']","[14, 'VL/Ajustes de Inventario']","[25670, '[PJ033] PORTACELULAR MAGNETICO 1HORA ...",1.0,done


In [3]:
# Understand all the columns that exist in the data frame
print(stock_move_df.columns)

Index(['id', 'date', 'location_id', 'location_dest_id', 'product_id',
       'qty_done', 'state'],
      dtype='object')


In [4]:
# Count false values
false_stock_move_df = (stock_move_df == 'False').sum().sum()
print(f"There are {false_stock_move_df} 'False' values in the data frame.")

There are 0 'False' values in the data frame.


In [5]:
# Count null values
null_stock_move_df = stock_move_df.isnull().sum()
print(f"There are {null_stock_move_df} 'Null' values in the data frame.")

There are id                  0
date                0
location_id         0
location_dest_id    0
product_id          0
qty_done            0
state               0
dtype: int64 'Null' values in the data frame.


In [6]:
# Drop null values
null_stock_move_df = stock_move_df.dropna()

In [7]:
# Split the 'product_id' column of stock_move_df into two columns using the comma delimiter and select each column
new_df = stock_move_df['product_id'].str.split(', ', n=1, expand=True).rename(columns={0:'Product_ID', 1:'Description'})
new_df['Product_ID'] = new_df['Product_ID'].str[1:]
new_df['Description'] = new_df['Description'].str[:-1]
new_df.head()

Unnamed: 0,Product_ID,Description
0,25761,'SAM A22 4/64 NEGRO'
1,25550,'MOTO G20 4/64 ROSA'
2,24903,'LANIX X860 2/32 VDE'
3,25549,'MOTO G20 4/64 AZUL'
4,25926,'USB FLASH DRIVE 16GB'


In [8]:
# Split the 'location_id' column of stock_move_df into two columns using the comma delimiter and select each column
new_df1 = stock_move_df['location_id'].str.split(', ', n=1, expand=True).rename(columns={0:'ID_Warehouse', 1:'Location_Warehouse'})
new_df1['ID_Warehouse'] = new_df1['ID_Warehouse'].str[1:]
new_df1['Location_Warehouse'] = new_df1['Location_Warehouse'].str[:-1]
new_df1

Unnamed: 0,ID_Warehouse,Location_Warehouse
0,13,'PL/Traspaso desde Alm Central'
1,13,'PL/Traspaso desde Alm Central'
2,13,'PL/Traspaso desde Alm Central'
3,13,'PL/Traspaso desde Alm Central'
4,13,'PL/Traspaso desde Alm Central'
...,...,...
516989,82,'WVRTA/Existencias'
516990,82,'WVRTA/Existencias'
516991,82,'WVRTA/Existencias'
516992,82,'WVRTA/Existencias'


In [9]:
# Split the 'location_dest_id' column of stock_move_df into two columns using the comma delimiter and select each column
new_df2 = stock_move_df['location_dest_id'].str.split(', ', n=1, expand=True).rename(columns={0:'Location_dest_id', 1:'Location_dest'})
new_df2['Location_dest_id'] = new_df2['Location_dest_id'].str[1:]
new_df2['Location_dest'] = new_df2['Location_dest'].str[:-1]
new_df2

Unnamed: 0,Location_dest_id,Location_dest
0,8,'WCENT/Existencias'
1,8,'WCENT/Existencias'
2,8,'WCENT/Existencias'
3,8,'WCENT/Existencias'
4,8,'WCENT/Existencias'
...,...,...
516989,5,'Partner Locations/Clientes'
516990,5,'Partner Locations/Clientes'
516991,5,'Partner Locations/Clientes'
516992,14,'VL/Ajustes de Inventario'


In [10]:
# Concatenate the original DataFrame with the new DataFrame
new_stock_move_df = pd.concat([stock_move_df.drop(columns=['product_id', 'location_dest_id', 'location_id']), new_df, new_df1, new_df2], axis=1)

# Print the resulting DataFrame
new_stock_move_df 

Unnamed: 0,id,date,qty_done,state,Product_ID,Description,ID_Warehouse,Location_Warehouse,Location_dest_id,Location_dest
0,87634,4/13/2022 16:46,1.0,done,25761,'SAM A22 4/64 NEGRO',13,'PL/Traspaso desde Alm Central',8,'WCENT/Existencias'
1,87635,4/13/2022 16:46,1.0,done,25550,'MOTO G20 4/64 ROSA',13,'PL/Traspaso desde Alm Central',8,'WCENT/Existencias'
2,87636,4/13/2022 16:46,1.0,done,24903,'LANIX X860 2/32 VDE',13,'PL/Traspaso desde Alm Central',8,'WCENT/Existencias'
3,87637,4/13/2022 16:46,1.0,done,25549,'MOTO G20 4/64 AZUL',13,'PL/Traspaso desde Alm Central',8,'WCENT/Existencias'
4,87638,4/13/2022 16:46,2.0,done,25926,'USB FLASH DRIVE 16GB',13,'PL/Traspaso desde Alm Central',8,'WCENT/Existencias'
...,...,...,...,...,...,...,...,...,...,...
516989,534205,2/23/2023 18:53,1.0,done,28980,'[AUT119] AUDIFONOS INAL 1HORA AUT119 MORADO-P...,82,'WVRTA/Existencias',5,'Partner Locations/Clientes'
516990,534428,2/23/2023 23:39,1.0,done,19210,'AUDIFONO GAMER STF BEAST-MUSPELL NEGRO',82,'WVRTA/Existencias',5,'Partner Locations/Clientes'
516991,534429,2/23/2023 23:39,1.0,done,25596,'MOUSE ALAMBRICO MOREKA MU-01',82,'WVRTA/Existencias',5,'Partner Locations/Clientes'
516992,75441,4/5/2022 18:56,1.0,done,25670,'[PJ033] PORTACELULAR MAGNETICO 1HORA PJ033 NE...,82,'WVRTA/Existencias',14,'VL/Ajustes de Inventario'


In [14]:
# Define a list of words that represent devices
accesories_words = ['MICA', 'MICRO', 'SOPORTE', 'GLASS', 'SIM', 'TARJETA', 'TIEMPO AIRE', 'OTTER' , 
                    'ADAPTADOR DE CORRIENTE', 'USB', 'FLASH DRIVE', 'AUDIFONO', 'AUDÍFONOS', 'FDA',
                   'CARGADOR', 'PORTA', 'CABLE','TABLET']
# Check which rows contain any of the device-related words
is_device =new_stock_move_df ['Description'].str.contains('|'.join(accesories_words))

# Invert the boolean values to select only the rows where no device-related words are present
new_stock_move_df = new_stock_move_df[~is_device]

# Rename colum qty_done to qty
new_stock_move_df = new_stock_move_df.rename(columns={'qty_done': 'qty'})
new_stock_move_df 

Unnamed: 0,id,date,qty,state,Product_ID,Description,ID_Warehouse,Location_Warehouse,Location_dest_id,Location_dest
0,87634,2022-04-13 16:46:00,1.0,done,25761,'SAM A22 4/64 NEGRO',13,'PL/Traspaso desde Alm Central',8,'WCENT/Existencias'
1,87635,2022-04-13 16:46:00,1.0,done,25550,'MOTO G20 4/64 ROSA',13,'PL/Traspaso desde Alm Central',8,'WCENT/Existencias'
2,87636,2022-04-13 16:46:00,1.0,done,24903,'LANIX X860 2/32 VDE',13,'PL/Traspaso desde Alm Central',8,'WCENT/Existencias'
3,87637,2022-04-13 16:46:00,1.0,done,25549,'MOTO G20 4/64 AZUL',13,'PL/Traspaso desde Alm Central',8,'WCENT/Existencias'
21,110449,2022-05-04 14:15:00,1.0,done,25758,'SAM A22 4/64 BCO',13,'PL/Traspaso desde Alm Central',8,'WCENT/Existencias'
...,...,...,...,...,...,...,...,...,...,...
516891,488917,2023-01-20 19:41:00,1.0,done,19506,'[BOC062N] BOCINA INAL 1HORA BOC062 NEGRO',82,'WVRTA/Existencias',5,'Partner Locations/Clientes'
516909,491553,2023-01-23 17:42:00,1.0,done,25648,'[GAR108] PLUGIN AUTO 1HORA 2PUERTOS GAR108 NE...,82,'WVRTA/Existencias',5,'Partner Locations/Clientes'
516930,504804,2023-02-02 19:51:00,1.0,done,25691,"'[GAR159] POWER BANK 20,000 mAh 1HORA GAR159 N...",82,'WVRTA/Existencias',5,'Partner Locations/Clientes'
516973,526934,2023-02-18 16:57:00,1.0,done,29542,'WIRED BUDS NOKIA 101 NEGRO',82,'WVRTA/Existencias',5,'Partner Locations/Clientes'


In [15]:
#Convert date to datetime
new_stock_move_df["date"] = new_stock_move_df["date"].astype('datetime64[ns]')
new_stock_move_df

Unnamed: 0,id,date,qty,state,Product_ID,Description,ID_Warehouse,Location_Warehouse,Location_dest_id,Location_dest
0,87634,2022-04-13 16:46:00,1.0,done,25761,'SAM A22 4/64 NEGRO',13,'PL/Traspaso desde Alm Central',8,'WCENT/Existencias'
1,87635,2022-04-13 16:46:00,1.0,done,25550,'MOTO G20 4/64 ROSA',13,'PL/Traspaso desde Alm Central',8,'WCENT/Existencias'
2,87636,2022-04-13 16:46:00,1.0,done,24903,'LANIX X860 2/32 VDE',13,'PL/Traspaso desde Alm Central',8,'WCENT/Existencias'
3,87637,2022-04-13 16:46:00,1.0,done,25549,'MOTO G20 4/64 AZUL',13,'PL/Traspaso desde Alm Central',8,'WCENT/Existencias'
21,110449,2022-05-04 14:15:00,1.0,done,25758,'SAM A22 4/64 BCO',13,'PL/Traspaso desde Alm Central',8,'WCENT/Existencias'
...,...,...,...,...,...,...,...,...,...,...
516891,488917,2023-01-20 19:41:00,1.0,done,19506,'[BOC062N] BOCINA INAL 1HORA BOC062 NEGRO',82,'WVRTA/Existencias',5,'Partner Locations/Clientes'
516909,491553,2023-01-23 17:42:00,1.0,done,25648,'[GAR108] PLUGIN AUTO 1HORA 2PUERTOS GAR108 NE...,82,'WVRTA/Existencias',5,'Partner Locations/Clientes'
516930,504804,2023-02-02 19:51:00,1.0,done,25691,"'[GAR159] POWER BANK 20,000 mAh 1HORA GAR159 N...",82,'WVRTA/Existencias',5,'Partner Locations/Clientes'
516973,526934,2023-02-18 16:57:00,1.0,done,29542,'WIRED BUDS NOKIA 101 NEGRO',82,'WVRTA/Existencias',5,'Partner Locations/Clientes'


In [16]:
# Export the new DataFrame to a CSV file
new_stock_move_df.to_csv('new_stock_move.csv', index=False)