<h1 id="tocheading">Table of Contents</h1>
<div id="toc"></div>

In [1]:
#from google.colab import drive
#drive.mount('/content/drive')

In [2]:
!pip install xlrd
!pip install folium
!pip install xgboost
!pip install geocoder
!pip install holidays-es
! pip install https://github.com/pandas-profiling/pandas-profiling/archive/master.zip

Collecting https://github.com/pandas-profiling/pandas-profiling/archive/master.zip
  Using cached https://github.com/pandas-profiling/pandas-profiling/archive/master.zip
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'


In [3]:
%%javascript
$.getScript('https://kmahelona.github.io/ipython_notebook_goodies/ipython_notebook_toc.js')

<IPython.core.display.Javascript object>

# Setting up the Jupyter Notebook

All imports and configurations needed to set up the model are done here.

In [4]:
# `IPython.core.display` is a deprecated import location for these helpers (the notebook
# echoes a warning for it); `IPython.display` is the public module exposing the same names.
from IPython.display import display, HTML

# Inject CSS into the notebook page to widen the cells and enlarge rendered text.
display(HTML("<style>.container { width:100% !important; }</style>")) # Increase cell width
display(HTML("<style>.rendered_html { font-size: 16px; }</style>")) # Increase font size


# for data manipulation
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import xlrd
pd.set_option('display.max_columns', 60)
import json
import geocoder
from holidays_es import Province
from datetime import datetime, timedelta


# for visualization
from IPython.core.pylabtools import figsize
from matplotlib import pyplot as plt
%matplotlib inline
# to include graphs inline within the frontends next to code
import seaborn as sns
sns.set_context(font_scale=2)
import folium
from pandas_profiling import ProfileReport


# to bypass warnings in various dataframe assignments
pd.options.mode.chained_assignment = None

# machine learning models
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression

# preprocessing functions and evaluation models
from sklearn.model_selection import train_test_split, RandomizedSearchCV, GridSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.dummy import DummyClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingClassifier
from statsmodels.tsa.seasonal import seasonal_decompose

# Clustering
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.metrics import silhouette_score

  from IPython.core.display import display, HTML


  from pandas_profiling import ProfileReport


# 1. Understand, Clean and Format Data

To understand how the data is structured, we are going to look at:
* First and last rows
* Information
* Descriptive statistics of the dataset.

and apply cleaning and formatting afterwards, if necessary.

## Data Loading

In [None]:
##############  DATA LOADING COLAB #####################
# Reads every input dataset from the Google Drive folder used on Colab.

# Weather file: semicolon-separated, with a comma as the decimal mark.
_df_clima = pd.read_csv(
    "/content/drive/MyDrive/ColabNotebooks/CapstonesDatasets/CLIMA.csv",
    sep=";",
    decimal=',',
)

# Sales files: each one is read and then every column is passed through pd.to_numeric,
# with values that cannot be parsed replaced by NaN (errors='coerce').
_df_ventas, _df_ventas_part1, _df_ventas_part2 = (
    pd.read_csv(csv_path).apply(pd.to_numeric, errors='coerce')
    for csv_path in (
        "/content/drive/MyDrive/ColabNotebooks/CapstonesDatasets/VENTAS.csv",
        "/content/drive/MyDrive/ColabNotebooks/CapstonesDatasets/VENTAS_PART1.csv",
        "/content/drive/MyDrive/ColabNotebooks/CapstonesDatasets/VENTAS_PART2.csv",
    )
)

_df_promociones = pd.read_csv(
    "/content/drive/MyDrive/ColabNotebooks/CapstonesDatasets/PROMOCIONES.csv"
)
_df_proyecciones = pd.read_csv(
    "/content/drive/MyDrive/ColabNotebooks/CapstonesDatasets/PROYECCIONES.csv",
    sep=";",
)

# sheet_name=None loads every sheet of the workbook into a dict keyed by sheet name.
_df_dicco = pd.read_excel(
    "/content/drive/MyDrive/ColabNotebooks/CapstonesDatasets/DICCIONARIO.xlsx",
    sheet_name=None,
)
_df_Censo_xlsx = pd.read_excel(
    "/content/drive/MyDrive/ColabNotebooks/CapstonesDatasets/Censo.xlsx"
)
_df_holidays = pd.read_csv(
    "/content/drive/MyDrive/ColabNotebooks/CapstonesDatasets/holidays_exploted_2021-2023_week_code.csv"
)

In [6]:
##############  DATA LOADING Local Environment #####################
# Reads every input dataset from the relative `Datasets/` folder.

# Weather file: semicolon-separated, with a comma as the decimal mark.
_df_clima = pd.read_csv("Datasets/CLIMA.csv", sep=";", decimal=',')

# Sales files: each one is read and then every column is passed through pd.to_numeric,
# with values that cannot be parsed replaced by NaN (errors='coerce').
_df_ventas, _df_ventas_part1, _df_ventas_part2 = (
    pd.read_csv(csv_path).apply(pd.to_numeric, errors='coerce')
    for csv_path in (
        "Datasets/VENTAS.csv",
        "Datasets/VENTAS_PART1.csv",
        "Datasets/VENTAS_PART2.csv",
    )
)

_df_promociones = pd.read_csv("Datasets/PROMOCIONES.csv")
_df_proyecciones = pd.read_csv("Datasets/PROYECCIONES.csv", sep=";")

# sheet_name=None loads every sheet of the workbook into a dict keyed by sheet name.
_df_dicco = pd.read_excel("Datasets/DICCIONARIO.xlsx", sheet_name=None)
_df_Censo_xlsx = pd.read_excel("Datasets/Censo.xlsx")
_df_holidays = pd.read_csv("Datasets/holidays_exploted_2021-2023_week_code.csv")


In [7]:
# Pull the individual reference tables out of `_df_dicco`, one DataFrame per key.
(
    _df_Stores,
    _df_Provinces,
    _df_Products,
    _df_TimePeriods,
    _df_Channels,
    _df_Promotions,
) = (
    _df_dicco[sheet_key]
    for sheet_key in ('Stores', 'Provinces', 'Products', 'Time_Periods', 'Channels', 'Promotions')
)

## Data Merging - Enrichment - Create a single df

### Enrichment functions

In [8]:
### Function to obtain holidays from 2019 to 2023 for each province

def obtain_holidays(provinces_df):
    """Add one ``Holidays_<year>`` column per year from 2019 to 2023.

    Each cell holds whatever ``Province(name=..., year=...).holidays()`` returns
    for that row's province and that year.

    Parameters
    ----------
    provinces_df : pandas.DataFrame
        Must contain a ``'Provincia'`` column of province names (strings).
        The frame is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``provinces_df`` object, with the five new columns added.
    """
    # Names that are reduced to their last word before the lookup.
    last_word_only = {
        'santa-cruz-de-tenerife',
        'area-metropolitana-de-barcelona',
        'area-metropolitana-de-madrid',
    }

    # The normalised name does not depend on the year, so compute it once per row:
    # lower-case, spaces -> '-', 'ñ' -> 'n', 'k' -> 'c'.
    lookup_names = []
    for raw_name in provinces_df['Provincia']:
        lookup_name = raw_name.lower().replace(' ', '-').replace('ñ', 'n').replace('k', 'c')
        if lookup_name in last_word_only:
            lookup_name = lookup_name.split('-')[-1]
        lookup_names.append(lookup_name)

    for y in range(2019, 2024):
        holidays_per_row = [Province(name=name, year=y).holidays() for name in lookup_names]
        # Assign the whole column at once as an object Series aligned on the frame's own
        # index. The previous element-wise `df[col][i] = value` was a chained assignment
        # (not guaranteed to write back into the frame), relied on the index being
        # 0..n-1, and pushed arbitrary objects into a column created as integer 0.
        provinces_df[f'Holidays_{y}'] = pd.Series(
            holidays_per_row, index=provinces_df.index, dtype=object
        )
    return provinces_df



In [9]:
### Function to obtain the lat and lon of the provinces

def obtain_x_y_coords(provinces_df):
    """Add ``Lat`` and ``Lon`` columns by geocoding every province name.

    Each name in ``'Provincia'`` is sent to ``geocoder.arcgis`` as
    ``"<name>,Spain"`` and the first two entries of the result's ``latlng`` are
    stored as latitude and longitude.

    Parameters
    ----------
    provinces_df : pandas.DataFrame
        Must contain a ``'Provincia'`` column of province names (strings).
        The frame is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``provinces_df`` object with float ``Lat`` / ``Lon`` columns.
        Rows whose lookup yields no coordinates get NaN and a warning is issued.
    """
    import warnings

    latitudes = []
    longitudes = []
    for province_name in provinces_df['Provincia']:
        coords = geocoder.arcgis(province_name + ',Spain').latlng
        if not coords:
            # A falsy `latlng` (None or empty) means there is nothing to index;
            # record the gap instead of failing on `coords[0]`.
            warnings.warn(
                f"No coordinates returned for province {province_name!r}; Lat/Lon set to NaN"
            )
            coords = (float('nan'), float('nan'))
        latitudes.append(coords[0])
        longitudes.append(coords[1])

    # Whole-column assignment with a float dtype, aligned on the frame's own index.
    # The previous element-wise `df['Lat'][i] = value` was a chained assignment (not
    # guaranteed to write back into the frame), relied on the index being 0..n-1, and
    # wrote floats into columns that had been created as integer 0.
    provinces_df['Lat'] = pd.Series(latitudes, index=provinces_df.index, dtype=float)
    provinces_df['Lon'] = pd.Series(longitudes, index=provinces_df.index, dtype=float)
    return provinces_df

In [10]:
### function to obtain season, month and year from week code
def week_to_month_year_season(week_code, year):
    """Return the month name, year and season of the first day of a coded week.

    Week 1 starts on the first Sunday on or after 1 January of ``year``; every
    following week code starts seven days later, so codes above 52 roll over
    into later calendar years.

    Parameters
    ----------
    week_code : int
        Sequential, 1-based week number counted from ``year``.
    year : int
        Calendar year in which week 1 is anchored.

    Returns
    -------
    tuple
        ``(month, year, season)``: the English month name, the calendar year of
        the week's first day, and one of ``'Spring'``, ``'Summer'``, ``'Fall'``
        or ``'Winter'``.
    """
    # English names are spelled out instead of using strftime('%B'): that directive
    # follows the process locale, and a non-English month name would neither match
    # the English season lists nor stay stable across machines.
    month_names = (
        'January', 'February', 'March', 'April', 'May', 'June',
        'July', 'August', 'September', 'October', 'November', 'December',
    )
    # Months not listed here (December, January, February) are winter.
    season_by_month = {
        3: 'Spring', 4: 'Spring', 5: 'Spring',
        6: 'Summer', 7: 'Summer', 8: 'Summer',
        9: 'Fall', 10: 'Fall', 11: 'Fall',
    }

    first_of_year = datetime(year, 1, 1)
    # weekday() is 0 for Monday ... 6 for Sunday, so this lands on the Sunday of the
    # Monday-based week that contains 1 January.
    first_sunday = first_of_year + timedelta(days=6 - first_of_year.weekday())
    start_of_week = first_sunday + timedelta(days=(week_code - 1) * 7)

    month = month_names[start_of_week.month - 1]
    season = season_by_month.get(start_of_week.month, 'Winter')
    return month, start_of_week.year, season



### Data Merging & enrichment

In [11]:

# Normalise the promotion labels so they make clean column suffixes: drop the
# 'Promo ' prefix and replace the remaining spaces with underscores.
# regex=False states that both patterns are literal text, whatever default the
# installed pandas version uses for `str.replace`.
_df_promociones['promocion'] = (
    _df_promociones['promocion']
    .str.replace('Promo ', '', regex=False)
    .str.replace(' ', '_', regex=False)
)


### dummy variables for promotions
# dtype='uint8' pins 0/1 integer indicators; without it newer pandas releases
# return booleans from get_dummies.
promo_dummies = pd.get_dummies(_df_promociones, columns=['promocion'], dtype='uint8')
# Collapse to one row per (store, week, product): taking the max keeps a 1 for every
# promotion type that appears on at least one of the grouped rows.
promo_dummies = promo_dummies.groupby(['cod_tienda', 'cod_semana', 'cod_producto']).agg('max').reset_index()



In [12]:
###control cell for checking the data 
promo_dummies

Unnamed: 0,cod_tienda,cod_semana,cod_producto,promocion_cabecera,promocion_descuento,promocion_expositor,promocion_extra_cantidad,promocion_folleto,promocion_isla,promocion_multicompra,promocion_regalo
0,1,1,222,0,1,0,0,0,0,0,0
1,1,1,612,0,1,0,0,0,0,0,0
2,1,1,625,0,1,0,0,0,0,0,0
3,1,1,832,0,1,0,0,0,0,0,0
4,1,1,869,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
1417436,557,113,1312,0,0,0,0,1,0,0,0
1417437,557,116,296,0,0,0,0,1,0,0,0
1417438,557,116,863,0,0,0,0,1,0,0,0
1417439,557,116,1086,0,0,1,0,0,0,0,0


In [13]:
# Enrich provinces with lat and lon.
# obtain_x_y_coords adds the columns to the frame it receives and returns that same
# object, so `_df_Provinces_enriched_x_y` and `_df_Provinces` refer to one DataFrame.
_df_Provinces_enriched_x_y = obtain_x_y_coords(_df_Provinces)


In [14]:
###control cell for checking the data
_df_Provinces_enriched_x_y

Unnamed: 0,cod_provincia,Provincia,Comunidad autónoma,Lat,Lon
0,1,Alava,País Vasco,42.834658,-2.720398
1,2,Albacete,Castilla-La Mancha,38.99793,-1.85292
2,3,Alicante,Comunidad Valenciana,38.3441,-0.48043
3,4,Almeria,Andalucía,36.84191,-2.46362
4,5,Avila,Castilla y León,40.65786,-4.69226
5,6,Badajoz,Extremadura,38.87483,-6.97272
6,7,Baleares,Baleares,39.574228,2.912648
7,8,Barcelona,Cataluña,41.38804,2.17001
8,9,Burgos,Castilla y León,42.34126,-3.69992
9,10,Caceres,Extremadura,39.47825,-6.36939


In [15]:
# Create two separate dataframes from the sales data using the week code:
# week codes up to 116 go to the historic frame, later week codes to the projected frame.
# .copy() makes each subset an independent DataFrame, so the columns added to it in
# later cells are ordinary assignments rather than writes to a slice of `_df_ventas`.

_df_ventas_historic=_df_ventas[_df_ventas.cod_semana <= 116].copy()
_df_ventas_projected=_df_ventas[_df_ventas.cod_semana > 116].copy()

In [16]:
### derive the month, year and season columns from the week code
# The conversion depends only on the week code (counted from 2021), so it is evaluated
# once per distinct code and broadcast with .map() instead of calling the helper for
# every sales row. This also works on an empty frame, where unpacking `zip(*...)` into
# three targets would fail.
_week_calendar = {
    week_code: week_to_month_year_season(week_code, 2021)
    for week_code in _df_ventas_historic['cod_semana'].unique().tolist()
}
for _position, _column in enumerate(('month', 'year', 'season')):
    _df_ventas_historic[_column] = _df_ventas_historic['cod_semana'].map(
        {week_code: parts[_position] for week_code, parts in _week_calendar.items()}
    )


In [17]:
###control cell for checking the data
_df_ventas_historic

Unnamed: 0,cod_tienda,cod_semana,cod_producto,ventas_unidades,ventas_valor,ventas_volumen,numero_referencias,precio_real_unidades,precio_real_volumen,precio_tarifa_unidades,precio_tarifa_volumen,month,year,season
0,243,42,236,30,19.50,10,1,0.65,0.33,0.65,0.33,October,2021,Fall
1,243,42,239,2,1.82,1,1,0.91,0.33,0.91,0.33,October,2021,Fall
2,243,42,247,2,10.32,4,1,5.16,1.98,5.16,1.98,October,2021,Fall
3,243,42,258,3,14.97,6,1,4.99,1.98,4.99,1.98,October,2021,Fall
4,243,42,264,2,10.88,4,1,5.44,1.98,5.44,1.98,October,2021,Fall
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5730087,526,47,625,100,127.00,36,1,1.27,0.36,1.27,0.36,November,2021,Fall
5730088,526,47,633,20,49.00,10,1,2.45,0.50,2.45,0.50,November,2021,Fall
5730089,526,47,652,6,37.50,18,1,6.25,3.00,6.25,3.00,November,2021,Fall
5730090,526,47,654,23,85.10,35,1,3.70,1.50,3.70,1.50,November,2021,Fall


In [18]:
### merging the historic data with the products dataset
# A left join against a lookup table must not change the number of sales rows; the
# check stops the notebook if duplicated `cod_producto` keys would multiply rows.
_merged = _df_ventas_historic.merge(_df_Products, on='cod_producto', how='left')
assert len(_merged) == len(_df_ventas_historic), "merge with _df_Products changed the row count (duplicate cod_producto keys?)"
_df_ventas_historic = _merged


In [19]:
###control cell for checking the data
_df_ventas_historic

Unnamed: 0,cod_tienda,cod_semana,cod_producto,ventas_unidades,ventas_valor,ventas_volumen,numero_referencias,precio_real_unidades,precio_real_volumen,precio_tarifa_unidades,precio_tarifa_volumen,month,year,season,CATEGORY,SEGMENT,MANUFACTURER,BRAND,PACKAGING,VOLUME,UNITS
0,243,42,236,30,19.50,10,1,0.65,0.33,0.65,0.33,October,2021,Fall,BEER,EXTRA,MANUFACTURER 2,MAN 2 - BRAND 1,CAN,330ML,1CT
1,243,42,239,2,1.82,1,1,0.91,0.33,0.91,0.33,October,2021,Fall,BEER,EXTRA,MANUFACTURER 2,MAN 2 - BRAND 1,CRISTAL BOTTLE,330ML,1CT
2,243,42,247,2,10.32,4,1,5.16,1.98,5.16,1.98,October,2021,Fall,BEER,EXTRA,MANUFACTURER 2,MAN 2 - BRAND 2,CRISTAL BOTTLE,330ML,6CT
3,243,42,258,3,14.97,6,1,4.99,1.98,4.99,1.98,October,2021,Fall,BEER,EXTRA,MANUFACTURER 4,MAN 4 - BRAND 1,CRISTAL BOTTLE,330ML,6CT
4,243,42,264,2,10.88,4,1,5.44,1.98,5.44,1.98,October,2021,Fall,BEER,EXTRA,MANUFACTURER 4,MAN 4 - BRAND 1,CRISTAL BOTTLE,330ML,6CT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5128389,526,47,625,100,127.00,36,1,1.27,0.36,1.27,0.36,November,2021,Fall,BEER,IMPORT PREMIUM,OTHER MANUFACTURERS,OTHER MAN - OTHER BRANDS,CRISTAL BOTTLE,355ML,1CT
5128390,526,47,633,20,49.00,10,1,2.45,0.50,2.45,0.50,November,2021,Fall,BEER,IMPORT PREMIUM,OTHER MANUFACTURERS,OTHER MAN - OTHER BRANDS,CRISTAL BOTTLE,500ML,1CT
5128391,526,47,652,6,37.50,18,1,6.25,3.00,6.25,3.00,November,2021,Fall,BEER,IMPORT PREMIUM,MANUFACTURER 2,MAN 2 - BRAND 3,CRISTAL BOTTLE,250ML,12CT
5128392,526,47,654,23,85.10,35,1,3.70,1.50,3.70,1.50,November,2021,Fall,BEER,IMPORT PREMIUM,MANUFACTURER 2,MAN 2 - BRAND 3,CRISTAL BOTTLE,250ML,6CT


In [20]:
### merging the data with the promotions dummy
# Sales rows without a matching promotion row get NaN in every promotion column.
# The row-count check stops the notebook if duplicated keys would multiply sales rows.
_merged = _df_ventas_historic.merge(promo_dummies, on=['cod_tienda','cod_semana','cod_producto'], how='left')
assert len(_merged) == len(_df_ventas_historic), "merge with promo_dummies changed the row count (duplicate store/week/product keys?)"
_df_ventas_historic = _merged


In [21]:
###control cell for checking the data
_df_ventas_historic

Unnamed: 0,cod_tienda,cod_semana,cod_producto,ventas_unidades,ventas_valor,ventas_volumen,numero_referencias,precio_real_unidades,precio_real_volumen,precio_tarifa_unidades,precio_tarifa_volumen,month,year,season,CATEGORY,SEGMENT,MANUFACTURER,BRAND,PACKAGING,VOLUME,UNITS,promocion_cabecera,promocion_descuento,promocion_expositor,promocion_extra_cantidad,promocion_folleto,promocion_isla,promocion_multicompra,promocion_regalo
0,243,42,236,30,19.50,10,1,0.65,0.33,0.65,0.33,October,2021,Fall,BEER,EXTRA,MANUFACTURER 2,MAN 2 - BRAND 1,CAN,330ML,1CT,,,,,,,,
1,243,42,239,2,1.82,1,1,0.91,0.33,0.91,0.33,October,2021,Fall,BEER,EXTRA,MANUFACTURER 2,MAN 2 - BRAND 1,CRISTAL BOTTLE,330ML,1CT,,,,,,,,
2,243,42,247,2,10.32,4,1,5.16,1.98,5.16,1.98,October,2021,Fall,BEER,EXTRA,MANUFACTURER 2,MAN 2 - BRAND 2,CRISTAL BOTTLE,330ML,6CT,,,,,,,,
3,243,42,258,3,14.97,6,1,4.99,1.98,4.99,1.98,October,2021,Fall,BEER,EXTRA,MANUFACTURER 4,MAN 4 - BRAND 1,CRISTAL BOTTLE,330ML,6CT,,,,,,,,
4,243,42,264,2,10.88,4,1,5.44,1.98,5.44,1.98,October,2021,Fall,BEER,EXTRA,MANUFACTURER 4,MAN 4 - BRAND 1,CRISTAL BOTTLE,330ML,6CT,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5128389,526,47,625,100,127.00,36,1,1.27,0.36,1.27,0.36,November,2021,Fall,BEER,IMPORT PREMIUM,OTHER MANUFACTURERS,OTHER MAN - OTHER BRANDS,CRISTAL BOTTLE,355ML,1CT,,,,,,,,
5128390,526,47,633,20,49.00,10,1,2.45,0.50,2.45,0.50,November,2021,Fall,BEER,IMPORT PREMIUM,OTHER MANUFACTURERS,OTHER MAN - OTHER BRANDS,CRISTAL BOTTLE,500ML,1CT,,,,,,,,
5128391,526,47,652,6,37.50,18,1,6.25,3.00,6.25,3.00,November,2021,Fall,BEER,IMPORT PREMIUM,MANUFACTURER 2,MAN 2 - BRAND 3,CRISTAL BOTTLE,250ML,12CT,,,,,,,,
5128392,526,47,654,23,85.10,35,1,3.70,1.50,3.70,1.50,November,2021,Fall,BEER,IMPORT PREMIUM,MANUFACTURER 2,MAN 2 - BRAND 3,CRISTAL BOTTLE,250ML,6CT,,,,,,,,


In [22]:
### filling the NaN values with 0

# Every column whose name starts with 'promocion_' is a promotion indicator; NaN left
# by the left merge is replaced with 0 and the columns are stored as integers.
# `fillna(..., downcast=...)` is deprecated in recent pandas and was redundant here,
# because the explicit astype(int) already produces the integer dtype.
pattern = r'^promocion_'
columns_to_fill = _df_ventas_historic.filter(regex=pattern).columns
_df_ventas_historic[columns_to_fill] = _df_ventas_historic[columns_to_fill].fillna(0).astype(int)


In [23]:
###control cell for checking the data
_df_ventas_historic

Unnamed: 0,cod_tienda,cod_semana,cod_producto,ventas_unidades,ventas_valor,ventas_volumen,numero_referencias,precio_real_unidades,precio_real_volumen,precio_tarifa_unidades,precio_tarifa_volumen,month,year,season,CATEGORY,SEGMENT,MANUFACTURER,BRAND,PACKAGING,VOLUME,UNITS,promocion_cabecera,promocion_descuento,promocion_expositor,promocion_extra_cantidad,promocion_folleto,promocion_isla,promocion_multicompra,promocion_regalo
0,243,42,236,30,19.50,10,1,0.65,0.33,0.65,0.33,October,2021,Fall,BEER,EXTRA,MANUFACTURER 2,MAN 2 - BRAND 1,CAN,330ML,1CT,0,0,0,0,0,0,0,0
1,243,42,239,2,1.82,1,1,0.91,0.33,0.91,0.33,October,2021,Fall,BEER,EXTRA,MANUFACTURER 2,MAN 2 - BRAND 1,CRISTAL BOTTLE,330ML,1CT,0,0,0,0,0,0,0,0
2,243,42,247,2,10.32,4,1,5.16,1.98,5.16,1.98,October,2021,Fall,BEER,EXTRA,MANUFACTURER 2,MAN 2 - BRAND 2,CRISTAL BOTTLE,330ML,6CT,0,0,0,0,0,0,0,0
3,243,42,258,3,14.97,6,1,4.99,1.98,4.99,1.98,October,2021,Fall,BEER,EXTRA,MANUFACTURER 4,MAN 4 - BRAND 1,CRISTAL BOTTLE,330ML,6CT,0,0,0,0,0,0,0,0
4,243,42,264,2,10.88,4,1,5.44,1.98,5.44,1.98,October,2021,Fall,BEER,EXTRA,MANUFACTURER 4,MAN 4 - BRAND 1,CRISTAL BOTTLE,330ML,6CT,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5128389,526,47,625,100,127.00,36,1,1.27,0.36,1.27,0.36,November,2021,Fall,BEER,IMPORT PREMIUM,OTHER MANUFACTURERS,OTHER MAN - OTHER BRANDS,CRISTAL BOTTLE,355ML,1CT,0,0,0,0,0,0,0,0
5128390,526,47,633,20,49.00,10,1,2.45,0.50,2.45,0.50,November,2021,Fall,BEER,IMPORT PREMIUM,OTHER MANUFACTURERS,OTHER MAN - OTHER BRANDS,CRISTAL BOTTLE,500ML,1CT,0,0,0,0,0,0,0,0
5128391,526,47,652,6,37.50,18,1,6.25,3.00,6.25,3.00,November,2021,Fall,BEER,IMPORT PREMIUM,MANUFACTURER 2,MAN 2 - BRAND 3,CRISTAL BOTTLE,250ML,12CT,0,0,0,0,0,0,0,0
5128392,526,47,654,23,85.10,35,1,3.70,1.50,3.70,1.50,November,2021,Fall,BEER,IMPORT PREMIUM,MANUFACTURER 2,MAN 2 - BRAND 3,CRISTAL BOTTLE,250ML,6CT,0,0,0,0,0,0,0,0


In [24]:
### Merge projections with historic data
# Joined on store and week. The row-count check stops the notebook if duplicated
# (cod_tienda, cod_semana) keys in `_df_proyecciones` would multiply sales rows.
_merged = _df_ventas_historic.merge(_df_proyecciones, on=['cod_tienda','cod_semana'], how='left')
assert len(_merged) == len(_df_ventas_historic), "merge with _df_proyecciones changed the row count (duplicate store/week keys?)"
_df_ventas_historic = _merged


In [25]:
###control cell for checking the data
_df_ventas_historic

Unnamed: 0,cod_tienda,cod_semana,cod_producto,ventas_unidades,ventas_valor,ventas_volumen,numero_referencias,precio_real_unidades,precio_real_volumen,precio_tarifa_unidades,precio_tarifa_volumen,month,year,season,CATEGORY,SEGMENT,MANUFACTURER,BRAND,PACKAGING,VOLUME,UNITS,promocion_cabecera,promocion_descuento,promocion_expositor,promocion_extra_cantidad,promocion_folleto,promocion_isla,promocion_multicompra,promocion_regalo,factor_extrapolacion
0,243,42,236,30,19.50,10,1,0.65,0.33,0.65,0.33,October,2021,Fall,BEER,EXTRA,MANUFACTURER 2,MAN 2 - BRAND 1,CAN,330ML,1CT,0,0,0,0,0,0,0,0,1148
1,243,42,239,2,1.82,1,1,0.91,0.33,0.91,0.33,October,2021,Fall,BEER,EXTRA,MANUFACTURER 2,MAN 2 - BRAND 1,CRISTAL BOTTLE,330ML,1CT,0,0,0,0,0,0,0,0,1148
2,243,42,247,2,10.32,4,1,5.16,1.98,5.16,1.98,October,2021,Fall,BEER,EXTRA,MANUFACTURER 2,MAN 2 - BRAND 2,CRISTAL BOTTLE,330ML,6CT,0,0,0,0,0,0,0,0,1148
3,243,42,258,3,14.97,6,1,4.99,1.98,4.99,1.98,October,2021,Fall,BEER,EXTRA,MANUFACTURER 4,MAN 4 - BRAND 1,CRISTAL BOTTLE,330ML,6CT,0,0,0,0,0,0,0,0,1148
4,243,42,264,2,10.88,4,1,5.44,1.98,5.44,1.98,October,2021,Fall,BEER,EXTRA,MANUFACTURER 4,MAN 4 - BRAND 1,CRISTAL BOTTLE,330ML,6CT,0,0,0,0,0,0,0,0,1148
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5128389,526,47,625,100,127.00,36,1,1.27,0.36,1.27,0.36,November,2021,Fall,BEER,IMPORT PREMIUM,OTHER MANUFACTURERS,OTHER MAN - OTHER BRANDS,CRISTAL BOTTLE,355ML,1CT,0,0,0,0,0,0,0,0,189
5128390,526,47,633,20,49.00,10,1,2.45,0.50,2.45,0.50,November,2021,Fall,BEER,IMPORT PREMIUM,OTHER MANUFACTURERS,OTHER MAN - OTHER BRANDS,CRISTAL BOTTLE,500ML,1CT,0,0,0,0,0,0,0,0,189
5128391,526,47,652,6,37.50,18,1,6.25,3.00,6.25,3.00,November,2021,Fall,BEER,IMPORT PREMIUM,MANUFACTURER 2,MAN 2 - BRAND 3,CRISTAL BOTTLE,250ML,12CT,0,0,0,0,0,0,0,0,189
5128392,526,47,654,23,85.10,35,1,3.70,1.50,3.70,1.50,November,2021,Fall,BEER,IMPORT PREMIUM,MANUFACTURER 2,MAN 2 - BRAND 3,CRISTAL BOTTLE,250ML,6CT,0,0,0,0,0,0,0,0,189


In [26]:
### Merge stores with historic data
# The row-count check stops the notebook if duplicated `cod_tienda` keys in
# `_df_Stores` would multiply sales rows.
_merged = _df_ventas_historic.merge(_df_Stores, on=['cod_tienda'], how='left')
assert len(_merged) == len(_df_ventas_historic), "merge with _df_Stores changed the row count (duplicate cod_tienda keys?)"
_df_ventas_historic = _merged


In [27]:
###control cell for checking the data
_df_ventas_historic

Unnamed: 0,cod_tienda,cod_semana,cod_producto,ventas_unidades,ventas_valor,ventas_volumen,numero_referencias,precio_real_unidades,precio_real_volumen,precio_tarifa_unidades,precio_tarifa_volumen,month,year,season,CATEGORY,SEGMENT,MANUFACTURER,BRAND,PACKAGING,VOLUME,UNITS,promocion_cabecera,promocion_descuento,promocion_expositor,promocion_extra_cantidad,promocion_folleto,promocion_isla,promocion_multicompra,promocion_regalo,factor_extrapolacion,cod_canal,cod_provincia,postal_code,sales_surface_sqmeters
0,243,42,236,30,19.50,10,1,0.65,0.33,0.65,0.33,October,2021,Fall,BEER,EXTRA,MANUFACTURER 2,MAN 2 - BRAND 1,CAN,330ML,1CT,0,0,0,0,0,0,0,0,1148,1,26,26003,1500
1,243,42,239,2,1.82,1,1,0.91,0.33,0.91,0.33,October,2021,Fall,BEER,EXTRA,MANUFACTURER 2,MAN 2 - BRAND 1,CRISTAL BOTTLE,330ML,1CT,0,0,0,0,0,0,0,0,1148,1,26,26003,1500
2,243,42,247,2,10.32,4,1,5.16,1.98,5.16,1.98,October,2021,Fall,BEER,EXTRA,MANUFACTURER 2,MAN 2 - BRAND 2,CRISTAL BOTTLE,330ML,6CT,0,0,0,0,0,0,0,0,1148,1,26,26003,1500
3,243,42,258,3,14.97,6,1,4.99,1.98,4.99,1.98,October,2021,Fall,BEER,EXTRA,MANUFACTURER 4,MAN 4 - BRAND 1,CRISTAL BOTTLE,330ML,6CT,0,0,0,0,0,0,0,0,1148,1,26,26003,1500
4,243,42,264,2,10.88,4,1,5.44,1.98,5.44,1.98,October,2021,Fall,BEER,EXTRA,MANUFACTURER 4,MAN 4 - BRAND 1,CRISTAL BOTTLE,330ML,6CT,0,0,0,0,0,0,0,0,1148,1,26,26003,1500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5128389,526,47,625,100,127.00,36,1,1.27,0.36,1.27,0.36,November,2021,Fall,BEER,IMPORT PREMIUM,OTHER MANUFACTURERS,OTHER MAN - OTHER BRANDS,CRISTAL BOTTLE,355ML,1CT,0,0,0,0,0,0,0,0,189,2,91,8029,4500
5128390,526,47,633,20,49.00,10,1,2.45,0.50,2.45,0.50,November,2021,Fall,BEER,IMPORT PREMIUM,OTHER MANUFACTURERS,OTHER MAN - OTHER BRANDS,CRISTAL BOTTLE,500ML,1CT,0,0,0,0,0,0,0,0,189,2,91,8029,4500
5128391,526,47,652,6,37.50,18,1,6.25,3.00,6.25,3.00,November,2021,Fall,BEER,IMPORT PREMIUM,MANUFACTURER 2,MAN 2 - BRAND 3,CRISTAL BOTTLE,250ML,12CT,0,0,0,0,0,0,0,0,189,2,91,8029,4500
5128392,526,47,654,23,85.10,35,1,3.70,1.50,3.70,1.50,November,2021,Fall,BEER,IMPORT PREMIUM,MANUFACTURER 2,MAN 2 - BRAND 3,CRISTAL BOTTLE,250ML,6CT,0,0,0,0,0,0,0,0,189,2,91,8029,4500


In [28]:
### Merge channels with historic data
# The row-count check stops the notebook if duplicated `cod_canal` keys in
# `_df_Channels` would multiply sales rows.
_merged = _df_ventas_historic.merge(_df_Channels, on='cod_canal', how='left')
assert len(_merged) == len(_df_ventas_historic), "merge with _df_Channels changed the row count (duplicate cod_canal keys?)"
_df_ventas_historic = _merged


In [29]:
###control cell for checking the data
_df_ventas_historic

Unnamed: 0,cod_tienda,cod_semana,cod_producto,ventas_unidades,ventas_valor,ventas_volumen,numero_referencias,precio_real_unidades,precio_real_volumen,precio_tarifa_unidades,precio_tarifa_volumen,month,year,season,CATEGORY,SEGMENT,MANUFACTURER,BRAND,PACKAGING,VOLUME,UNITS,promocion_cabecera,promocion_descuento,promocion_expositor,promocion_extra_cantidad,promocion_folleto,promocion_isla,promocion_multicompra,promocion_regalo,factor_extrapolacion,cod_canal,cod_provincia,postal_code,sales_surface_sqmeters,Canal,Channel
0,243,42,236,30,19.50,10,1,0.65,0.33,0.65,0.33,October,2021,Fall,BEER,EXTRA,MANUFACTURER 2,MAN 2 - BRAND 1,CAN,330ML,1CT,0,0,0,0,0,0,0,0,1148,1,26,26003,1500,Supermercados,Supermarkets
1,243,42,239,2,1.82,1,1,0.91,0.33,0.91,0.33,October,2021,Fall,BEER,EXTRA,MANUFACTURER 2,MAN 2 - BRAND 1,CRISTAL BOTTLE,330ML,1CT,0,0,0,0,0,0,0,0,1148,1,26,26003,1500,Supermercados,Supermarkets
2,243,42,247,2,10.32,4,1,5.16,1.98,5.16,1.98,October,2021,Fall,BEER,EXTRA,MANUFACTURER 2,MAN 2 - BRAND 2,CRISTAL BOTTLE,330ML,6CT,0,0,0,0,0,0,0,0,1148,1,26,26003,1500,Supermercados,Supermarkets
3,243,42,258,3,14.97,6,1,4.99,1.98,4.99,1.98,October,2021,Fall,BEER,EXTRA,MANUFACTURER 4,MAN 4 - BRAND 1,CRISTAL BOTTLE,330ML,6CT,0,0,0,0,0,0,0,0,1148,1,26,26003,1500,Supermercados,Supermarkets
4,243,42,264,2,10.88,4,1,5.44,1.98,5.44,1.98,October,2021,Fall,BEER,EXTRA,MANUFACTURER 4,MAN 4 - BRAND 1,CRISTAL BOTTLE,330ML,6CT,0,0,0,0,0,0,0,0,1148,1,26,26003,1500,Supermercados,Supermarkets
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5128389,526,47,625,100,127.00,36,1,1.27,0.36,1.27,0.36,November,2021,Fall,BEER,IMPORT PREMIUM,OTHER MANUFACTURERS,OTHER MAN - OTHER BRANDS,CRISTAL BOTTLE,355ML,1CT,0,0,0,0,0,0,0,0,189,2,91,8029,4500,Hipermercados,Hipermarkets
5128390,526,47,633,20,49.00,10,1,2.45,0.50,2.45,0.50,November,2021,Fall,BEER,IMPORT PREMIUM,OTHER MANUFACTURERS,OTHER MAN - OTHER BRANDS,CRISTAL BOTTLE,500ML,1CT,0,0,0,0,0,0,0,0,189,2,91,8029,4500,Hipermercados,Hipermarkets
5128391,526,47,652,6,37.50,18,1,6.25,3.00,6.25,3.00,November,2021,Fall,BEER,IMPORT PREMIUM,MANUFACTURER 2,MAN 2 - BRAND 3,CRISTAL BOTTLE,250ML,12CT,0,0,0,0,0,0,0,0,189,2,91,8029,4500,Hipermercados,Hipermarkets
5128392,526,47,654,23,85.10,35,1,3.70,1.50,3.70,1.50,November,2021,Fall,BEER,IMPORT PREMIUM,MANUFACTURER 2,MAN 2 - BRAND 3,CRISTAL BOTTLE,250ML,6CT,0,0,0,0,0,0,0,0,189,2,91,8029,4500,Hipermercados,Hipermarkets


### Checkpoint - 1


In [30]:
######checkpoint df_ventas_historic 1 Colab
#_df_ventas_historic.to_csv('/content/drive/MyDrive/ColabNotebooks/CapstonesDatasets/_df_ventas_historic_CP1_n.csv')


In [31]:
######checkpoint df_ventas_historic 1 Local
#_df_ventas_historic.to_csv('Datasets/_df_ventas_historic_CP1_n.csv')


In [32]:
######Read checkpoint df_ventas_historic 1 colab
#_df_ventas_historic = pd.read_csv('/content/drive/MyDrive/ColabNotebooks/CapstonesDatasets/_df_ventas_historic_CP1_n.csv')

In [33]:
######Read checkpoint df_ventas_historic 1 local
#_df_ventas_historic = pd.read_csv('Datasets/_df_ventas_historic_CP1_n.csv')

In [34]:
###control cell for checking the data
_df_ventas_historic

Unnamed: 0,cod_tienda,cod_semana,cod_producto,ventas_unidades,ventas_valor,ventas_volumen,numero_referencias,precio_real_unidades,precio_real_volumen,precio_tarifa_unidades,precio_tarifa_volumen,month,year,season,CATEGORY,SEGMENT,MANUFACTURER,BRAND,PACKAGING,VOLUME,UNITS,promocion_cabecera,promocion_descuento,promocion_expositor,promocion_extra_cantidad,promocion_folleto,promocion_isla,promocion_multicompra,promocion_regalo,factor_extrapolacion,cod_canal,cod_provincia,postal_code,sales_surface_sqmeters,Canal,Channel
0,243,42,236,30,19.50,10,1,0.65,0.33,0.65,0.33,October,2021,Fall,BEER,EXTRA,MANUFACTURER 2,MAN 2 - BRAND 1,CAN,330ML,1CT,0,0,0,0,0,0,0,0,1148,1,26,26003,1500,Supermercados,Supermarkets
1,243,42,239,2,1.82,1,1,0.91,0.33,0.91,0.33,October,2021,Fall,BEER,EXTRA,MANUFACTURER 2,MAN 2 - BRAND 1,CRISTAL BOTTLE,330ML,1CT,0,0,0,0,0,0,0,0,1148,1,26,26003,1500,Supermercados,Supermarkets
2,243,42,247,2,10.32,4,1,5.16,1.98,5.16,1.98,October,2021,Fall,BEER,EXTRA,MANUFACTURER 2,MAN 2 - BRAND 2,CRISTAL BOTTLE,330ML,6CT,0,0,0,0,0,0,0,0,1148,1,26,26003,1500,Supermercados,Supermarkets
3,243,42,258,3,14.97,6,1,4.99,1.98,4.99,1.98,October,2021,Fall,BEER,EXTRA,MANUFACTURER 4,MAN 4 - BRAND 1,CRISTAL BOTTLE,330ML,6CT,0,0,0,0,0,0,0,0,1148,1,26,26003,1500,Supermercados,Supermarkets
4,243,42,264,2,10.88,4,1,5.44,1.98,5.44,1.98,October,2021,Fall,BEER,EXTRA,MANUFACTURER 4,MAN 4 - BRAND 1,CRISTAL BOTTLE,330ML,6CT,0,0,0,0,0,0,0,0,1148,1,26,26003,1500,Supermercados,Supermarkets
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5128389,526,47,625,100,127.00,36,1,1.27,0.36,1.27,0.36,November,2021,Fall,BEER,IMPORT PREMIUM,OTHER MANUFACTURERS,OTHER MAN - OTHER BRANDS,CRISTAL BOTTLE,355ML,1CT,0,0,0,0,0,0,0,0,189,2,91,8029,4500,Hipermercados,Hipermarkets
5128390,526,47,633,20,49.00,10,1,2.45,0.50,2.45,0.50,November,2021,Fall,BEER,IMPORT PREMIUM,OTHER MANUFACTURERS,OTHER MAN - OTHER BRANDS,CRISTAL BOTTLE,500ML,1CT,0,0,0,0,0,0,0,0,189,2,91,8029,4500,Hipermercados,Hipermarkets
5128391,526,47,652,6,37.50,18,1,6.25,3.00,6.25,3.00,November,2021,Fall,BEER,IMPORT PREMIUM,MANUFACTURER 2,MAN 2 - BRAND 3,CRISTAL BOTTLE,250ML,12CT,0,0,0,0,0,0,0,0,189,2,91,8029,4500,Hipermercados,Hipermarkets
5128392,526,47,654,23,85.10,35,1,3.70,1.50,3.70,1.50,November,2021,Fall,BEER,IMPORT PREMIUM,MANUFACTURER 2,MAN 2 - BRAND 3,CRISTAL BOTTLE,250ML,6CT,0,0,0,0,0,0,0,0,189,2,91,8029,4500,Hipermercados,Hipermarkets


In [35]:
### Merge the enriched provinces with historic data
# The row-count check stops the notebook if duplicated `cod_provincia` keys in the
# province table would multiply sales rows.
_merged = _df_ventas_historic.merge(_df_Provinces_enriched_x_y, on='cod_provincia', how='left')
assert len(_merged) == len(_df_ventas_historic), "merge with _df_Provinces_enriched_x_y changed the row count (duplicate cod_provincia keys?)"
_df_ventas_historic = _merged


In [36]:
###control cell for checking the data
_df_ventas_historic

Unnamed: 0,cod_tienda,cod_semana,cod_producto,ventas_unidades,ventas_valor,ventas_volumen,numero_referencias,precio_real_unidades,precio_real_volumen,precio_tarifa_unidades,precio_tarifa_volumen,month,year,season,CATEGORY,SEGMENT,MANUFACTURER,BRAND,PACKAGING,VOLUME,UNITS,promocion_cabecera,promocion_descuento,promocion_expositor,promocion_extra_cantidad,promocion_folleto,promocion_isla,promocion_multicompra,promocion_regalo,factor_extrapolacion,cod_canal,cod_provincia,postal_code,sales_surface_sqmeters,Canal,Channel,Provincia,Comunidad autónoma,Lat,Lon
0,243,42,236,30,19.50,10,1,0.65,0.33,0.65,0.33,October,2021,Fall,BEER,EXTRA,MANUFACTURER 2,MAN 2 - BRAND 1,CAN,330ML,1CT,0,0,0,0,0,0,0,0,1148,1,26,26003,1500,Supermercados,Supermarkets,La Rioja,La Rioja,42.27474,-2.51708
1,243,42,239,2,1.82,1,1,0.91,0.33,0.91,0.33,October,2021,Fall,BEER,EXTRA,MANUFACTURER 2,MAN 2 - BRAND 1,CRISTAL BOTTLE,330ML,1CT,0,0,0,0,0,0,0,0,1148,1,26,26003,1500,Supermercados,Supermarkets,La Rioja,La Rioja,42.27474,-2.51708
2,243,42,247,2,10.32,4,1,5.16,1.98,5.16,1.98,October,2021,Fall,BEER,EXTRA,MANUFACTURER 2,MAN 2 - BRAND 2,CRISTAL BOTTLE,330ML,6CT,0,0,0,0,0,0,0,0,1148,1,26,26003,1500,Supermercados,Supermarkets,La Rioja,La Rioja,42.27474,-2.51708
3,243,42,258,3,14.97,6,1,4.99,1.98,4.99,1.98,October,2021,Fall,BEER,EXTRA,MANUFACTURER 4,MAN 4 - BRAND 1,CRISTAL BOTTLE,330ML,6CT,0,0,0,0,0,0,0,0,1148,1,26,26003,1500,Supermercados,Supermarkets,La Rioja,La Rioja,42.27474,-2.51708
4,243,42,264,2,10.88,4,1,5.44,1.98,5.44,1.98,October,2021,Fall,BEER,EXTRA,MANUFACTURER 4,MAN 4 - BRAND 1,CRISTAL BOTTLE,330ML,6CT,0,0,0,0,0,0,0,0,1148,1,26,26003,1500,Supermercados,Supermarkets,La Rioja,La Rioja,42.27474,-2.51708
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5128389,526,47,625,100,127.00,36,1,1.27,0.36,1.27,0.36,November,2021,Fall,BEER,IMPORT PREMIUM,OTHER MANUFACTURERS,OTHER MAN - OTHER BRANDS,CRISTAL BOTTLE,355ML,1CT,0,0,0,0,0,0,0,0,189,2,91,8029,4500,Hipermercados,Hipermarkets,Area Metropolitana de Barcelona,Cataluña,41.37591,2.14988
5128390,526,47,633,20,49.00,10,1,2.45,0.50,2.45,0.50,November,2021,Fall,BEER,IMPORT PREMIUM,OTHER MANUFACTURERS,OTHER MAN - OTHER BRANDS,CRISTAL BOTTLE,500ML,1CT,0,0,0,0,0,0,0,0,189,2,91,8029,4500,Hipermercados,Hipermarkets,Area Metropolitana de Barcelona,Cataluña,41.37591,2.14988
5128391,526,47,652,6,37.50,18,1,6.25,3.00,6.25,3.00,November,2021,Fall,BEER,IMPORT PREMIUM,MANUFACTURER 2,MAN 2 - BRAND 3,CRISTAL BOTTLE,250ML,12CT,0,0,0,0,0,0,0,0,189,2,91,8029,4500,Hipermercados,Hipermarkets,Area Metropolitana de Barcelona,Cataluña,41.37591,2.14988
5128392,526,47,654,23,85.10,35,1,3.70,1.50,3.70,1.50,November,2021,Fall,BEER,IMPORT PREMIUM,MANUFACTURER 2,MAN 2 - BRAND 3,CRISTAL BOTTLE,250ML,6CT,0,0,0,0,0,0,0,0,189,2,91,8029,4500,Hipermercados,Hipermarkets,Area Metropolitana de Barcelona,Cataluña,41.37591,2.14988


In [37]:
### Adding the weather (clima) data to the historic dataframe
# Joined on week and province. The row-count check stops the notebook if duplicated
# (cod_semana, cod_provincia) keys in `_df_clima` would multiply sales rows.
_merged = _df_ventas_historic.merge(_df_clima, on=['cod_semana','cod_provincia'], how='left')
assert len(_merged) == len(_df_ventas_historic), "merge with _df_clima changed the row count (duplicate week/province keys?)"
_df_ventas_historic = _merged

In [38]:

# For each year 2021-2023 and each holiday scope, add a 'Y'/'N' column to the historic
# frame: 'Y' when the row's week code appears in the matching
# `<scope>_<year>_week_code` column of `_df_holidays`, 'N' otherwise.
# NOTE(review): membership is tested on `cod_semana` alone, so a flag does not depend
# on the row's province or on the row's own `year` value; confirm this is intended.
_week_code_columns = []
for year in range(2021, 2024):
    for holiday_type in ('national_holidays', 'regional_holidays', 'local_holidays'):
        holiday_column = f'{holiday_type}_{year}_week_code'
        _flag_column = f'{holiday_type}_{year}'

        # Default every row to 'N', then switch the matching weeks to 'Y'.
        _df_ventas_historic[_flag_column] = 'N'
        _is_holiday_week = _df_ventas_historic['cod_semana'].isin(_df_holidays[holiday_column])
        _df_ventas_historic.loc[_is_holiday_week, _flag_column] = 'Y'

        _week_code_columns.append(holiday_column)

# Drop the week code columns from _df_holidays.
# NOTE(review): this removes the very columns the loop above reads, so running the
# cell a second time raises a KeyError until `_df_holidays` is reloaded.
_df_holidays.drop(columns=_week_code_columns, inplace=True)



In [39]:
###control cell for checking the data
_df_ventas_historic

Unnamed: 0,cod_tienda,cod_semana,cod_producto,ventas_unidades,ventas_valor,ventas_volumen,numero_referencias,precio_real_unidades,precio_real_volumen,precio_tarifa_unidades,precio_tarifa_volumen,month,year,season,CATEGORY,SEGMENT,MANUFACTURER,BRAND,PACKAGING,VOLUME,UNITS,promocion_cabecera,promocion_descuento,promocion_expositor,promocion_extra_cantidad,promocion_folleto,promocion_isla,promocion_multicompra,promocion_regalo,factor_extrapolacion,cod_canal,cod_provincia,postal_code,sales_surface_sqmeters,Canal,Channel,Provincia,Comunidad autónoma,Lat,Lon,TEMP_MINIMA,TEMP_MAXIMA,TEMP_MEDIA,PRECIPITACION,national_holidays_2021,regional_holidays_2021,local_holidays_2021,national_holidays_2022,regional_holidays_2022,local_holidays_2022,national_holidays_2023,regional_holidays_2023,local_holidays_2023
0,243,42,236,30,19.50,10,1,0.65,0.33,0.65,0.33,October,2021,Fall,BEER,EXTRA,MANUFACTURER 2,MAN 2 - BRAND 1,CAN,330ML,1CT,0,0,0,0,0,0,0,0,1148,1,26,26003,1500,Supermercados,Supermarkets,La Rioja,La Rioja,42.27474,-2.51708,5.671429,12.285714,8.800000,1.285714,N,N,N,N,N,N,N,N,N
1,243,42,239,2,1.82,1,1,0.91,0.33,0.91,0.33,October,2021,Fall,BEER,EXTRA,MANUFACTURER 2,MAN 2 - BRAND 1,CRISTAL BOTTLE,330ML,1CT,0,0,0,0,0,0,0,0,1148,1,26,26003,1500,Supermercados,Supermarkets,La Rioja,La Rioja,42.27474,-2.51708,5.671429,12.285714,8.800000,1.285714,N,N,N,N,N,N,N,N,N
2,243,42,247,2,10.32,4,1,5.16,1.98,5.16,1.98,October,2021,Fall,BEER,EXTRA,MANUFACTURER 2,MAN 2 - BRAND 2,CRISTAL BOTTLE,330ML,6CT,0,0,0,0,0,0,0,0,1148,1,26,26003,1500,Supermercados,Supermarkets,La Rioja,La Rioja,42.27474,-2.51708,5.671429,12.285714,8.800000,1.285714,N,N,N,N,N,N,N,N,N
3,243,42,258,3,14.97,6,1,4.99,1.98,4.99,1.98,October,2021,Fall,BEER,EXTRA,MANUFACTURER 4,MAN 4 - BRAND 1,CRISTAL BOTTLE,330ML,6CT,0,0,0,0,0,0,0,0,1148,1,26,26003,1500,Supermercados,Supermarkets,La Rioja,La Rioja,42.27474,-2.51708,5.671429,12.285714,8.800000,1.285714,N,N,N,N,N,N,N,N,N
4,243,42,264,2,10.88,4,1,5.44,1.98,5.44,1.98,October,2021,Fall,BEER,EXTRA,MANUFACTURER 4,MAN 4 - BRAND 1,CRISTAL BOTTLE,330ML,6CT,0,0,0,0,0,0,0,0,1148,1,26,26003,1500,Supermercados,Supermarkets,La Rioja,La Rioja,42.27474,-2.51708,5.671429,12.285714,8.800000,1.285714,N,N,N,N,N,N,N,N,N
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5128389,526,47,625,100,127.00,36,1,1.27,0.36,1.27,0.36,November,2021,Fall,BEER,IMPORT PREMIUM,OTHER MANUFACTURERS,OTHER MAN - OTHER BRANDS,CRISTAL BOTTLE,355ML,1CT,0,0,0,0,0,0,0,0,189,2,91,8029,4500,Hipermercados,Hipermarkets,Area Metropolitana de Barcelona,Cataluña,41.37591,2.14988,7.657143,14.114286,10.442857,1.242857,N,N,N,N,N,Y,N,N,N
5128390,526,47,633,20,49.00,10,1,2.45,0.50,2.45,0.50,November,2021,Fall,BEER,IMPORT PREMIUM,OTHER MANUFACTURERS,OTHER MAN - OTHER BRANDS,CRISTAL BOTTLE,500ML,1CT,0,0,0,0,0,0,0,0,189,2,91,8029,4500,Hipermercados,Hipermarkets,Area Metropolitana de Barcelona,Cataluña,41.37591,2.14988,7.657143,14.114286,10.442857,1.242857,N,N,N,N,N,Y,N,N,N
5128391,526,47,652,6,37.50,18,1,6.25,3.00,6.25,3.00,November,2021,Fall,BEER,IMPORT PREMIUM,MANUFACTURER 2,MAN 2 - BRAND 3,CRISTAL BOTTLE,250ML,12CT,0,0,0,0,0,0,0,0,189,2,91,8029,4500,Hipermercados,Hipermarkets,Area Metropolitana de Barcelona,Cataluña,41.37591,2.14988,7.657143,14.114286,10.442857,1.242857,N,N,N,N,N,Y,N,N,N
5128392,526,47,654,23,85.10,35,1,3.70,1.50,3.70,1.50,November,2021,Fall,BEER,IMPORT PREMIUM,MANUFACTURER 2,MAN 2 - BRAND 3,CRISTAL BOTTLE,250ML,6CT,0,0,0,0,0,0,0,0,189,2,91,8029,4500,Hipermercados,Hipermarkets,Area Metropolitana de Barcelona,Cataluña,41.37591,2.14988,7.657143,14.114286,10.442857,1.242857,N,N,N,N,N,Y,N,N,N


### Checkpoint - 2

In [40]:
#####Checkpoint _df_ventas_historic 2 Colab
#_df_ventas_historic.to_csv('/content/drive/MyDrive/ColabNotebooks/CapstonesDatasets/_df_ventas_historic_CP2.csv')

In [41]:
######Read checkpoint df_ventas_historic 2 Colab
#_df_ventas_historic = pd.read_csv('/content/drive/MyDrive/ColabNotebooks/CapstonesDatasets/_df_ventas_historic_CP2.csv')

In [42]:
######checkpoint df_ventas_historic 2 Local
#_df_ventas_historic.to_csv('Datasets/_df_ventas_historic_CP2.csv')

In [43]:
######Read checkpoint df_ventas_historic 2 local
#_df_ventas_historic = pd.read_csv('Datasets/_df_ventas_historic_CP2.csv')

In [None]:

# Build a profiling report over every column of the merged historic frame and write
# it to an HTML file in the Datasets folder.
# NOTE(review): the recorded progress bars for this cell all sit at 0%; profiling the
# full frame is presumably slow. Consider a row sample or a reduced profiling mode. TODO confirm.
report = ProfileReport(_df_ventas_historic)
report.to_file(output_file='Datasets/report_output.html')

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]