# Importing libraries

In [312]:
import pandas as pd
import numpy as np
import datetime
import matplotlib.pyplot as plt
import plotly.express as px

# Importing the dataset

In [313]:
# Load the beer-consumption dataset. decimal=',' makes pandas treat a comma
# as the decimal separator when parsing numeric columns.
df = pd.read_csv(
    '/content/Consumo_cerveja.csv',
    decimal=',',
)

In [314]:
# Load the companion holiday table (DATE / HOLIDAY columns) with pandas'
# default parsing options.
df_holidays = pd.read_csv(filepath_or_buffer='/content/Holidays.csv')

In [315]:
# Display the freshly loaded frame to eyeball its columns and values.
df

Unnamed: 0,Data,Temperatura Media (C),Temperatura Minima (C),Temperatura Maxima (C),Precipitacao (mm),Final de Semana,Consumo de cerveja (litros)
0,2015-01-01,27.30,23.9,32.5,0.0,0,25.461
1,2015-01-02,27.02,24.5,33.5,0.0,0,28.972
2,2015-01-03,24.82,22.4,29.9,0.0,1,30.814
3,2015-01-04,23.98,21.5,28.6,1.2,1,29.799
4,2015-01-05,23.82,21.0,28.3,0.0,0,28.900
...,...,...,...,...,...,...,...
360,2015-12-27,24.00,21.1,28.2,13.6,1,32.307
361,2015-12-28,22.64,21.1,26.7,0.0,0,26.095
362,2015-12-29,21.68,20.3,24.1,10.3,0,22.309
363,2015-12-30,21.38,19.3,22.4,6.3,0,20.467


# Structural Analysis

In [316]:
# Count missing values per column (isna is the canonical spelling of isnull).
df.isna().sum()

Data                           0
Temperatura Media (C)          0
Temperatura Minima (C)         0
Temperatura Maxima (C)         0
Precipitacao (mm)              0
Final de Semana                0
Consumo de cerveja (litros)    0
dtype: int64

In [317]:
# Inspect the dtype pandas inferred for each column before any conversion.
df.dtypes

Data                            object
Temperatura Media (C)          float64
Temperatura Minima (C)         float64
Temperatura Maxima (C)         float64
Precipitacao (mm)              float64
Final de Semana                  int64
Consumo de cerveja (litros)     object
dtype: object

# Structural Manipulation

In [318]:
# Replace the Portuguese column headers with short English snake_case names.
# The renamed frame is assigned back explicitly rather than mutated with
# inplace=True, which keeps the cell's effect visible on the left-hand side
# and allows further method chaining. Re-running the cell is harmless:
# rename() ignores keys that are no longer present.
df = df.rename(
    columns={
        'Data': 'date',
        'Temperatura Media (C)': 'avg_temp',
        'Temperatura Minima (C)': 'min_temp',
        'Temperatura Maxima (C)': 'max_temp',
        'Precipitacao (mm)': 'rainfall',
        'Final de Semana': 'is_weekend',
        'Consumo de cerveja (litros)': 'beer_consumption',
    }
)

In [319]:
# Convert the date column from object to datetime64[ns]; the .dt accessor
# used further down (day_name, strftime) requires a datetime dtype.
df['date'] = df['date'].astype('datetime64[ns]')

In [320]:
# beer_consumption came in with object dtype (see the dtypes listing above);
# cast it to float so it can be plotted and correlated numerically.
# NOTE(review): presumably it stayed object because the file was read with
# decimal=',' while this column uses '.' -- confirm against the raw CSV.
df['beer_consumption'] = df['beer_consumption'].astype('float')

In [321]:
# Give the holiday dates the same datetime64[ns] dtype as df['date'] so the
# two columns can be matched with isin() later on.
df_holidays['DATE'] = df_holidays['DATE'].astype('datetime64[ns]')

In [322]:
# Confirm that the renamed columns now carry the intended dtypes.
df.dtypes

date                datetime64[ns]
avg_temp                   float64
min_temp                   float64
max_temp                   float64
rainfall                   float64
is_weekend                   int64
beer_consumption           float64
dtype: object

In [323]:
# Same dtype check for the holiday table.
df_holidays.dtypes

DATE       datetime64[ns]
HOLIDAY            object
dtype: object

In [324]:
# First column of the holiday table (DATE) as a plain Python list, used
# below as the lookup set for flagging holidays.
datelist = df_holidays.iloc[:, 0].to_list()

In [325]:
# Flag each day as holiday (1.0) or regular day (0.0) in a single vectorised
# assignment. The flag is kept as float64, matching what the two masked
# .loc assignments produced.
df['is_holiday'] = df['date'].isin(datelist).astype('float64')

In [326]:
# Day-of-week name derived from the date (e.g. 'Thursday'); used as the
# grouping variable for the weekday box plot.
df['weekday'] = df['date'].dt.day_name()

In [327]:
# Abbreviated month name via strftime('%b') (e.g. 'Jan').
df['month'] = df['date'].dt.strftime('%b')

In [328]:
# Display the frame again to check the new is_holiday, weekday and month columns.
df

Unnamed: 0,date,avg_temp,min_temp,max_temp,rainfall,is_weekend,beer_consumption,is_holiday,weekday,month
0,2015-01-01,27.30,23.9,32.5,0.0,0,25.461,1.0,Thursday,Jan
1,2015-01-02,27.02,24.5,33.5,0.0,0,28.972,0.0,Friday,Jan
2,2015-01-03,24.82,22.4,29.9,0.0,1,30.814,0.0,Saturday,Jan
3,2015-01-04,23.98,21.5,28.6,1.2,1,29.799,0.0,Sunday,Jan
4,2015-01-05,23.82,21.0,28.3,0.0,0,28.900,0.0,Monday,Jan
...,...,...,...,...,...,...,...,...,...,...
360,2015-12-27,24.00,21.1,28.2,13.6,1,32.307,0.0,Sunday,Dec
361,2015-12-28,22.64,21.1,26.7,0.0,0,26.095,0.0,Monday,Dec
362,2015-12-29,21.68,20.3,24.1,10.3,0,22.309,0.0,Tuesday,Dec
363,2015-12-30,21.38,19.3,22.4,6.3,0,20.467,0.0,Wednesday,Dec


# Data Analysis


In [329]:
# 2015 boundary dates used to shade the seasons on the time-series chart.
# Names are Portuguese: outono = autumn, inverno = winter,
# primavera = spring, verao = summer.
outono, inverno, primavera, verao = (
    datetime.date(2015, 3, 20),
    datetime.date(2015, 6, 21),
    datetime.date(2015, 9, 23),
    datetime.date(2015, 12, 22),
)

In [330]:
# Daily beer consumption and average temperature on a shared time axis.
fig = px.line(df, x='date', y=['beer_consumption', 'avg_temp'])

# One translucent band per season, listed in drawing order. Summer wraps
# around the calendar year, so it is drawn as two red bands; only the
# January-March one carries the label, and the late-December band omits
# annotation_text entirely so that no annotation is created for it.
for band in (
    dict(x0=outono, x1=inverno, annotation_text='Outono', fillcolor="orange"),
    dict(x0=inverno, x1=primavera, annotation_text='Inverno', fillcolor="blue"),
    dict(x0=primavera, x1=verao, annotation_text='Primavera', fillcolor="yellow"),
    dict(x0=verao, x1=datetime.date(2015, 12, 31), fillcolor="red"),
    dict(x0=datetime.date(2015, 1, 1), x1=outono, annotation_text='Verão', fillcolor="red"),
):
    fig.add_vrect(opacity=0.15, line_width=0, **band)

fig.show()

First, let's take a look at the temperature in São Paulo. The lowest and highest temperatures recorded in the city were 10.6 degrees Celsius and 36.5 degrees Celsius, respectively. There were no big temperature variations, and the median stayed between 13 and 29 degrees. On average, autumn and winter temperatures are cooler and spring and summer are warmer. However, relatively high variability was observed within each season.

In [331]:
# Distribution of daily beer consumption for each day of the week.
fig = px.box(
    data_frame=df,
    x='weekday',
    y="beer_consumption",
)
fig.show()

Beer consumption was highest on Saturday and Sunday, and lowest on Monday and Wednesday. The average monthly alcohol consumption at the survey sites was 1324.5 litres per working day. Consumption is above average only on weekends, lower on other days (including Fridays).

In [332]:
# Distribution of daily beer consumption, split by the is_weekend flag.
fig = px.box(
    data_frame=df,
    x='is_weekend',
    y="beer_consumption",
)
fig.show()

On average, alcohol consumption is higher on weekends than on weekdays. The median difference in average consumption is about 6 liters, which is more than 20% of the average consumption. However, there were certain days in the middle of the week when alcohol consumption increased significantly. There are three such days throughout the year - one Tuesday and two Thursdays. Weekend consumption is characterized by a low dispersion around the median, with no extremes.

In [333]:
# Distribution of daily beer consumption, split by the is_holiday flag.
# The grouping column is passed as a plain string, as in the is_weekend
# plot: a list of column names is interpreted by Plotly Express as
# wide-form input, which is not what a simple grouped box plot needs.
fig = px.box(df, x='is_holiday', y="beer_consumption")
fig.show()

We can see that there is no large difference between beer consumption on holidays and on regular days.

In [341]:
# Correlation heatmap of the numeric variables. df still contains the
# datetime `date` column and the string columns `weekday` and `month`, so
# the frame is restricted to numeric dtypes first; calling corr() on the
# full frame raises on recent pandas versions instead of silently dropping
# the non-numeric columns.
px.imshow(
    df.select_dtypes(include='number').corr().round(decimals=3),
    text_auto=True,
)

We can see that beer consumption correlates reasonably well with the temperature variables and with the weekend flag.

To build the model, we will remove the date and base it on the weather forecast. This avoids building a Recurrent Neural Network (RNN) model, which requires more processing, and lets us use simpler regression models instead.