# Retail Forecasting



## Problem Statement:

A large beverage company in Australia sells its products through various supermarkets and runs heavy promotions throughout the year. Demand is also influenced by factors such as holidays and seasonality. The company needs a forecast for each product at item level, produced every week in weekly buckets.

In [None]:
# import libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import datetime

In [None]:
# Load the forecast dataset from the project's Data folder and preview three rows.
forecast = pd.read_csv(filepath_or_buffer='./Data/newForecast.csv')
forecast.head(n=3)

In [None]:
# Summarise the columns of the freshly loaded frame (dtypes and non-null counts).
forecast.info()

In [None]:
# Convert the 'date' column to datetime so the .dt accessors used below work.
# NOTE(review): no explicit format/dayfirst is given, so parsing relies on pandas'
# inference — confirm it matches the date layout in the CSV.
forecast = forecast.assign(date=pd.to_datetime(forecast['date']))

In [None]:
# Re-check the column dtypes now that 'date' has been converted.
forecast.info()

In [None]:
# Drop the leftover 'Unnamed: 0' column. Reassigning (instead of inplace=True) with
# errors='ignore' keeps this cell safe to re-run once the column is already gone.
forecast = forecast.drop(columns='Unnamed: 0', errors='ignore')

In [None]:
# Preview the first three rows to confirm the remaining columns.
forecast.head(3)

# Add Year

In [None]:
# Calendar year of each row, taken from the parsed 'date' column.
forecast = forecast.assign(Year=forecast['date'].dt.year)
forecast.head()

# Add Month

In [None]:
# Calendar month (1-12) of each row, taken from the parsed 'date' column.
forecast['Month'] = forecast['date'].dt.month

# Later cells group by a 'Quarter' column. Derive it from the date only when the
# loaded data does not already provide one, so existing values are never overwritten.
if 'Quarter' not in forecast.columns:
    forecast['Quarter'] = forecast['date'].dt.quarter

forecast.head()

# EDA

In [None]:
# Shared colours for the EDA plots:
#   color   - 10 shades from the "Purples_r" palette, indexed for single-colour plots
#   palette - palette name passed as `palette=` to seaborn calls
color = sns.color_palette(palette="Purples_r", n_colors=10)
palette = "Purples"

In [None]:
# Preview the swatches stored in `color`.
sns.palplot(color)

## Bar plot of products count

In [None]:
# Number of rows per product, with the count printed on top of each bar.
fig, ax = plt.subplots(figsize=(15, 5))
plt.xticks(rotation=45)
ax.set_title("Each Product Count")

sns.countplot(x="Product", data=forecast, palette=color, ax=ax)

for bars in ax.containers:
    ax.bar_label(bars)

Product 6 has slightly fewer sales records than the other products.

## Sales amount of each product 

In [None]:
# Total sales per product (each value is cast to int before summing),
# flattened to a frame and ordered from highest to lowest.
ts = (
    forecast.groupby(["Product"])["Sales"]
    .apply(lambda sales: sales.astype(int).sum())
    .to_frame()
    .reset_index()
    .sort_values('Sales', ascending=False)
)
ts.head()

In [None]:
# Bar chart of the per-product totals in `ts`, with the value printed on each bar.
fig, ax = plt.subplots(figsize=(20, 5))
plt.xticks(rotation=45)
ax.set_title("Total Sales of Each Product Sales")

sns.barplot(x="Product", y="Sales", data=ts, palette=palette, ax=ax)

for bars in ax.containers:
    ax.bar_label(bars)

## Change over time of all sales

In [None]:
# Sales against date across all products (seaborn aggregates rows sharing a date).
fig, ax = plt.subplots(figsize=(15, 5))
plt.xticks(rotation=45)
ax.set_title("Change overtime of all sales")

sns.lineplot(x="date", y="Sales", data=forecast, color=color[5], ax=ax)

## Sales Based on Product

In [None]:
# Sales against date, one line per product.
fig, ax = plt.subplots(figsize=(15, 5))
plt.xticks(rotation=45)
ax.set_title("Sales Based on Product")

sns.lineplot(x="date", y="Sales", hue="Product", data=forecast, palette=palette, ax=ax)

## Sales Based on Product over time

In [None]:
# 3x2 grid with one sales-over-time panel per product (SKU1..SKU6).
fig, axs = plt.subplots(3, 2, figsize=(20, 20))

# axs.flat walks the grid row by row, so panel i shows "SKU<i>" drawn in color[i - 1].
for idx, panel in enumerate(axs.flat, start=1):
    sns.lineplot(data=forecast[forecast["Product"] == f"SKU{idx}"], x="date", y="Sales",
                 color=color[idx - 1], ax=panel)
    panel.set(title=f"Product {idx}")


## Distributions of monthly sales over time

In [None]:
fig, ax = plt.subplots(figsize=(20,5))
plt.title("Distributions of Monthly Sales Over Time")

# Total sales per (Year, Month); each value is cast to int before summing.
year_month_sales = forecast.groupby(["Year", "Month"])["Sales"].apply(lambda x : x.astype(int).sum())

# pandas sets the x tick-label rotation itself when it draws (bar charts default to
# vertical labels), so the 45-degree rotation is requested through `rot` rather than
# through a plt.xticks call made before plotting.
year_month_sales.plot(kind="bar", color=color[5], rot=45, ax=ax)

In [None]:
# Line view of the monthly totals held in `year_month_sales`.
fig, ax = plt.subplots(figsize=(20,5))
plt.title("Distributions of Monthly Sales Over Time")

# pandas sets the x tick-label rotation itself when it draws, so the 45-degree
# rotation is requested through `rot` rather than a plt.xticks call made beforehand.
year_month_sales.plot(kind="line", color=color[5], rot=45, ax=ax)

## Distributions of Monthly Sales Over Time Based On Product

In [None]:
# Total sales per (Year, Month, Product); values are cast to int before summing,
# then the grouped result is flattened into ordinary columns.
mps = (
    forecast.groupby(["Year", "Month", "Product"])["Sales"]
    .apply(lambda sales: sales.astype(int).sum())
    .to_frame()
    .reset_index()
)
mps.head()

In [None]:
# Monthly totals by calendar month, one line per product (years share the same x value).
fig, ax = plt.subplots(figsize=(20, 5))
plt.xticks(rotation=45)
ax.set_title("Distributions of Monthly Sales Over Time Based On Product")

sns.lineplot(x="Month", y="Sales", hue="Product", data=mps, palette=palette, ax=ax)

In [None]:
# Monthly totals by calendar month as grouped bars, one bar per product.
fig, ax = plt.subplots(figsize=(20, 5))
plt.xticks(rotation=45)
ax.set_title("Distributions of Monthly Sales Over Time Based On Product")

sns.barplot(x="Month", y="Sales", hue="Product", data=mps, palette=palette, ax=ax)

## Distributions of Quarter Sales Over Time

In [None]:
fig, ax = plt.subplots(figsize=(20,5))
plt.title("Distributions of Quarter Sales Over Time")

# Total sales per (Year, Quarter); each value is cast to int before summing.
year_quarter_sales = forecast.groupby(["Year", "Quarter"])["Sales"].apply(lambda x : x.astype(int).sum())

# pandas sets the x tick-label rotation itself when it draws (bar charts default to
# vertical labels), so the 45-degree rotation is requested through `rot` rather than
# through a plt.xticks call made before plotting.
year_quarter_sales.plot(kind="bar", color=color[5], rot=45, ax=ax)


In [None]:
# Line view of the quarterly totals held in `year_quarter_sales`.
fig, ax = plt.subplots(figsize=(20,5))
plt.title("Distributions of Quarter Sales Over Time")

# pandas sets the x tick-label rotation itself when it draws, so the 45-degree
# rotation is requested through `rot` rather than a plt.xticks call made beforehand.
year_quarter_sales.plot(kind="line", color=color[5], rot=45, ax=ax)


## Distributions of Quarter Sales Over Time Based On Product

In [None]:
# Total sales per (Year, Quarter, Product); values are cast to int before summing,
# then the grouped result is flattened into ordinary columns.
qtr_prod_sales = (
    forecast.groupby(["Year", "Quarter", "Product"])["Sales"]
    .apply(lambda sales: sales.astype(int).sum())
    .to_frame()
    .reset_index()
)
qtr_prod_sales.head()

In [None]:
# Quarterly totals by quarter number, one line per product.
fig, ax = plt.subplots(figsize=(20, 5))
plt.xticks(rotation=45)
ax.set_title("Distributions of Quarter Sales Over Time Based On Product")

sns.lineplot(x="Quarter", y="Sales", hue="Product", data=qtr_prod_sales, palette=palette, ax=ax)

In [None]:
# Quarterly totals by quarter number as grouped bars, one bar per product.
fig, ax = plt.subplots(figsize=(20, 5))
plt.xticks(rotation=45)
ax.set_title("Distributions of Quarter Sales Over Time Based On Product")

sns.barplot(x="Quarter", y="Sales", hue="Product", data=qtr_prod_sales, palette=palette, ax=ax)


## Average Quarter Sales over time

In [None]:
# Mean sales per (Year, Quarter), flattened to a frame and rounded to two decimals.
avgqtr = forecast.groupby(["Year", "Quarter"])["Sales"].mean().to_frame().reset_index()
avgqtr["Sales"] = avgqtr["Sales"].apply(lambda value: round(value, 2))

avgqtr.head()

In [None]:
# Build a "<Year>-<Quarter>" text label, stored in 'date', for use as the plot x axis.
avgqtr = avgqtr.assign(date=avgqtr['Year'].astype(str) + "-" + avgqtr['Quarter'].astype(str))
avgqtr.head()

In [None]:
# Line chart of the average sales per year-quarter label.
fig, ax = plt.subplots(figsize=(20, 5))
plt.xticks(rotation=45)
ax.set_title("Average Quarter Sales Over Time")

sns.lineplot(x="date", y="Sales", data=avgqtr, color=color[5], ax=ax)

In [None]:
# Bar chart of the average sales per year-quarter label.
fig, ax = plt.subplots(figsize=(20, 5))
plt.xticks(rotation=45)
ax.set_title("Average Quarter Sales Over Time")

sns.barplot(x="date", y="Sales", data=avgqtr, palette=palette, ax=ax)

## Average Quarter Sales per Product over time

In [None]:
# Mean sales per (Year, Quarter, Product), rounded to two decimals, plus a
# "<Year>-<Quarter>" text label stored in 'date' for plotting.
avgqtr_prod = forecast.groupby(["Year", "Quarter", "Product"])["Sales"].mean().to_frame().reset_index()
avgqtr_prod = avgqtr_prod.assign(
    Sales=avgqtr_prod["Sales"].apply(lambda value: round(value, 2)),
    date=avgqtr_prod['Year'].astype(str) + "-" + avgqtr_prod['Quarter'].astype(str),
)

avgqtr_prod.head()

In [None]:
# Average sales per year-quarter label, one line per product.
fig, ax = plt.subplots(figsize=(20, 5))
plt.xticks(rotation=45)
ax.set_title("Average Quarter Sales Over Time by Product")

sns.lineplot(x="date", y="Sales", hue="Product", data=avgqtr_prod, palette=palette, ax=ax)

In [None]:
# Average sales per year-quarter label as grouped bars, one bar per product.
fig, ax = plt.subplots(figsize=(20, 5))
plt.xticks(rotation=45)
ax.set_title("Average Quarter Sales Over Time by Product")

sns.barplot(x="date", y="Sales", hue="Product", data=avgqtr_prod, palette=palette, ax=ax)

# Average Monthly Sales Over Time

In [None]:
# Mean sales per (Year, Month), flattened to a frame and rounded to two decimals.
avgmth = forecast.groupby(["Year", "Month"])["Sales"].mean()
avgmth = avgmth.to_frame()
avgmth.reset_index(inplace=True)

avgmth["Sales"] = avgmth["Sales"].apply(lambda x: round(x, 2))
# "<Year>-<MM>" text label for the plot x axis. The month is zero-padded so the
# labels also sort chronologically as plain strings ("2017-02" before "2017-10").
avgmth['date'] = avgmth['Year'].astype(str) + "-" + avgmth['Month'].astype(str).str.zfill(2)

avgmth.head()

In [None]:
# Line chart of the overall monthly averages held in `avgmth`.
fig, ax = plt.subplots(figsize=(20,5))
plt.xticks(rotation = 45)

plt.title("Average Monthly Sales Over Time")

# Plot the monthly frame (not the quarterly per-product one). This is a single
# series, so a fixed colour is used; `palette` only applies when `hue` is set.
sns.lineplot(data = avgmth, x="date", y="Sales", color=color[5])

In [None]:
# Bar chart of the overall monthly averages held in `avgmth`.
fig, ax = plt.subplots(figsize=(20,5))
plt.xticks(rotation = 45)

plt.title("Average Monthly Sales Over Time")

# Plot the monthly frame (not the quarterly per-product one).
sns.barplot(data = avgmth, x="date", y="Sales", palette=palette)

## Average Monthly Sales Over Time by Product

In [None]:
# Mean sales per (Year, Month, Product), flattened to a frame and rounded to two decimals.
avgmth_prod = forecast.groupby(["Year", "Month", "Product"])["Sales"].mean()
avgmth_prod = avgmth_prod.to_frame()
avgmth_prod.reset_index(inplace=True)

avgmth_prod["Sales"] = avgmth_prod["Sales"].apply(lambda x: round(x, 2))
# "<Year>-<MM>" text label for the plot x axis. The month is zero-padded so the
# labels also sort chronologically as plain strings ("2017-02" before "2017-10").
avgmth_prod['date'] = avgmth_prod['Year'].astype(str) + "-" + avgmth_prod['Month'].astype(str).str.zfill(2)

avgmth_prod.head()

In [None]:
# Average sales per year-month label, one line per product.
fig, ax = plt.subplots(figsize=(20,5))
plt.xticks(rotation = 45)

plt.title("Average Monthly Sales Over Time by Product")

# Plot the monthly per-product frame (not the quarterly one).
sns.lineplot(data = avgmth_prod, x="date", y="Sales", hue="Product", palette=palette)

In [None]:
# Average sales per year-month label as grouped bars, one bar per product.
fig, ax = plt.subplots(figsize=(20,5))
plt.xticks(rotation = 45)

plt.title("Average Monthly Sales Over Time by Product")

# Plot the monthly per-product frame (not the quarterly one).
sns.barplot(data = avgmth_prod, x="date", y="Sales", hue="Product", palette=palette)

## Heatmap

## Profile Report

In [None]:
#!pip uninstall -y pandas-profiling

In [None]:
#pip install pandas-profiling

In [None]:
from pandas_profiling import ProfileReport

#import pandas_profiling as pp

In [None]:
from pandas_profiling import ProfileReport

# Build a profiling report of `forecast`, with the time-series option enabled and
# rows ordered by 'date', then write it to an HTML file in the working directory.
profile = ProfileReport(
    forecast,
    tsmode=True,
    sortby="date",
)
profile.to_file('profile_report.html')