In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from mlxtend.frequent_patterns import association_rules, apriori

In [None]:
# Load the bakery transactions from the Kaggle input directory and display them.
bakery_csv="../input/bakery/Bakery.csv"
data=pd.read_csv(bakery_csv)
data

#### First, we check the dataset for missing values

In [None]:
# Number of missing values in each column.
data.isna().sum()

In [None]:
# Heatmap of the boolean "is missing" mask, one cell per value in the frame.
missing_mask=data.isnull()
sns.heatmap(missing_mask)
plt.show()

### Let's inspect the data types

In [None]:
# Show the dtype of each column of the loaded frame.
data.dtypes

### Let's correct the column names

In [None]:
# Give the two day columns readable, space-separated names.
# The previous mapping also carried a "DataTime" key, but it matched no column
# (the timestamp column is read as "DateTime" in the following cells), so it
# was silently ignored; it is dropped here to keep the mapping honest.
# Reassigning instead of using inplace=True keeps the cell side-effect free
# on the frame object it received.
data=data.rename(columns={"Daypart":"Day Part","DayType":"Day Type"})
data

### Using the DateTime column, extract the year, month, weekday, and hour

In [None]:
# Parse the timestamp strings once and derive every calendar part from the
# same parsed Series (the column used to be re-parsed for each new column).
timestamps=pd.to_datetime(data["DateTime"])
data["Year"]=timestamps.dt.year
data["Month"]=timestamps.dt.month
# Numeric weekday: 0 = Monday ... 6 = Sunday.
data["Week Day"]=timestamps.dt.weekday
data["Hour"]=timestamps.dt.hour

In [None]:
# Display the frame to check the newly added Year, Month, Week Day and Hour columns.
data

### Let's replace the numeric month, weekday, and hour values with readable labels

In [None]:
### Replacing the names of the "Months"
# Dict-based replace only touches values that are still numeric, so running
# this cell a second time leaves the already-labelled columns unchanged.
month_names={
    1:'January',2:'February',3:'March',4:'April',5:'May',6:'June',
    7:'July',8:'August',9:'September',10:'October',11:'November',12:'December',
}
data["Month"]=data["Month"].replace(month_names)
#### Replacing the names of the "Week Day"
weekday_names={
    0:'Monday',1:'Tuesday',2:'Wednesday',3:'Thursday',
    4:'Friday',5:'Saturday',6:'Sunday',
}
data["Week Day"]=data["Week Day"].replace(weekday_names)
### Changing hours
# Label every hour of the day as "h-(h+1)" (e.g. 9 -> "9-10"). The mapping
# used to list only a hand-picked subset of hours, which left any other hour
# as a bare integer and produced a column mixing ints and strings.
hour_labels={hour:"{}-{}".format(hour,hour+1) for hour in range(24)}
data["Hour"]=data["Hour"].replace(hour_labels)

In [None]:
# Display the frame to check the relabelled Month, Week Day and Hour columns.
data

### 15 best-selling products

In [None]:
# Count how often each item appears and keep the 15 most frequent ones.
products=data["Items"].value_counts().head(15).reset_index(name="Count")
# Depending on the pandas version the former index column may be called
# "index"; make sure it ends up as "Items" either way.
products=products.rename(columns={"index":"Items"})
plt.figure(figsize=(20,9))
colors=sns.color_palette("Paired")
ax=sns.barplot(x="Items",y="Count",data=products,palette=colors)
# Write the count on top of every bar.
for container in ax.containers:
    ax.bar_label(container)
# The title is set once, after the labelling loop: it used to sit inside the
# loop, so it was re-applied per container and skipped when there were none.
plt.title("15 best-selling products",size=15)
plt.show()

### Sales by years

In [None]:
# Number of rows recorded in each year, as a two-column frame (Year, TransactionNo).
rows_per_year=data.groupby("Year")["TransactionNo"].count()
datayears=rows_per_year.reset_index()
datayears

In [None]:
# Bar chart of the yearly row counts; `colors` is the palette created in the
# best-selling-products cell above.
plt.figure(figsize=(10,5))
ax=sns.barplot(x="Year",y="TransactionNo",data=datayears,palette=colors)
# Write the count on top of every bar.
for container in ax.containers:
    ax.bar_label(container)
# Title moved out of the labelling loop so it is set exactly once.
plt.title("Sales by years",size=15)
plt.show()

### Sales by Month

In [None]:
# Number of rows per month. groupby sorts its keys, which for month *names*
# means alphabetical order (April, August, ...); reorder the result so the
# months appear in calendar order in the table and in the chart built from it.
month_order=['January','February','March','April','May','June',
             'July','August','September','October','November','December']
month_counts=data.groupby("Month")["TransactionNo"].count()
months_present=[month for month in month_order if month in month_counts.index]
# Keep any unexpected label (not a known month name) at the end instead of dropping it.
months_present+=[label for label in month_counts.index if label not in month_order]
datamonth=month_counts.reindex(months_present).rename_axis("Month").reset_index()
datamonth

In [None]:
# Bar chart of the monthly row counts.
plt.figure(figsize=(20,9))
colors=sns.color_palette("Paired")
ax=sns.barplot(x="Month",y="TransactionNo",data=datamonth,palette=colors)
# Write the count on top of every bar.
for container in ax.containers:
    ax.bar_label(container)
# Title moved out of the labelling loop so it is set exactly once.
plt.title("Sales by Month",size=15)
plt.show()

### Sales by Week Day

In [None]:
# Number of rows per weekday. groupby sorts the weekday names alphabetically
# (Friday, Monday, ...); reorder the result to run from Monday to Sunday.
weekday_order=['Monday','Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday']
weekday_counts=data.groupby("Week Day")["TransactionNo"].count()
weekdays_present=[day for day in weekday_order if day in weekday_counts.index]
# Keep any unexpected label (not a known weekday name) at the end instead of dropping it.
weekdays_present+=[label for label in weekday_counts.index if label not in weekday_order]
dataweek=weekday_counts.reindex(weekdays_present).rename_axis("Week Day").reset_index()
dataweek

In [None]:
# Bar chart of the row counts per weekday.
plt.figure(figsize=(15,5))
colors=sns.color_palette("Paired")
ax=sns.barplot(x="Week Day",y="TransactionNo",data=dataweek,palette=colors)
# Write the count on top of every bar.
for container in ax.containers:
    ax.bar_label(container)
# Title moved out of the labelling loop so it is set exactly once.
plt.title("Sales by Week Day",size=15)
plt.show()

### Sales by Hour

In [None]:
# Number of rows per hour slot. The slot labels are strings such as "9-10",
# so groupby's sorted keys come out lexicographically ("1-2", "10-11", ...,
# "9-10"); reorder them by the numeric hour the slot starts at.
hour_counts=data.groupby('Hour')["TransactionNo"].count()
# str() also copes with an hour that was left as a bare integer.
hours_in_day_order=sorted(hour_counts.index,key=lambda label:int(str(label).split("-")[0]))
datahour=hour_counts.reindex(hours_in_day_order).rename_axis("Hour").reset_index()
datahour

In [None]:
# Horizontal bar chart: one bar per hour slot, bar length = row count.
plt.figure(figsize=(20,8))
colors=sns.color_palette("Paired")
ax=sns.barplot(data=datahour,x="TransactionNo",y="Hour",palette=colors)
# Annotate each bar with its value.
for bar_group in ax.containers:
    ax.bar_label(bar_group)
ax.set_title("Sales by Hour",size=15)
plt.show()

### Sales by Day Part

In [None]:
# Number of rows in each part of the day, as a two-column frame (Day Part, TransactionNo).
rows_per_day_part=data.groupby("Day Part")["TransactionNo"].count()
datapart=rows_per_day_part.reset_index()
datapart

In [None]:
# Bar chart of the row counts per part of the day.
plt.figure(figsize=(15,6))
colors=sns.color_palette("Paired")
ax=sns.barplot(data=datapart,x="Day Part",y="TransactionNo",palette=colors)
# Annotate each bar with its value.
for bar_group in ax.containers:
    ax.bar_label(bar_group)
ax.set_title("Sales by Day Part",size=15)
plt.show()

### Product sales by parts of the day

In [None]:
# Rows per (day part, item), sorted descending so that within each day part
# the most frequent items come first.
dataparts=data.groupby(["Day Part","Items"])["TransactionNo"].count().reset_index().sort_values(["Day Part","TransactionNo"],ascending=False)
dayss=['Morning', 'Afternoon', 'Evening', 'Night']

# One subplot per day part on a 2x2 grid, each showing its 10 most frequent items.
plt.figure(figsize=(18,8))
colors=sns.color_palette('Paired')
for position,day_part in enumerate(dayss,start=1):
    plt.subplot(2,2,position)
    partsdata=dataparts[dataparts["Day Part"]==day_part].head(10)
    ax=sns.barplot(data=partsdata,x="TransactionNo",y="Items",palette=colors)
    # The inner loop has its own variable; it used to reuse `i` and shadow
    # the subplot index of the outer loop.
    for container in ax.containers:
        ax.bar_label(container)
    # Axis labels and title are set once per subplot, outside the labelling
    # loop (they used to be re-applied for every bar container).
    plt.xlabel('')
    plt.ylabel('')
    plt.title('Customers care to buy these products at {}'.format(day_part),size=13)
plt.show()

### Product sales by months

In [None]:
# Rows per (month, item), sorted descending so that within each month the
# most frequent items come first.
item_rows_per_month=data.groupby(["Month","Items"])["Items"].count()
productmonths=item_rows_per_month.reset_index(name="Quantity").sort_values(["Month","Quantity"],ascending=False)
monthsales=['January', 'February', 'March', 'April', 'May', 'June','July', 'August', 'September', 'October', 'November', 'December']

# One subplot per month on a 4x3 grid, each showing that month's 10 most frequent items.
plt.figure(figsize=(25,20))
colors=sns.color_palette('Paired')
for slot,month in enumerate(monthsales,start=1):
    plt.subplot(4,3,slot)
    in_month=productmonths["Month"]==month
    pr=productmonths[in_month].head(10)
    ax=sns.barplot(data=pr,x="Quantity",y="Items",palette=colors)
    # Annotate each bar with its value.
    for bar_group in ax.containers:
        ax.bar_label(bar_group)
    plt.xlabel('')
    plt.ylabel('')
    plt.title('Sale for "{}"'.format(month), size=13)

### Apriori Algorithm

In [None]:
# How many times each item occurs within each transaction, in long format
# (TransactionNo, Items, Quantity).
item_rows_per_transaction=data.groupby(["TransactionNo","Items"])["Items"].count()
dataapriori=item_rows_per_transaction.reset_index(name="Quantity")
dataapriori

In [None]:
### Convert to Pivot Table
# One row per transaction and one column per item; each cell holds the summed
# quantity, and combinations that never occur are filled with 0.
quantity_by_item=dataapriori.pivot_table(
    index="TransactionNo",
    columns="Items",
    values="Quantity",
    aggfunc="sum",
)
datapivot=quantity_by_item.fillna(0)
datapivot

In [None]:
### You need to convert the data to 0 and 1 or True and False. Because the apriori algorithm works with them.
def table(x):
    """Encode a purchased quantity as a 0/1 presence flag.

    Parameters
    ----------
    x : int or float
        Quantity of one item in one transaction.

    Returns
    -------
    int
        1 when ``x`` is greater than zero, otherwise 0.

    Notes
    -----
    Every input now maps to 0 or 1. The earlier two-branch version fell
    through and returned ``None`` for values strictly between 0 and 1 and
    for NaN; a positive fraction now counts as present (1) and NaN as
    absent (0).
    """
    return 1 if x>0 else 0

# Turn every quantity into a presence flag and store the flags as booleans:
# a boolean frame is the input type mlxtend recommends for apriori, whereas
# 0/1 numeric frames are accepted with a deprecation warning and are slower.
datapivottable=datapivot.applymap(table).astype(bool)
datapivottable

In [None]:
# Mine the frequent itemsets with a minimum support of 0.01, reporting the
# itemsets by column (item) name rather than by column index.
aprioridata=apriori(
    datapivottable,
    min_support=0.01,
    use_colnames=True,
)
aprioridata

In [None]:
# Derive association rules whose lift is at least 1, then show the ten rules
# with the highest confidence.
rules=association_rules(aprioridata,metric="lift",min_threshold=1)
rules_by_confidence=rules.sort_values("confidence",ascending=False)
rules_by_confidence.head(10)

In [None]:
### antecedent support: share of transactions that contain the antecedent (the "if" item set)
### consequent support: share of transactions that contain the consequent (the "then" item set)
### support: share of transactions that contain both the antecedent and the consequent
### confidence: probability that the consequent is bought given that the antecedent was bought (support / antecedent support)
### lift: confidence divided by consequent support; a value above 1 means buying the antecedent makes the consequent more likely than it is on its own