In [None]:
#Import libraries needed for analysis
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns

In [None]:
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

In [None]:
# Load the groceries purchase records from the CSV file into a DataFrame
df = pd.read_csv(filepath_or_buffer="../input/groceries-dataset/Groceries_dataset.csv")

In [None]:
# Preview the first five rows of the loaded data
df.head(n=5)

In [None]:
# Distinct values of the 'itemDescription' column, and how many there are
all_products = pd.unique(df['itemDescription'])
print("Total products: {}".format(len(all_products)))

In [None]:
# Number of rows per product, largest count first
df['itemDescription'].value_counts(ascending=False)

In [None]:
# Number of rows per member, largest count first (the top entry is the most frequent buyer)
df['Member_number'].value_counts(ascending=False)

In [None]:
#Convert the 'Date' column to datetime format.
# NOTE(review): dayfirst=True assumes the CSV stores dates as DD-MM-YYYY (the
# Kaggle groceries file appears to) - confirm against the raw data. Without an
# explicit day/month order, ambiguous strings such as 05-01-2015 can be read
# month-first, which silently corrupts the 'day' and 'month' columns below.
df['Date'] = pd.to_datetime(df['Date'], dayfirst=True)

#Extracting year, month, day and weekday with the vectorised .dt accessor
#(avoids a Python-level lambda call per row)
df['year'] = df['Date'].dt.year
df['month'] = df['Date'].dt.month
df['day'] = df['Date'].dt.day
df['weekday'] = df['Date'].dt.weekday  # Monday=0 ... Sunday=6

#Rearranging the columns
df = df[['Member_number', 'Date', 'year', 'month', 'day', 'weekday', 'itemDescription']]
df.head()

In [None]:
#distribution of order in the span of days
# Use one bin per day of month (edges at 0.5, 1.5, ..., 31.5). The default of
# 10 bins would spread the 31 possible days over unevenly sized buckets and
# produce artificial spikes.
day_bin_edges = np.arange(1, 33) - 0.5
plt.figure(figsize=(12,8))
plt.hist(df.day, bins=day_bin_edges, align='mid', rwidth=0.5)
plt.ylabel('Count', fontsize=12)
plt.xlabel('Day of month', fontsize=12)
plt.xticks(range(1, 32), rotation='vertical')  # one tick per day
plt.title("Frequency of order by days", fontsize=12)
plt.show()

In [None]:
# Line plot: number of non-null 'itemDescription' rows for each day of month
plt.figure(figsize=(12, 8))
plt.plot(df.groupby('day')['itemDescription'].count())
plt.xlabel('Days', fontsize=12)
plt.ylabel('Number of Item', fontsize=12)
plt.title("Total Transaction", fontsize=12)

In [None]:
# Line plot: number of distinct customers seen on each day of month.
# nunique() is used instead of count(): count() returns the number of rows
# (one per purchased item), not the number of customers the y-axis promises.
plt.figure(figsize=(12,8))
plt.plot(df.groupby('day')['Member_number'].nunique())
plt.ylabel('Number of Customer', fontsize=12)
plt.xlabel('Days', fontsize=12)
plt.title("Total Visit", fontsize=12)
plt.show()

In [None]:
#visualization of item sold
# Count the rows for every product; the result has the columns
# 'itemDescription' and 'count'. (plotly.express is imported as `px` in the
# import cell at the top of the notebook.)
Frequency_of_items = df.groupby('itemDescription').size().reset_index(name='count')

# Treemap where each tile is a product and its area is the row count
fig = px.treemap(Frequency_of_items, path=['itemDescription'], values='count')
fig.update_layout(title_text='Frequency of the Items Sold',
                  title_x=0.5, title_font=dict(size=18)
                  )
fig.update_traces(textinfo="label+value")
fig.show()

In [None]:
# Member x product matrix: each cell holds how many rows that member has for
# that product (0 where the pair never occurs). 'Member_number' ends up as the
# first column after reset_index().
baskets = (
    df.groupby(['Member_number', 'itemDescription'])['itemDescription']
    .count()
    .unstack()
    .fillna(0)
    .reset_index()
)
baskets.head()

In [None]:
# Encoding whether an item was bought at least once
def one_hot_encoder(k):
    """Collapse a purchase count into a 0/1 flag.

    Parameters
    ----------
    k : number
        Purchase count for one (member, item) cell.

    Returns
    -------
    int
        1 when ``k >= 1``, otherwise 0. Values for which the comparison is
        false (zero, negatives, fractions below 1 and NaN) all map to 0, so
        the function never falls through and returns ``None``.
    """
    return 1 if k >= 1 else 0

In [None]:
# Turn the purchase counts into a boolean "bought at least once" matrix.
# - The 'Member_number' id column is dropped by name so only item columns remain.
# - A vectorised comparison replaces the deprecated DataFrame.applymap call.
# - mlxtend's apriori recommends boolean input, so the flags are kept as
#   True/False rather than 1/0.
baskets_final = baskets.drop(columns='Member_number').ge(1)
baskets_final.head()

In [None]:
# Finding the most frequent items sold together
# Itemsets of up to 3 items with support >= 0.025, sorted with the highest
# support first so that head(25) really shows the most frequent ones
# (an ascending sort would list the least frequent itemsets instead).
frequent_itemsets = apriori(
    baskets_final,
    min_support=0.025,
    use_colnames=True,
    max_len=3,
).sort_values(by='support', ascending=False)
frequent_itemsets.head(25)

In [None]:
# Creating association rules that indicate antecedent and consequent items:
# keep rules whose lift is at least 1, order them by lift (highest first) and
# retain only the columns of interest.
rules = (
    association_rules(frequent_itemsets, metric="lift", min_threshold=1)
    .sort_values('lift', ascending=False)
    .loc[:, ['antecedents', 'consequents', 'support', 'confidence', 'lift']]
)
rules.head(25)