In [1]:
import warnings
import seaborn as sns
import matplotlib.pyplot as plt
import squarify
import scipy.stats as stats
sns.set_theme(style="darkgrid")
sns.set(style='ticks')
%matplotlib inline
warnings.filterwarnings("ignore")

%run ../../src/model/run_script.ipynb

conf = get_conf()

trans = get_datasources(conf)["trans_info"]
item = get_datasources(conf)["item_info"]
stores = get_datasources(conf)["outlets_info"]

trans = pre_process_transaction_info(trans)
item = pre_process_item_info(item)
store = pre_process_stores_info(stores)
no_categories = conf['params']["no_categories"]

fast_moving_category = identify_fast_moving_category(trans, item, no_categories)


def monthly_sales_plot(trans, item):
    """Draw a bar chart of ``total_sales`` summed per calendar month.

    Args:
        trans: transaction frame; left-joined with ``item`` on ``item_code``.
        item: item frame providing the joined item attributes.

    The joined frame must contain a datetime-like ``week`` column and a
    ``total_sales`` column. Rows with any missing value after the join are
    dropped before aggregating.
    """
    merged = trans.merge(item, on='item_code', how="left")
    merged = merged.sort_values("week").reset_index(drop=True).dropna()
    merged['month'] = merged['week'].dt.month
    sales_by_month = merged.groupby('month')['total_sales'].sum().reset_index()

    plt.figure(figsize=(15, 6), dpi=80)
    ax = sns.barplot(x='month', y='total_sales', data=sales_by_month, palette='cividis', ci=None)

    # Print each month's total just above its bar.
    for position, amount in enumerate(sales_by_month['total_sales']):
        ax.text(position, amount + 1, str(amount), ha='center', fontsize=12, fontweight="bold")

    ax.set_title('Distribution of Sales by Month', fontweight="bold", fontsize=16)
    ax.set_xlabel('Month', fontsize=13)
    ax.set_ylabel('Sales', fontsize=13)
    ax.grid(True)

    plt.show()

def week_month_sales_plot(trans, item):
    """Draw a bar chart of ``total_sales`` summed per week of the month.

    The week of month is derived from the day of the ``week`` date:
    days 1-7 -> 1, 8-14 -> 2, 15-21 -> 3, 22-28 -> 4, 29-31 -> 5.

    Args:
        trans: transaction frame; left-joined with ``item`` on ``item_code``.
        item: item frame providing the joined item attributes.

    The joined frame must contain a datetime-like ``week`` column and a
    ``total_sales`` column. Rows with any missing value after the join are
    dropped before aggregating.
    """
    trans_item = (
        trans.merge(item, on='item_code', how="left")
        .sort_values("week")
        .reset_index(drop=True)
        .dropna()
    )
    trans_item['week_of_month'] = (trans_item['week'].dt.day - 1) // 7 + 1
    weekly_sales = trans_item.groupby('week_of_month')['total_sales'].sum().reset_index()

    # Custom x-axis labels, keyed by bucket so that a bucket missing from the
    # data (e.g. no date after the 28th) cannot shift or break the labelling.
    week_labels = {
        1: 'Week 1 (1-7 Days)',
        2: 'Week 2 (8-14 Days)',
        3: 'Week 3 (15-21 Days)',
        4: 'Week 4 (22-28 Days)',
        5: 'Week 5 (Above 28 Days)',
    }
    x_labels = [week_labels[week] for week in weekly_sales['week_of_month']]

    plt.figure(figsize=(15, 6), dpi=80)
    ax = sns.barplot(x='week_of_month', y='total_sales', data=weekly_sales, palette="rocket", ci=None)

    # Adding text labels to the bars
    for i, v in enumerate(weekly_sales['total_sales']):
        ax.text(i, v + 1, str(v), ha='center', fontsize=12, fontweight="bold")

    ax.set_title('Distribution of Sales by the Week of Month', fontweight="bold", fontsize=16)
    ax.set_xlabel('Week of Month', fontsize=13)
    ax.set_ylabel('Sales', fontsize=13)
    ax.grid(True)
    ax.set_xticklabels(x_labels)

    plt.show()
    
def date_month_sales_plot(trans, item):
    """Draw a line chart of ``total_sales`` summed per day of the month.

    Args:
        trans: transaction frame; left-joined with ``item`` on ``item_code``.
        item: item frame providing the joined item attributes.

    The joined frame must contain a datetime-like ``week`` column and a
    ``total_sales`` column. Rows with any missing value after the join are
    dropped before aggregating.
    """
    merged = trans.merge(item, on='item_code', how="left")
    merged = merged.sort_values("week").reset_index(drop=True).dropna()
    merged['date_of_month'] = merged['week'].dt.day
    sales_by_day = merged.groupby('date_of_month')['total_sales'].sum().reset_index()

    plt.figure(figsize=(15, 6), dpi=80)
    g = sns.lineplot(x='date_of_month', y='total_sales', data=sales_by_day,
                     marker="o", color="red", linewidth=2)

    # One tick per integer from 0 to 31, labelled with that integer.
    tick_positions = range(32)
    g.set_xticks(tick_positions)
    g.set_xticklabels([str(day) for day in tick_positions])

    g.set_title("Distribution of Sales by the Date of Month", fontweight="bold", fontsize=16)
    g.set_xlabel("Date of Month", fontsize=13)
    g.set_ylabel("Sales", fontsize=13)
    g.grid(True)
    plt.show()
    
def weekday_sales_plot():
    """Draw a bar chart of ``sales_qty`` summed per day of the week.

    Reads the ``trans_info`` source through ``get_datasources(conf)`` and
    joins it with the module-level ``item`` frame, so it takes no arguments.
    Bars are ordered from the highest to the lowest total.
    """
    raw = get_datasources(conf)["trans_info"].sort_values('DATE')
    raw['DATE'] = pd.to_datetime(raw['DATE'])
    for column in ("sales_qty", "item_code"):
        raw[column] = raw[column].astype(int)

    # Sum quantities per weekly bucket (W-MON), weekday, outlet and item.
    day_index = raw['DATE'].dt.weekday.rename('Weekday')
    group_keys = [pd.Grouper(key='DATE', freq='W-MON'), day_index, 'outlet_code', 'item_code']
    per_day = raw.groupby(group_keys).agg({'sales_qty': 'sum'}).reset_index()

    # Weekday numbers 0..6 become Monday..Sunday.
    day_labels = dict(enumerate(
        ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']))
    per_day['Weekday'] = per_day['Weekday'].map(day_labels)

    joined = per_day.merge(item, on='item_code', how="left")
    joined = joined.sort_values("DATE").reset_index(drop=True).dropna()

    totals = (
        joined.groupby('Weekday')['sales_qty'].sum()
        .reset_index()
        .sort_values('sales_qty', ascending=False)
    )

    plt.figure(figsize=(15, 6), dpi=80)
    sns.barplot(x='Weekday', y='sales_qty', data=totals, palette="mako", ci=None)

    # Print each weekday's total just above its bar.
    for position, quantity in enumerate(totals['sales_qty']):
        plt.text(position, quantity + 1, str(quantity), ha='center', fontsize=12, fontweight="bold")

    plt.xticks(rotation=90)
    plt.title('Distribution of Sales by the Day of Week', fontweight="bold", fontsize=16)
    plt.xlabel('Day of the Week', fontsize=13)
    plt.ylabel('Sales', fontsize=13)
    plt.grid(True)

    plt.show()
    
def outlet_sales_plot(trans, item):
    """Draw a pie chart of each outlet's share of ``total_sales``.

    Args:
        trans: transaction frame; left-joined with ``item`` on ``item_code``.
        item: item frame providing the joined item attributes.

    The joined frame must contain ``week``, ``outlet_code`` and
    ``total_sales`` columns. Rows with any missing value after the join are
    dropped before aggregating. Wedges are ordered from the highest to the
    lowest total.
    """
    trans_item = (
        trans.merge(item, on='item_code', how="left")
        .sort_values("week")
        .reset_index(drop=True)
        .dropna()
    )
    store_sales = trans_item.groupby('outlet_code')['total_sales'].sum().sort_values(ascending=False)

    plt.figure(figsize=(10, 8))
    plt.title("Distribution of Sales by the Outlet", fontweight="bold", fontsize=16)

    # Label every wedge from the outlet it actually represents. A fixed label
    # list silently mislabels wedges when the sales ranking changes and is
    # rejected by ``plt.pie`` when the number of outlets differs.
    # NOTE(review): assumes outlet_code reads well after "Store " -- confirm.
    labels = [f'Store {code}' for code in store_sales.index]
    colors = ['#FFC107', '#FF5722', '#FF9800', '#F44336', '#FFEB3B']

    # Plotting the pie chart
    _, _, autotexts = plt.pie(store_sales, labels=labels, colors=colors,
                              autopct='%1.1f%%', startangle=90,
                              wedgeprops={'linewidth': 3.0, 'edgecolor': 'white'},
                              textprops={'size': 'x-large', 'fontweight': 'bold'},
                              pctdistance=0.7, shadow=True)

    # Adding a legend
    plt.legend(title='Outlet', loc='best', bbox_to_anchor=(1, 0.5))
    plt.ylabel('')

    # Customizing the text for percentages
    for autotext in autotexts:
        autotext.set_fontweight('bold')
        autotext.set_fontsize('large')

    plt.tight_layout()
    plt.show()
    
def sales_items_plot(trans, item):
    """Draw a horizontal bar chart of the ten item categories with the
    highest summed ``total_sales``.

    Args:
        trans: transaction frame; left-joined with ``item`` on ``item_code``.
        item: item frame providing the joined item attributes.

    The joined frame must contain ``week``, ``item_category`` and
    ``total_sales`` columns. Rows with any missing value after the join are
    dropped before aggregating.
    """
    merged = trans.merge(item, on='item_code', how="left")
    merged = merged.sort_values("week").reset_index(drop=True).dropna()
    top_categories = (
        merged.groupby('item_category')['total_sales'].sum()
        .sort_values(ascending=False)
        .head(10)
    )
    amounts = top_categories.values

    plt.figure(figsize=(15, 10), dpi=80)
    ax = sns.barplot(x=amounts, y=top_categories.index, palette="Dark2", ci=None)

    # Print each category's total just right of its bar.
    for row_position, amount in enumerate(amounts):
        ax.text(amount + 1, row_position, str(amount), ha='left', va='center',
                fontsize=12, fontweight="bold")

    ax.set_title('Distribution of Sales by the Top Ten Item Categories', fontweight="bold", fontsize=16)
    ax.set_xlabel('Sales', fontsize=13)
    ax.set_ylabel('Item Category', fontsize=13)
    ax.grid(True)

    plt.show()
    
def day_sales_department_plot():
    """Draw grouped bars of ``sales_qty`` per day of week, one bar per
    item department.

    Reads the ``trans_info`` source through ``get_datasources(conf)`` and
    joins it with the module-level ``item`` frame, so it takes no arguments.
    Weekdays are shown Monday-to-Sunday, and a weekday/department pair
    without any sales is drawn as zero.
    """
    data = get_datasources(conf)["trans_info"]
    data = data.sort_values('DATE')
    data['DATE'] = pd.to_datetime(data['DATE'])
    data["sales_qty"] = data["sales_qty"].astype(int)
    data["item_code"] = data["item_code"].astype(int)

    # Sum quantities per weekly bucket (W-MON), weekday, outlet and item.
    weekday_trans = data.groupby([pd.Grouper(key='DATE', freq='W-MON'),
                                  data['DATE'].dt.weekday.rename('Weekday'),
                                  'outlet_code', 'item_code']).agg({'sales_qty': 'sum'}).reset_index()

    weekday_names = {
        0: 'Monday',
        1: 'Tuesday',
        2: 'Wednesday',
        3: 'Thursday',
        4: 'Friday',
        5: 'Saturday',
        6: 'Sunday'
    }

    weekday_trans['Weekday'] = weekday_trans['Weekday'].map(weekday_names)

    trans_item = weekday_trans.merge(item, on='item_code', how="left").sort_values("DATE").reset_index(drop=True).dropna()
    sales_day_dept = trans_item.groupby(['Weekday', 'item_department']).agg({'sales_qty': 'sum'}).reset_index()

    # Pivot the DataFrame to make each department a column
    sales_pivot_department = sales_day_dept.pivot(index='Weekday', columns='item_department', values='sales_qty')

    # The pivot index is sorted alphabetically (Friday, Monday, ...); put the
    # weekdays that are present back into calendar order.
    present_days = [day for day in weekday_names.values() if day in sales_pivot_department.index]
    # Missing weekday/department pairs are NaN after the pivot, which would
    # make ``int(p.get_height())`` below raise; treat them as zero sales.
    sales_pivot_department = sales_pivot_department.reindex(present_days).fillna(0)

    ax = sales_pivot_department.plot(kind='bar', figsize=(15, 6), width=0.8, color=['blue', 'green', 'red', 'orange'])

    # Add text labels to the bars
    for p in ax.patches:
        ax.annotate(str(int(p.get_height())), (p.get_x() + p.get_width() / 2., p.get_height()),
                    ha='center', va='center', xytext=(0, 5), textcoords='offset points', fontsize=12, fontweight="bold")

    plt.legend(title='Department', loc="upper left", bbox_to_anchor=(1.05, 1), ncol=1)
    ax.set_title('Distribution of Day of Week Sales by the Item Department', fontweight="bold", fontsize=16)
    ax.set_xlabel('Day of Week', fontsize=13)
    ax.set_ylabel('Sales', fontsize=13)
    plt.xticks(rotation=45)
    plt.grid(True)

    plt.show()
    
def top_5_fast_moving_plot(fast_moving_category):
    """Draw one horizontal bar chart per (outlet, department) pair.

    The figure has one row per outlet and one column per department, both
    taken from the data, so every group gets a panel and each row title
    names the outlet actually plotted in that row.

    Args:
        fast_moving_category: frame with ``outlet_code``, ``item_department``,
            ``item_category``, ``row`` and ``total_sales_qty`` columns.
    """
    panels = list(fast_moving_category.groupby(['outlet_code', 'item_department']))

    # Rows follow the order in which outlets are yielded by the groupby;
    # columns follow the sorted department names.
    outlets = list(dict.fromkeys(key[0] for key, _ in panels))
    departments = sorted({key[1] for key, _ in panels})
    row_of = {outlet: index for index, outlet in enumerate(outlets)}
    column_of = {department: index for index, department in enumerate(departments)}

    # squeeze=False keeps ``axes`` two-dimensional even for a single row or
    # column; 5 inches per row reproduces the 20x25 figure for five outlets.
    fig, axes = plt.subplots(nrows=len(outlets), ncols=len(departments),
                             figsize=(20, 5 * len(outlets)), squeeze=False)

    used_axes = set()
    for (outlet, department), group in panels:
        ax = axes[row_of[outlet]][column_of[department]]
        used_axes.add(ax)
        df_subset = group.sort_values('total_sales_qty', ascending=True)

        num_categories = len(df_subset)
        colors = plt.cm.plasma(np.linspace(0, 1, num_categories))

        bars = ax.barh(df_subset['row'], df_subset['total_sales_qty'], color=colors)
        ax.set_title(f'Outlet {outlet}, Department: {department}', fontsize=14)
        ax.set_xlabel('Total Sales Quantity', fontsize=12)
        ax.set_ylabel('Row', fontsize=12)
        ax.set_yticks(df_subset['row'])
        ax.set_yticklabels(df_subset['item_category'], fontsize=10, fontweight='bold')

        # Add data labels to the bars
        # NOTE(review): when a bar ends within 100 units of the x-limit, both
        # the outside and the inside label are drawn -- confirm this is wanted.
        for bar in bars:
            width = bar.get_width()
            ax.text(width + 100, bar.get_y() + bar.get_height() / 2, f'{int(width)}', ha='left',
                    va='center', fontsize=12, fontweight='bold')
            if width > ax.get_xlim()[1] - 100:
                ax.text(ax.get_xlim()[1] - 100, bar.get_y() + bar.get_height() / 2, f'{int(width)}',
                        ha='right', va='center', fontsize=12, fontweight='bold')

        ax.invert_yaxis()

        ax.grid(axis='x', linestyle='-', linewidth=0.5)

        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)

    # Panels for (outlet, department) pairs absent from the data stay blank;
    # only the frame is hidden so a row title placed on them is still drawn.
    for ax in axes.flatten():
        if ax not in used_axes:
            ax.axis('off')

    # Add top 5 fast-moving category labels above the middle panel of each row
    for outlet, row_axes in zip(outlets, axes):
        middle_ax = row_axes[len(row_axes) // 2]
        middle_ax.text(0.5, 1.2, f'\nTop 5 Fast Moving Categories - Outlet {outlet}\n',
                       transform=middle_ax.transAxes, ha='center', va='center', fontsize=14,
                       fontweight='bold')

    fig.subplots_adjust(hspace=0.7)

    plt.suptitle('Distribution of Top 5 Fast Moving Items in each Department and in each Outlet', fontsize=16, fontweight='bold')

    plt.tight_layout(rect=[0, 0, 1, 0.97])
    plt.show()
    
def sales_outlet_area_plot():
    """Draw a treemap of ``total_sales`` summed per outlet area.

    Uses the module-level ``trans``, ``item`` and ``store`` frames, so it
    takes no arguments. Tile sizes and labels are computed from the
    aggregated data, ordered by ascending ``outlet_area``.
    """
    df = (
        trans.merge(item, on='item_code', how="left")
        .merge(store, on='outlet_code', how="left")
        .fillna(0))

    sales_outlet_area = df.groupby('outlet_area')['total_sales'].sum().reset_index()
    # A tile with a zero or negative total has no drawable area.
    sales_outlet_area = sales_outlet_area[sales_outlet_area['total_sales'] > 0]

    # Sizes and labels come from the aggregation rather than fixed numbers,
    # so the chart always reflects the data that was loaded.
    areas = sales_outlet_area['outlet_area'].tolist()
    values = sales_outlet_area['total_sales'].tolist()
    labels = [f'({area:,.0f} sq. ft)\n {sales:,.0f}' for area, sales in zip(areas, values)]

    color_palette = sns.color_palette("rocket", len(values))

    plt.figure(figsize=(10, 4))

    # Plot the Treemap
    squarify.plot(sizes=values, label=labels,
                  pad=0.2,
                  text_kwargs={'fontsize': 10, 'color': 'white', 'weight': 'bold'},
                  color=color_palette)

    # One legend swatch per tile, in the same colour as the tile.
    color_legend = [plt.Rectangle((0, 0), 1, 1, color=color) for color in color_palette]
    legend = plt.legend(color_legend, labels, loc='center left', bbox_to_anchor=(1, 0.5))

    legend.set_title("Outlet Area")
    legend.get_frame().set_linewidth(0)

    plt.axis("off")

    plt.title("Distribution of Sales by the Outlet Area", fontweight="bold", fontsize=14)
    plt.tight_layout(rect=[0, 0, 0.8, 1])

    plt.show()
    
def sales_outlet_parking_plot(trans, item, store):
    """Draw a bar chart of ``total_sales`` summed per number of parking lots.

    Args:
        trans: transaction frame; left-joined with ``item`` on ``item_code``.
        item: item frame providing the joined item attributes.
        store: outlet frame; left-joined on ``outlet_code``.

    The joined frame must contain ``outlet_parking_lots`` and
    ``total_sales`` columns. Missing values after the joins are replaced
    by 0 before aggregating.
    """
    df = (trans.merge(item, on='item_code', how='left')
        .merge(store, on='outlet_code', how='left')
        .fillna(0))

    sales_outlet_parking = df.groupby('outlet_parking_lots')['total_sales'].sum().reset_index()

    # Custom x-axis labels, built from the parking-lot counts present in the
    # data so they always match the bars in number and in value.
    # NOTE(review): assumes outlet_parking_lots is numeric -- confirm.
    x_labels = [f'Outlets with {lots:g} parking lots'
                for lots in sales_outlet_parking['outlet_parking_lots']]

    plt.figure(figsize=(15, 6), dpi=80)
    ax = sns.barplot(x='outlet_parking_lots', y='total_sales', data=sales_outlet_parking, palette='husl', ci=None)

    # Adding text labels to the bars
    for i, v in enumerate(sales_outlet_parking['total_sales']):
        ax.text(i, v + 1, str(v), ha='center', fontsize=12, fontweight="bold")

    ax.set_title('Distribution of Sales by the No.of Outlet Parking Lots', fontweight="bold", fontsize=16)
    ax.set_xlabel('Number of parking lots in an outlet', fontsize=13)
    ax.set_ylabel('Sales', fontsize=13)
    ax.grid(True)
    ax.set_xticklabels(x_labels)

    plt.show()

def sales_outlet_profile_plot(trans, item, store):
    """Draw a donut chart of ``total_sales`` summed per outlet profile category.

    Args:
        trans: transaction frame; left-joined with ``item`` on ``item_code``.
        item: item frame providing the joined item attributes.
        store: outlet frame; left-joined on ``outlet_code``.

    The joined frame must contain ``outlet_profile_category`` and
    ``total_sales`` columns. Missing values after the joins are replaced
    by 0 before aggregating.
    """
    merged = trans.merge(item, on='item_code', how='left')
    merged = merged.merge(store, on='outlet_code', how='left').fillna(0)

    profile_totals = merged.groupby('outlet_profile_category')['total_sales'].sum().reset_index()

    plt.figure(figsize=(10, 8))

    # A pie chart whose centre is covered by a white disc below.
    pie_options = {
        'labels': profile_totals['outlet_profile_category'],
        'colors': plt.cm.tab10.colors,
        'autopct': '%1.1f%%',
        'startangle': 90,
        'wedgeprops': {'linewidth': 3.0, 'edgecolor': 'white'},
        'textprops': {'size': 'x-large', 'fontweight': 'bold'},
        'pctdistance': 0.7,
        'shadow': True,
    }
    _, _, percent_texts = plt.pie(profile_totals['total_sales'], **pie_options)

    hole = plt.Circle((0, 0), 0.5, color='white')
    plt.gcf().gca().add_artist(hole)

    plt.axis('equal')

    plt.legend(title='Outlet Profile Category', loc='upper right', bbox_to_anchor=(1.15, 1.15))
    plt.ylabel('')
    plt.title("Distribution of Sales by the Outlet Profile Category", fontweight="bold", fontsize=16)

    # Restyle the percentage texts drawn inside the wedges.
    for text in percent_texts:
        text.set_fontweight('bold')
        text.set_fontsize('13')

    plt.tight_layout()
    plt.gca().set_xlabel('')

    plt.show()
    
def outlet_area_correlation_analysis():
    """Compute the Pearson correlation between outlet area and its summed sales.

    Uses the module-level ``trans``, ``item`` and ``store`` frames. Missing
    values after the joins are replaced by 0 before ``total_sales`` is
    summed per ``outlet_area``.

    Returns:
        A ``(correlation_coefficient, p_value)`` tuple from
        ``scipy.stats.pearsonr``.
    """
    merged = trans.merge(item, on='item_code', how='left')
    merged = merged.merge(store, on='outlet_code', how='left').fillna(0)
    area_totals = merged.groupby('outlet_area')['total_sales'].sum().reset_index()

    coefficient, p_value = stats.pearsonr(area_totals['outlet_area'], area_totals['total_sales'])
    return coefficient, p_value

def outlet_parking_correlation_analysis():
    """Compute the Pearson correlation between parking-lot count and its
    summed sales.

    Uses the module-level ``trans``, ``item`` and ``store`` frames. Missing
    values after the joins are replaced by 0 before ``total_sales`` is
    summed per ``outlet_parking_lots``.

    Returns:
        A ``(correlation_coefficient, p_value)`` tuple from
        ``scipy.stats.pearsonr``.
    """
    merged = trans.merge(item, on='item_code', how='left')
    merged = merged.merge(store, on='outlet_code', how='left').fillna(0)
    parking_totals = merged.groupby('outlet_parking_lots')['total_sales'].sum().reset_index()

    coefficient, p_value = stats.pearsonr(parking_totals['outlet_parking_lots'],
                                          parking_totals['total_sales'])
    return coefficient, p_value