In [25]:
import pandas as pd
import plotly.express as px

# Load the BlinkIT grocery dataset from its CSV file.
# The path has no directory component, so it is resolved against the
# notebook's current working directory.
data_path = 'BlinkIT Grocery Data.csv'
df = pd.read_csv(data_path)


In [26]:
# Peek at the leading rows to get a feel for the data
print("Dataset Preview:", df.head(), sep="\n")

# List the column labels so later cells can reference them exactly as spelled
print("\nColumns in the dataset:", df.columns, sep="\n")

Dataset Preview:
  Item_Fat_Content Item Identifier              Item Type  \
0          Regular           FDX32  Fruits and Vegetables   
1          Low Fat           NCB42     Health and Hygiene   
2          Regular           FDR28           Frozen Foods   
3          Regular           FDL50                 Canned   
4          Low Fat           DRI25            Soft Drinks   

   Outlet Establishment Year Outlet Identifier Outlet Location Type  \
0                       2012            OUT049               Tier 1   
1                       2022            OUT018               Tier 3   
2                       2016            OUT046               Tier 1   
3                       2014            OUT013               Tier 3   
4                       2015            OUT045               Tier 2   

  Outlet Size        Outlet Type  Item Visibility  Item Weight     Sales  \
0      Medium  Supermarket Type1         0.100014        15.10  145.4786   
1      Medium  Supermarket Type2     

In [27]:
# Donut chart: total sales for each fat-content category
fat_content_sales = df.groupby('Item_Fat_Content', as_index=False)['Sales'].sum()
donut_chart = px.pie(
    fat_content_sales,
    values='Sales',
    names='Item_Fat_Content',
    hole=0.4,  # a non-zero hole turns the pie into a donut
    title='Sum of Sales by Item Fat Content',
)
donut_chart.show()

In [29]:
# Create a Clustered Bar Chart: Number of items by Item Fat Content and Outlet Location Type
fat_content_location_count = df.groupby(['Item_Fat_Content', 'Outlet Location Type']).size().reset_index(name='Count')
clustered_bar_chart1 = px.bar(fat_content_location_count, 
                              x='Item_Fat_Content', 
                              y='Count', 
                              color='Outlet Location Type', 
                              barmode='group', 
                              title='Number of Items by Item Fat Content and Outlet Location Type')
clustered_bar_chart1.show()


In [44]:
# Total sales per item type. Computed inside this cell so the chart does not
# depend on a variable left behind by a cell that is no longer in the
# notebook (otherwise Restart & Run All fails with a NameError here).
item_type_sales = df.groupby('Item Type')['Sales'].sum().reset_index()

# Sort the data in descending order by Sales so the tallest bar comes first
item_type_sales_sorted = item_type_sales.sort_values(by='Sales', ascending=False)

# Create the bar chart from the sorted data (one bar per item type)
clustered_bar_chart2 = px.bar(item_type_sales_sorted, 
                              x='Item Type', 
                              y='Sales', 
                              title='Item Type by Sum of Sales')

# Apply yellow color to all bars
clustered_bar_chart2.update_traces(marker=dict(color='yellow'))

# Tilt the x-axis tick labels so long item-type names do not overlap, then show the chart
clustered_bar_chart2.update_xaxes(tickangle=45)
clustered_bar_chart2.show()




In [36]:
# Line chart: total sales grouped by the year each outlet was established
sales_by_year = df.groupby('Outlet Establishment Year', as_index=False)['Sales'].sum()
line_chart = px.line(
    sales_by_year,
    y='Sales',
    x='Outlet Establishment Year',
    title='Sum of Sales by Outlet Established Year',
)
line_chart.show()

In [38]:
# Funnel chart: total sales for each outlet location type
location_sales = (
    df.groupby('Outlet Location Type')['Sales']
    .sum()
    .reset_index()
)
funnel_chart = px.funnel(
    location_sales,
    y='Outlet Location Type',
    x='Sales',
    title='Outlet Location by Sales',
)
funnel_chart.show()


In [40]:
# Bar chart: mean sales per item type
avg_sales_item_type = df.groupby('Item Type')['Sales'].mean().reset_index()
avg_sales_chart = px.bar(avg_sales_item_type, 
                         x='Item Type', 
                         y='Sales', 
                         title='Average Sales per Item Type', 
                         # The labels key must match the plotted column name ('Sales');
                         # a key naming a column that is not plotted is silently ignored.
                         labels={'Sales': 'Average Sales'})
# Tilt the x-axis tick labels so long item-type names do not overlap
avg_sales_chart.update_xaxes(tickangle=45)
avg_sales_chart.show()

In [42]:
# Pairwise correlations between the numeric columns, rendered as a heatmap
numeric_columns = df.select_dtypes(include=['number'])
correlation_matrix = numeric_columns.corr()

correlation_heatmap = px.imshow(
    correlation_matrix,
    labels={'color': 'Correlation'},  # caption for the color scale
    title='Correlation Heatmap of Numeric Features',
)
correlation_heatmap.show()

