In [2]:
import random
from datetime import datetime, timedelta
from random import randint, choice

import numpy as np
import pandas as pd

# Sample data parameters
SEED = 42  # fixed seed so the random draws are identical on every run
rng = random.Random(SEED)  # private generator; the global RNG state is left untouched

customers = range(1, 101)
genders = ['Male', 'Female']
product_subcategories = ['Fruits', 'Vegetables', 'Dairy', 'Bakery', 'Meat', 'Seafood', 'Beverages', 'Snacks', 'Frozen Foods', 'Cleaning Supplies']
# Read the clock once so the window is exactly 730 days wide.
# NOTE: the window is anchored to "now", so the generated dates still shift with the run date.
end_date = datetime.now()
start_date = end_date - timedelta(days=730)

# Create the dataset: one row per (customer, purchased product)
data = []

for customer_id in customers:
    # Attributes drawn once per customer and repeated on each of that customer's rows
    age = rng.randint(18, 75)
    gender = rng.choice(genders)
    num_products = rng.randint(1, 10)
    # Random moment inside the window, stored as a YYYY-MM-DD string
    purchase_date = start_date + (end_date - start_date) * rng.random()
    purchase_date = purchase_date.strftime('%Y-%m-%d')

    for _ in range(num_products):
        subcategory = rng.choice(product_subcategories)
        quantity = rng.randint(1, 20)
        data.append([customer_id, age, gender, subcategory, purchase_date, quantity])

# Create a DataFrame
columns = ['Customer ID', 'Customer Age', 'Customer Gender', 'Product Subcategory', 'Date', 'Quantity Purchased']
df = pd.DataFrame(data, columns=columns)

# Save to Excel (written to the current working directory, without the index column)
df.to_excel('SupermarketData.xlsx', index=False)


In [1]:
from google.colab import drive
# Mount Google Drive into the Colab runtime at the path below.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [3]:
import pandas as pd

# Load the workbook (path is relative to the current working directory).
workbook_path = 'SupermarketData.xlsx'
df = pd.read_excel(workbook_path)


In [4]:
import plotly.express as px

# Total quantity per product subcategory, with the subcategory kept as a regular column
subcategory_totals = df.groupby('Product Subcategory', as_index=False)['Quantity Purchased'].sum()

# Interactive bar chart of the per-subcategory totals
bar_labels = {
    'Product Subcategory': 'Product Subcategory',
    'Quantity Purchased': 'Total Quantity Purchased',
}
fig = px.bar(
    subcategory_totals,
    x='Product Subcategory',
    y='Quantity Purchased',
    title='Total Quantity Purchased by Product Subcategory',
    labels=bar_labels,
)
fig.show()


In [5]:
# Count the number of customers by gender.
# df has one row per purchased product, so collapse to one row per customer
# first; counting rows directly would weight each customer by basket size.
unique_customers = df.drop_duplicates(subset='Customer ID')
gender_counts = unique_customers['Customer Gender'].value_counts().reset_index()
# Explicit column names, independent of what value_counts/reset_index call them
gender_counts.columns = ['Customer Gender', 'Count']

# Create interactive pie chart (hole=0.3 renders it as a donut)
fig = px.pie(gender_counts, values='Count', names='Customer Gender',
             title='Gender Distribution of Customers',
             labels={'Customer Gender': 'Gender', 'Count': 'Count'},
             hole=0.3)
fig.show()


In [6]:
# Create interactive histogram of customer ages.
# df has one row per purchased product, so keep a single row per customer;
# otherwise each age is counted once per product that customer bought.
customer_ages = df.drop_duplicates(subset='Customer ID')
fig = px.histogram(customer_ages, x='Customer Age', nbins=15,
                   title='Customer Age Distribution',
                   labels={'Customer Age': 'Age', 'count': 'Frequency'})
# Set the y-axis title explicitly rather than relying on the 'count' entry in
# `labels` being applied to the implicit count axis.
fig.update_yaxes(title_text='Frequency')
fig.show()


In [7]:
# Interactive box plot: spread of per-row quantities within each product subcategory
box_labels = {
    'Product Subcategory': 'Product Subcategory',
    'Quantity Purchased': 'Quantity Purchased',
}
fig = px.box(
    df,
    x='Product Subcategory',
    y='Quantity Purchased',
    title='Quantity Purchased by Product Subcategory',
    labels=box_labels,
)
fig.show()


In [8]:
# Total quantity for every (product subcategory, gender) pair
treemap_keys = ['Product Subcategory', 'Customer Gender']
subcategory_gender_totals = df.groupby(treemap_keys, as_index=False)['Quantity Purchased'].sum()

# Interactive treemap: subcategories at the outer level, split by gender inside
treemap_labels = {
    'Quantity Purchased': 'Total Quantity Purchased',
    'Product Subcategory': 'Product Subcategory',
    'Customer Gender': 'Gender',
}
fig = px.treemap(
    subcategory_gender_totals,
    path=['Product Subcategory', 'Customer Gender'],
    values='Quantity Purchased',
    title='Quantity Purchased by Product Subcategory and Gender',
    labels=treemap_labels,
)
fig.show()


In [9]:
# Total quantity for every (customer age, product subcategory) pair
age_subcategory_totals = (
    df.groupby(['Customer Age', 'Product Subcategory'], as_index=False)['Quantity Purchased']
    .sum()
)

# Interactive bubble chart: marker size and y position both encode the total,
# colour and hover title identify the subcategory
fig = px.scatter(
    age_subcategory_totals,
    x='Customer Age',
    y='Quantity Purchased',
    size='Quantity Purchased',
    color='Product Subcategory',
    hover_name='Product Subcategory',
    title='Quantity Purchased by Customer Age and Product Subcategory',
    labels={'Customer Age': 'Customer Age', 'Quantity Purchased': 'Quantity Purchased'},
)
fig.show()


In [10]:
# Total quantity for every (product subcategory, gender) pair
grouped_quantities = df.groupby(['Product Subcategory', 'Customer Gender'])['Quantity Purchased']
sunburst_data = grouped_quantities.sum().reset_index()

# Interactive sunburst: subcategory ring inside, gender ring outside,
# coloured by quantity on the RdBu continuous scale
sunburst_labels = {
    'Quantity Purchased': 'Total Quantity Purchased',
    'Product Subcategory': 'Product Subcategory',
    'Customer Gender': 'Gender',
}
fig = px.sunburst(
    sunburst_data,
    path=['Product Subcategory', 'Customer Gender'],
    values='Quantity Purchased',
    title='Sunburst Chart of Quantity Purchased by Product Subcategory and Gender',
    color='Quantity Purchased',
    color_continuous_scale='RdBu',
    labels=sunburst_labels,
)
fig.show()


In [11]:
import plotly.express as px
import random

# Add a random US state to each customer for the purpose of this visualization.
# The state is drawn once per Customer ID and then mapped onto that customer's
# rows, so all of a customer's purchases land in the same state.
us_states = ['AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'FL', 'GA', 'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD', 'MA', 'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH', 'NJ', 'NM', 'NY', 'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX', 'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY']
customer_states = {customer_id: random.choice(us_states) for customer_id in df['Customer ID'].unique()}
df['State'] = df['Customer ID'].map(customer_states)

# Summarize data by state
state_totals = df.groupby('State')['Quantity Purchased'].sum().reset_index()

# Create interactive choropleth map (two-letter codes are matched via locationmode='USA-states')
fig = px.choropleth(state_totals, locations='State', locationmode='USA-states', color='Quantity Purchased',
                    scope='usa', title='Choropleth Map of Quantity Purchased by State',
                    labels={'Quantity Purchased': 'Total Quantity Purchased'})
fig.show()
