In [5]:

import numpy as np
import pandas as pd

# Load the dataset.
# DATA_PATH is a machine-specific location; point it at wherever your copy of
# data.csv is saved before running this cell.
DATA_PATH = r"C:\Users\Dell\Downloads\data.csv"
# The file is read as ISO-8859-1 rather than pandas' default UTF-8.
df = pd.read_csv(DATA_PATH, encoding="ISO-8859-1")

# Quick preview of the dataset: first rows, column names, then dtypes/null counts.
print("✅ Dataset Preview:")
print(df.head())
print("\nColumns:", df.columns.tolist())
print("\nBasic Info:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would append a stray "None" line.
df.info()

# 1. Total purchase amount per country.
# The spend of each row is UnitPrice * Quantity. It is stored on df as the
# 'Total Purchase Amount (USD)' column (later steps read it) and then summed
# within each Country.
# NOTE(review): the column label says USD, but nothing in this cell establishes
# the currency of UnitPrice - confirm against the data source.
line_totals = df["UnitPrice"] * df["Quantity"]
df["Total Purchase Amount (USD)"] = line_totals

# as_index=False keeps Country as a regular column instead of the index.
total_purchase_country = df.groupby("Country", as_index=False)["Total Purchase Amount (USD)"].sum()

print("\n✅ Total Purchase Amount by Country:")
print(total_purchase_country)

# 2. Total purchase amount per product.
# The dataset has no category column, so StockCode stands in for "category":
# the per-row totals are summed for every distinct StockCode.
spend_by_stock_code = df.groupby("StockCode")["Total Purchase Amount (USD)"].sum()
# Turn the StockCode index back into an ordinary column for display.
total_purchase_category = spend_by_stock_code.reset_index()

print("\n✅ Total Purchase Amount by Product Category (StockCode):")
print(total_purchase_category)

# 3. Average age by location - not computable here.
# The loaded columns contain no 'Age' field, so this step only reports that
# fact. If an 'Age' column were available, 'Country' could serve as the
# location key, e.g.:
#     avg_age_location = df.groupby("Country")["Age"].mean().reset_index()
age_notice_lines = (
    "\n✅ Average Age by Location (Not Available in Dataset):",
    "Age data is not present in the dataset. We can use 'Country' or other fields for grouping.",
)
# One print call, one output line per message.
print(*age_notice_lines, sep="\n")

# 4. Customer count per country.
# The dataset has no subscription-status column, so this step counts the
# distinct CustomerID values within each Country instead.
customer_count_country = (
    df.groupby("Country")["CustomerID"]
    .nunique()
    .reset_index()
    # Label the count column for display; chained rather than renamed in place.
    .rename(columns={"CustomerID": "Customer Count"})
)

print("\n✅ Customer Count by Country:")
print(customer_count_country)

# 5. Multiple aggregations in one pass.
# The dataset has no gender column, so the summary is grouped by Country.
# Output columns:
#   Total_Spend - sum of the per-row purchase totals
#   Avg_Spend   - mean of the per-row purchase totals (per row, not per customer)
#   Count       - number of distinct CustomerID values
spend_col = "Total Purchase Amount (USD)"
summary_spec = {
    "Total_Spend": pd.NamedAgg(column=spend_col, aggfunc="sum"),
    "Avg_Spend": pd.NamedAgg(column=spend_col, aggfunc="mean"),
    "Count": pd.NamedAgg(column="CustomerID", aggfunc="nunique"),
}
# Dict order fixes the output column order; reset_index() restores Country as a column.
country_summary = df.groupby("Country").agg(**summary_spec).reset_index()

print("\n✅ Summary by Country (Aggregated):")
print(country_summary)


✅ Dataset Preview:
  InvoiceNo StockCode                          Description  Quantity  \
0    536365    85123A   WHITE HANGING HEART T-LIGHT HOLDER         6   
1    536365     71053                  WHITE METAL LANTERN         6   
2    536365    84406B       CREAM CUPID HEARTS COAT HANGER         8   
3    536365    84029G  KNITTED UNION FLAG HOT WATER BOTTLE         6   
4    536365    84029E       RED WOOLLY HOTTIE WHITE HEART.         6   

      InvoiceDate  UnitPrice  CustomerID         Country  
0  12/1/2010 8:26       2.55     17850.0  United Kingdom  
1  12/1/2010 8:26       3.39     17850.0  United Kingdom  
2  12/1/2010 8:26       2.75     17850.0  United Kingdom  
3  12/1/2010 8:26       3.39     17850.0  United Kingdom  
4  12/1/2010 8:26       3.39     17850.0  United Kingdom  

Columns: ['InvoiceNo', 'StockCode', 'Description', 'Quantity', 'InvoiceDate', 'UnitPrice', 'CustomerID', 'Country']

Basic Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 541909 entries