In [None]:
# Flatten nested items and calculate sales.
# Each sale in `df` holds a list of purchased items. Emit one record per item
# and copy the parent sale's store location onto it, because the per-store
# rankings further down group the flattened frame by 'storeLocation'.
# NOTE(review): assumes `df` has a 'storeLocation' column — confirm against the load cell.
ITEM_COLUMNS = ['storeLocation', 'name', 'tags', 'quantity', 'price']

flattened_items = []
for store_location, items in zip(df['storeLocation'], df['items']):
    for item in items:
        # A missing quantity is treated as zero units sold.
        quantity = int(item['quantity']) if 'quantity' in item else 0

        # Price is either a plain number or a {'$numberDecimal': '...'} wrapper.
        price = item['price']
        if isinstance(price, dict):
            price = float(price['$numberDecimal'])
        else:
            price = float(price)

        flattened_items.append({
            'storeLocation': store_location,
            'name': item['name'],
            'tags': item['tags'],
            'quantity': quantity,
            'price': price,
        })

# Create a new DataFrame with flattened data. The explicit column list keeps the
# expected columns present even when no items were collected.
new_df = pd.DataFrame(flattened_items, columns=ITEM_COLUMNS)

# Calculate sales for each item row
new_df['sales'] = new_df['quantity'] * new_df['price']

# Rank products by total sales (highest first) and keep the ten best
top_products = (
    new_df.groupby('name')['sales']
    .sum()
    .reset_index()
    .sort_values(by='sales', ascending=False)
)
top_10_products = top_products.head(10)

# Rank store locations by total sales; Rank 1 is the highest-grossing store
store_rankings = (
    new_df.groupby('storeLocation')['sales']
    .sum()
    .reset_index()
    .sort_values(by='sales', ascending=False)
)
store_rankings['Rank'] = range(1, len(store_rankings) + 1)


In [None]:
# Top 3 products per store location, ranked by total sales.
# Sum the sales of every (store, product) pair, then order each store's
# products from highest to lowest total.
sales_by_store_and_product = (
    new_df.groupby(['storeLocation', 'name'])['sales']
    .sum()
    .reset_index()
    .sort_values(by=['storeLocation', 'sales'], ascending=[True, False])
)

# Map each store location to the names of its three best-selling products.
top_3_products_by_store = {
    location: products['name'].head(3).tolist()
    for location, products in sales_by_store_and_product.groupby('storeLocation')
}

# Top 3 products per store location, ranked by units sold.
# Same shape as above, but the metric is summed quantity and the result is
# kept as a DataFrame with up to three rows per store.
sales_by_location = (
    new_df.groupby(['storeLocation', 'name'])['quantity']
    .sum()
    .reset_index()
    .sort_values(by=['storeLocation', 'quantity'], ascending=[True, False])
)
top_3_products_by_location = sales_by_location.groupby('storeLocation').head(3)

# Purchase methods broken down by customer gender.
# Customers without a 'gender' key are counted under 'Unknown'.
purchase_method = df['purchaseMethod']
gender = df['customer'].apply(lambda customer: customer.get('gender', 'Unknown'))
combined_df = pd.DataFrame({'purchaseMethod': purchase_method, 'gender': gender})
purchase_method_by_gender = pd.crosstab(
    index=combined_df['gender'],
    columns=combined_df['purchaseMethod'],
)


In [None]:
# Convert saleDate to datetime.
# Raw values are {'$date': ...} wrappers; anything that is not a dict (for
# example an already-converted timestamp when this cell is re-run) is passed
# through unchanged so the cell can be executed more than once.
df['DateStr'] = df['saleDate'].apply(lambda x: x['$date'] if isinstance(x, dict) else x)
df['saleDate'] = pd.to_datetime(df['DateStr'], errors='coerce')
df['YearMonth'] = df['saleDate'].dt.strftime('%Y-%m')

# Calculate monthly total sales.
# new_df has one row per item while df has one row per sale, so the two frames
# do not share an index; grouping new_df by df['YearMonth'] directly would pair
# item rows with the months of unrelated sales. Instead, repeat each sale's
# YearMonth once per item and label the result with new_df's index.
# NOTE(review): relies on new_df holding the items of df in row order, with df
# not filtered or reordered since new_df was built — a length mismatch raises.
items_per_sale = df['items'].apply(len)
item_year_month = pd.Series(
    df['YearMonth'].repeat(items_per_sale).to_numpy(),
    index=new_df.index,
    name='YearMonth',
)
monthly_total_sales = new_df.groupby(item_year_month)['sales'].sum().reset_index()
