In [None]:
import json
from itertools import groupby

In [None]:
pip install flatten_json

In [None]:
from flatten_json import flatten

In [None]:
# Load the raw transaction dataset. The context manager guarantees the file
# handle is closed as soon as parsing finishes, and the explicit UTF-8 encoding
# (the standard encoding for JSON) avoids depending on the platform default.
with open('sample-dataset-3 - Copy.json', encoding='utf-8') as dataset_file:
    json_file = json.load(dataset_file)
json_file

In [None]:
#Create a function that transforms raw transaction data


In [None]:
from functools import reduce

def transform_transaction_data(transactions):
    """Flatten nested raw transactions into one-level summary records.

    Args:
        transactions: Iterable of raw transaction dicts. Each one is read for
            'transaction_id', 'timestamp', 'payment_method', 'status', a
            'customer' mapping with 'id' and 'region', and an 'items' list
            whose entries provide 'product_id', 'category', 'price' and
            'quantity'.

    Returns:
        A list with one dict per input transaction, in input order, with the
        keys 'transaction_id', 'timestamp', 'customer_id', 'customer_region',
        'payment_method', 'status', 'total_value', 'items' and 'categories'.

    Raises:
        KeyError: If any of the keys listed above is missing.
    """
    flattened = []
    for raw in transactions:
        line_items = raw['items']
        # Transaction value is the sum of price * quantity over all line items.
        order_total = sum(line['price'] * line['quantity'] for line in line_items)

        record = {
            'transaction_id': raw['transaction_id'],
            'timestamp': raw['timestamp'],
            'customer_id': raw['customer']['id'],
            'customer_region': raw['customer']['region'],
            'payment_method': raw['payment_method'],
            'status': raw['status'],
            'total_value': order_total,
            # Keep only the fields the product-level analysis needs.
            'items': [
                {'product_id': line['product_id'], 'quantity': line['quantity']}
                for line in line_items
            ],
            'categories': [line['category'] for line in line_items],
        }
        flattened.append(record)

    return flattened

In [None]:
# Extract unique product categories using map() and set()
def extract_unique_categories(transactions):
    """Return the distinct product categories found across transactions.

    Args:
        transactions: Iterable of dicts, each holding a 'categories' iterable
            of hashable category values.

    Returns:
        A set of every category that appears in at least one transaction.
        The set is empty when `transactions` is empty.

    Raises:
        KeyError: If a transaction has no 'categories' key.
    """
    # set().union(*iterables) merges all per-transaction category lists without
    # building intermediate concatenated lists, and it returns an empty set for
    # empty input, where reduce() without an initial value raises TypeError.
    return set().union(*map(lambda transaction: transaction['categories'], transactions))

In [None]:
#Create analysis functions
from collections import defaultdict

def regional_sales(transactions):
    """Total the transaction value per customer region.

    Args:
        transactions: Iterable of dicts with 'customer_region' and
            'total_value' keys.

    Returns:
        A defaultdict(float) mapping each region to the sum of its
        transactions' 'total_value', keyed in order of first appearance.
    """
    totals_by_region = defaultdict(float)

    for record in transactions:
        region = record['customer_region']
        totals_by_region[region] = totals_by_region[region] + record['total_value']

    return totals_by_region

In [None]:
def top_selling_products(transactions):
    """Rank products by the total quantity sold.

    Args:
        transactions: Iterable of dicts whose 'items' entry is a list of
            dicts with 'product_id' and 'quantity' keys.

    Returns:
        A list of (product_id, total_quantity) tuples sorted by quantity in
        descending order. Products with equal totals keep the order in which
        they were first seen.
    """
    units_by_product = defaultdict(int)

    for record in transactions:
        for line in record['items']:
            product = line['product_id']
            units_by_product[product] = units_by_product[product] + line['quantity']

    # Highest-selling product first.
    ranking = list(units_by_product.items())
    ranking.sort(key=lambda pair: pair[1], reverse=True)
    return ranking

In [None]:
def average_transaction_value_by_payment(transactions):
    """Compute the mean transaction value for each payment method.

    Args:
        transactions: Iterable of dicts with 'payment_method' and
            'total_value' keys.

    Returns:
        A dict mapping each payment method to the arithmetic mean of the
        'total_value' of its transactions, keyed in order of first appearance.
    """
    values_by_method = {}

    # Bucket every transaction value under its payment method.
    for record in transactions:
        values_by_method.setdefault(record['payment_method'], []).append(record['total_value'])

    # Every bucket holds at least one value, so the division is always defined.
    averages = {}
    for method, values in values_by_method.items():
        averages[method] = sum(values) / len(values)
    return averages

In [None]:
# Create a report generation function


In [None]:
def filter_completed_transactions(transactions):
    """Keep only the transactions whose 'status' equals 'completed'.

    Args:
        transactions: Iterable of dicts with a 'status' key.

    Returns:
        A new list of the matching transactions, in their original order.
    """
    return [record for record in transactions if record['status'] == 'completed']

In [None]:
def sort_transactions(transactions):
    """Sort transactions by region, then by total value, both descending.

    Args:
        transactions: Iterable of dicts with 'customer_region' and
            'total_value' keys.

    Returns:
        A new list; the input is not modified. Because the whole
        (region, value) key is reversed, regions appear in descending order
        and, within a region, the highest 'total_value' comes first.
    """
    def region_then_value(record):
        return record['customer_region'], record['total_value']

    ordered = list(transactions)
    ordered.sort(key=region_then_value, reverse=True)
    return ordered

In [None]:
from collections import defaultdict
from datetime import datetime

def generate_monthly_report(transactions):
    """Aggregate total transaction value per calendar month.

    Args:
        transactions: Iterable of dicts with a 'timestamp' string and a
            numeric 'total_value'. Timestamps in the form
            'YYYY-MM-DDTHH:MM:SS' are parsed as before; other ISO-8601
            variants (fractional seconds, a UTC offset, or a trailing 'Z')
            are accepted as well.

    Returns:
        A defaultdict(float) mapping 'YYYY-MM' to the sum of 'total_value'
        for that month. The month is taken from the timestamp as written;
        no timezone conversion is applied.

    Raises:
        ValueError: If a timestamp cannot be parsed in any supported form.
        KeyError: If a transaction lacks 'timestamp' or 'total_value'.
    """
    # Group transactions by month and year
    monthly_sales = defaultdict(float)

    for transaction in transactions:
        raw_timestamp = transaction['timestamp']
        try:
            # Primary format; tried first so previously accepted input is
            # parsed exactly as before.
            date = datetime.strptime(raw_timestamp, '%Y-%m-%dT%H:%M:%S')
        except ValueError:
            # Fall back to general ISO-8601 parsing. A trailing 'Z' is
            # rewritten as an explicit offset because fromisoformat() only
            # understands 'Z' from Python 3.11 onwards.
            if raw_timestamp.endswith('Z'):
                raw_timestamp = raw_timestamp[:-1] + '+00:00'
            date = datetime.fromisoformat(raw_timestamp)

        # Aggregate sales by month-year
        monthly_sales[date.strftime('%Y-%m')] += transaction['total_value']

    return monthly_sales