In [3]:
import pandas as pd
import statistics
import numpy as np

# Declaration
# Summary statistics (max, min, range, mean, mode, variance and standard
# deviation) for each numeric column listed in `target_col`.
data = pd.read_csv("shopping_trends.csv")

target_col = ['Age','Purchase Amount (USD)','Review Rating','Previous Purchases']

for dt in target_col:
    # Convert to a numeric dtype; entries that cannot be parsed become NaN.
    # The missing values are then dropped for THIS column only. Working on a
    # separate Series leaves `data` untouched, so a NaN in one column no longer
    # removes rows from the statistics of the columns processed after it.
    values = pd.to_numeric(data[dt], errors='coerce').dropna()

    max_val = values.max()
    min_val = values.min()
    range_val = max_val - min_val
    mean = round(values.mean(),2)
    mode = round(statistics.mode(values),2)

    # Sample variance (ddof=1). The standard deviation is derived from the
    # unrounded variance so the rounding of `var` does not leak into `std`.
    variance = np.nanvar(values,ddof=1)
    var = round(variance,2)
    std = round(variance ** 0.5,2)

    print(f"Max of {dt.title()} : {max_val}")
    print(f"Min of {dt.title()} : {min_val}")
    print(f"Range of {dt.title()} : {range_val}")
    print(f"Mean of {dt.title()} : {mean}")
    print(f"Mode of {dt.title()} : {mode}")
    print(f"Variance of {dt.title()} : {var}")
    print(f"Standard Deviance of {dt.title()} : {std}\n")

Max of Age : 70
Min of Age : 18
Range of Age : 52
Mean of Age : 44.07
Mode of Age : 69
Variance of Age : 231.27
Standard Deviance of Age : 15.21

Max of Purchase Amount (Usd) : 100
Min of Purchase Amount (Usd) : 20
Range of Purchase Amount (Usd) : 80
Mean of Purchase Amount (Usd) : 59.76
Mode of Purchase Amount (Usd) : 36
Variance of Purchase Amount (Usd) : 561.0
Standard Deviance of Purchase Amount (Usd) : 23.69

Max of Review Rating : 5.0
Min of Review Rating : 2.5
Range of Review Rating : 2.5
Mean of Review Rating : 3.75
Mode of Review Rating : 3.4
Variance of Review Rating : 0.51
Standard Deviance of Review Rating : 0.71

Max of Previous Purchases : 50
Min of Previous Purchases : 1
Range of Previous Purchases : 49
Mean of Previous Purchases : 25.35
Mode of Previous Purchases : 31
Variance of Previous Purchases : 208.72
Standard Deviance of Previous Purchases : 14.45



In [30]:
import pandas as pd
import statistics
import numpy as np

# Declaration
# Descriptive statistics of Purchase Amount and Age for each location
data = pd.read_csv("shopping_trends.csv")

location = data['Location'].unique().tolist()


def summarise_location(name):
    """Build the summary record for the rows whose Location equals `name`.

    Returns a dict holding the number of non-null purchase amounts, the
    min/max/mean purchase amount and the min/max/mean customer age; both
    means are rounded to two decimals.
    """
    rows = data[data['Location'] == name]
    amounts = rows['Purchase Amount (USD)']
    ages = rows['Age']
    return {
        'Location': name,
        'Total Purchased': amounts.count(),
        'Min Purchase Amount': amounts.min(),
        'Max Purchase Amount': amounts.max(),
        'Average Purchase Amount': round(amounts.mean(),2),
        'Min Customer Age': ages.min(),
        'Max Customer Age': ages.max(),
        'Average Customer Age': round(ages.mean(),2),
    }


# One record per location, in order of first appearance in the data
results = [summarise_location(dt) for dt in location]

# Highest average purchase amount first
res_df = pd.DataFrame(results).sort_values(by='Average Purchase Amount', ascending=False)
res_df

Unnamed: 0,Location,Total Purchased,Min Purchase Amount,Max Purchase Amount,Average Purchase Amount,Min Customer Age,Max Customer Age,Average Customer Age
29,Alaska,72,20,99,67.6,18,70,43.0
38,Pennsylvania,74,23,100,66.57,18,70,42.97
28,Arizona,65,22,100,66.55,19,70,45.28
8,West Virginia,81,24,100,63.88,18,70,42.54
22,Nevada,87,20,100,63.38,18,70,43.61
47,Washington,73,20,100,63.33,19,70,44.4
25,North Dakota,83,20,100,62.89,18,70,43.77
41,Virginia,77,22,100,62.88,18,70,41.61
40,Utah,71,20,100,62.58,18,70,43.31
49,Michigan,73,20,100,62.1,18,70,42.1


In [42]:
import pandas as pd
import statistics
import numpy as np

# Declaration
# Most and least frequent Size, Category, Color and Payment Method for each location
data = pd.read_csv("shopping_trends.csv")

location = data['Location'].unique().tolist()

# Source column -> label used in the "Most <label>" / "Least <label>" result
# columns. The insertion order here is the column order of the result table.
profile_columns = {
    'Size': 'Size',
    'Category': 'Category',
    'Color': 'Color',
    'Payment Method': 'Payment',
}


def most_and_least(values):
    """Return the (most frequent, least frequent) value of a Series.

    Frequencies come from `value_counts()`; the winners are picked with
    `idxmax()` / `idxmin()`. Returns `(None, None)` when `value_counts()`
    is empty, i.e. there is nothing to count.
    """
    counts = values.value_counts()
    if counts.empty:
        return None, None
    return counts.idxmax(), counts.idxmin()


results = []

for dt in location:
    location_data = data[data['Location'] == dt]

    record = {
        'Location': dt,
        # Number of non-null purchase amounts recorded for this location
        'Total Purchased': location_data['Purchase Amount (USD)'].count(),
    }
    for column, label in profile_columns.items():
        most, least = most_and_least(location_data[column])
        record[f'Most {label}'] = most
        record[f'Least {label}'] = least

    results.append(record)

res_df = pd.DataFrame(results)
res_df = res_df.sort_values(by='Total Purchased', ascending=False)
res_df

Unnamed: 0,Location,Total Purchased,Most Size,Least Size,Most Category,Least Category,Most Color,Least Color,Most Payment,Least Payment
6,Montana,96,M,XL,Clothing,Outerwear,Gray,Blue,Debit Card,Venmo
18,California,95,M,S,Clothing,Outerwear,Turquoise,White,Cash,PayPal
37,Idaho,93,M,XL,Clothing,Outerwear,Black,Charcoal,PayPal,Credit Card
26,Illinois,92,M,XL,Clothing,Outerwear,Olive,White,Debit Card,Venmo
15,Alabama,89,M,XL,Clothing,Outerwear,Magenta,Lavender,Bank Transfer,Debit Card
46,Minnesota,88,M,XL,Clothing,Outerwear,Purple,Indigo,Cash,Bank Transfer
43,Nebraska,87,M,S,Accessories,Outerwear,Turquoise,Beige,Venmo,Credit Card
14,New York,87,M,XL,Clothing,Outerwear,Teal,Olive,Cash,PayPal
22,Nevada,87,M,XL,Clothing,Outerwear,Orange,Turquoise,Bank Transfer,PayPal
33,Maryland,86,M,S,Clothing,Outerwear,Olive,Blue,Cash,Bank Transfer


In [36]:
import pandas as pd
import statistics
import numpy as np

pd.set_option('display.max_columns', None)
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.width', 0)

# Declaration
# Average Age, Purchase Amount and Review Rating for every Item Purchased,
# Location, Season and Category, each flagged against the overall average.
data = pd.read_csv("shopping_trends.csv")

# Factors: numeric columns that are averaged inside every group
context = ['Age', 'Purchase Amount (USD)', 'Review Rating']

# Target Col: columns to group by; one report is printed per column
target_col = ['Item Purchased','Location','Season','Category']

# The overall averages do not depend on the grouping column, so they are
# computed once instead of on every loop iteration. Built-in round() is used
# rather than the scalar's .round() method so this works for NumPy scalars and
# plain Python floats alike.
average_age_all = round(data['Age'].mean(), 2)
average_purchase_all = round(data['Purchase Amount (USD)'].mean(), 2)
average_rating_all = round(data['Review Rating'].mean(), 2)

# (averaged column, overall average to compare with, name of the status column)
status_rules = [
    ('Age', average_age_all, 'Age Status'),
    ('Purchase Amount (USD)', average_purchase_all, 'Purchase (USD) Status'),
    ('Review Rating', average_rating_all, 'Rating Status'),
]

for dt in target_col:
    # Group by the target column, average the factors (2 decimals) and list
    # the groups with the highest average purchase amount first.
    stats = (
        data.groupby(dt)[context]
        .mean()
        .round(2)
        .sort_values(by='Purchase Amount (USD)', ascending=False)
    )

    # A group is "Normal" when its rounded mean is at least the rounded
    # overall mean, otherwise "Below Average".
    for column, overall_average, status_column in status_rules:
        stats[status_column] = np.where(
            stats[column] >= overall_average, "Normal", "Below Average"
        )

    # Rename column
    stats = stats.rename(columns={
        'Purchase Amount (USD)': 'Purchase (USD)',
        'Review Rating': 'Rating',
    })

    # Reorder columns so every value sits next to its status
    stats = stats[['Age', 'Age Status', 'Purchase (USD)', 'Purchase (USD) Status', 'Rating', 'Rating Status']]

    print(f"Average Age, Purchase Amount, and Review Rating for Every {dt}\n")
    print(f"Average Age : {average_age_all}\n")
    print(f"Average Purchase Amount : {average_purchase_all}\n")
    print(f"Average Review Rating : {average_rating_all}\n")
    print(stats)
    print("\n================================================================================================\n")

Average Age, Purchase Amount, and Review Rating for Every Item Purchased

Average Age : 44.07

Average Purchase Amount : 59.76

Average Review Rating : 3.75

                  Age     Age Status  Purchase (USD) Purchase (USD) Status  Rating  Rating Status
Item Purchased                                                                                   
T-shirt         42.84  Below Average           62.91                Normal    3.78         Normal
Boots           44.85         Normal           62.62                Normal    3.81         Normal
Dress           44.60         Normal           62.17                Normal    3.75         Normal
Shoes           46.13         Normal           61.60                Normal    3.75         Normal
Shirt           41.99  Below Average           61.14                Normal    3.63  Below Average
Scarf           44.32         Normal           60.90                Normal    3.70  Below Average
Blouse          44.29         Normal           60.88      

In [58]:
import pandas as pd
import statistics
import numpy as np

# Show every column on a single, unwrapped line when frames are printed
for option, value in (
    ('display.max_columns', None),
    ('display.expand_frame_repr', False),
    ('display.width', 0),
):
    pd.set_option(option, value)

# Declaration
# Counts of each Shipping Type, Season and Category value per Location
data = pd.read_csv("shopping_trends.csv")

# Target Col
target_col = ['Shipping Type','Season','Category']

for dt in target_col:
    # Count every value of the target column inside each location ...
    counts_long = data.groupby('Location')[dt].value_counts()
    # ... then pivot the values into columns; combinations that never occur become 0
    counts_wide = counts_long.unstack(fill_value=0)

    print(f"Total {dt} for Each Location\n")
    print(counts_wide)
    print("\n")

Total Shipping Type for Each Location

Shipping Type   2-Day Shipping  Express  Free Shipping  Next Day Air  Standard  Store Pickup
Location                                                                                    
Alabama                     16       20              9            17         8            19
Alaska                      13       13             14             8        17             7
Arizona                     10       12             10            11        10            12
Arkansas                    12       16             14            17        11             9
California                  14       16             16            20        16            13
Colorado                    12        4             13            14        14            18
Connecticut                 16       11              6            18        12            15
Delaware                    14       14             20            16         9            13
Florida                     10 