In [3]:
import pandas as pd
import statistics
import numpy as np

# Descriptive statistics (max, min, range, mean, mode, variance and standard
# deviation) for each numeric column of interest in the shopping data set.
data = pd.read_csv("shopping_trends.csv")

target_col = ['Age','Purchase Amount (USD)','Review Rating','Previous Purchases']

for dt in target_col:
    # Coerce to a numeric dtype (int or float); unparseable entries become NaN.
    data[dt] = pd.to_numeric(data[dt], errors='coerce')

    # Drop NaN for this column only. Dropping the rows from `data` itself would
    # make the statistics of every later column depend on which values were
    # missing in the earlier ones (and therefore on the order of `target_col`).
    values = data[dt].dropna()

    # statistics.mode raises StatisticsError on empty input, so skip columns
    # that contain no usable numeric value at all.
    if values.empty:
        print(f"No numeric values in {dt.title()}\n")
        continue

    max_val = values.max()
    min_val = values.min()
    range_val = max_val - min_val
    mean = round(values.mean(),2)
    # statistics.mode is kept (rather than Series.mode) so that, when several
    # values tie, the reported mode is still the first one encountered.
    mode = round(statistics.mode(values),2)
    # Sample variance / standard deviation (ddof=1). The standard deviation is
    # computed from the data instead of from the already-rounded variance so
    # that rounding error is not compounded.
    var = round(values.var(ddof=1),2)
    std = round(values.std(ddof=1),2)

    print(f"Max of {dt.title()} : {max_val}")
    print(f"Min of {dt.title()} : {min_val}")
    print(f"Range of {dt.title()} : {range_val}")
    print(f"Mean of {dt.title()} : {mean}")
    print(f"Mode of {dt.title()} : {mode}")
    print(f"Variance of {dt.title()} : {var}")
    print(f"Standard Deviation of {dt.title()} : {std}\n")

Max of Age : 70
Min of Age : 18
Range of Age : 52
Mean of Age : 44.07
Mode of Age : 69
Variance of Age : 231.27
Standard Deviance of Age : 15.21

Max of Purchase Amount (Usd) : 100
Min of Purchase Amount (Usd) : 20
Range of Purchase Amount (Usd) : 80
Mean of Purchase Amount (Usd) : 59.76
Mode of Purchase Amount (Usd) : 36
Variance of Purchase Amount (Usd) : 561.0
Standard Deviance of Purchase Amount (Usd) : 23.69

Max of Review Rating : 5.0
Min of Review Rating : 2.5
Range of Review Rating : 2.5
Mean of Review Rating : 3.75
Mode of Review Rating : 3.4
Variance of Review Rating : 0.51
Standard Deviance of Review Rating : 0.71

Max of Previous Purchases : 50
Min of Previous Purchases : 1
Range of Previous Purchases : 49
Mean of Previous Purchases : 25.35
Mode of Previous Purchases : 31
Variance of Previous Purchases : 208.72
Standard Deviance of Previous Purchases : 14.45



In [30]:
import pandas as pd
import statistics
import numpy as np

# Descriptive statistics of purchase amount and customer age for each state.
data = pd.read_csv("shopping_trends.csv")

# Aggregate every location in a single groupby pass instead of re-filtering the
# whole frame once per location.
#   * sort=False keeps the groups in first-appearance order, so after
#     reset_index() the row labels match the order of data['Location'].unique().
#   * Rows whose Location is missing are excluded by groupby.
res_df = (
    data.groupby('Location', sort=False)
    .agg(**{
        'Total Purchased': ('Purchase Amount (USD)', 'count'),
        'Min Purchase Amount': ('Purchase Amount (USD)', 'min'),
        'Max Purchase Amount': ('Purchase Amount (USD)', 'max'),
        'Average Purchase Amount': ('Purchase Amount (USD)', 'mean'),
        'Min Customer Age': ('Age', 'min'),
        'Max Customer Age': ('Age', 'max'),
        'Average Customer Age': ('Age', 'mean'),
    })
    # Only the two averages are rounded; counts and min/max stay exact.
    .round({'Average Purchase Amount': 2, 'Average Customer Age': 2})
    .reset_index()
    # Highest average spend first.
    .sort_values(by='Average Purchase Amount', ascending=False)
)
res_df

Unnamed: 0,Location,Total Purchased,Min Purchase Amount,Max Purchase Amount,Average Purchase Amount,Min Customer Age,Max Customer Age,Average Customer Age
29,Alaska,72,20,99,67.6,18,70,43.0
38,Pennsylvania,74,23,100,66.57,18,70,42.97
28,Arizona,65,22,100,66.55,19,70,45.28
8,West Virginia,81,24,100,63.88,18,70,42.54
22,Nevada,87,20,100,63.38,18,70,43.61
47,Washington,73,20,100,63.33,19,70,44.4
25,North Dakota,83,20,100,62.89,18,70,43.77
41,Virginia,77,22,100,62.88,18,70,41.61
40,Utah,71,20,100,62.58,18,70,43.31
49,Michigan,73,20,100,62.1,18,70,42.1
