In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [2]:
df = pd.read_csv('data/PjzN2QI-QDydg8ZRDwm30w_29358438dddc4ad88247c4332fd1b7f1_CSV-file.csv')

In [3]:
print(df.head())

         Country           City  PO/ORDER#        Categories     SKU  QTY ORD  \
0  United States  NEW YORK CITY     102981    Fresh Packaged  FP2020       52   
1  United States  NEW YORK CITY     102980    Organic Beauty  OY2545       52   
2  United States    Los Angeles     102979    Organic Beauty  OY2545        3   
3  United States    Los Angeles     102978  Healthy Beverage  HB1016        3   
4  United States    Los Angeles     102977           Healthy  HT1064       90   

  CHANNEL WAREHOUSE Pick-Up / Delivery   Customer  SALES $  EXPECTED  
0  Retail      Kern           Delivery  Customer1     6500     45236  
1  Retail      Kern           Delivery  Customer1     6500     45274  
2  Retail      Kern           Delivery  Customer1      375     45272  
3  Retail      Kern           Delivery  Customer1      375     45206  
4  Retail      Kern           Delivery  Customer1    11250     45213  


In [4]:
# Format columns as requested
df['SALES $'] = df['SALES $'].apply(lambda x: f"${x:,.2f}")
df['EXPECTED'] = pd.to_datetime(df['EXPECTED'], format='%Y%m%d', errors='coerce')
df['QTY ORD'] = pd.to_numeric(df['QTY ORD'], errors='coerce')
print(df.head())

         Country           City  PO/ORDER#        Categories     SKU  QTY ORD  \
0  United States  NEW YORK CITY     102981    Fresh Packaged  FP2020       52   
1  United States  NEW YORK CITY     102980    Organic Beauty  OY2545       52   
2  United States    Los Angeles     102979    Organic Beauty  OY2545        3   
3  United States    Los Angeles     102978  Healthy Beverage  HB1016        3   
4  United States    Los Angeles     102977           Healthy  HT1064       90   

  CHANNEL WAREHOUSE Pick-Up / Delivery   Customer     SALES $ EXPECTED  
0  Retail      Kern           Delivery  Customer1   $6,500.00      NaT  
1  Retail      Kern           Delivery  Customer1   $6,500.00      NaT  
2  Retail      Kern           Delivery  Customer1     $375.00      NaT  
3  Retail      Kern           Delivery  Customer1     $375.00      NaT  
4  Retail      Kern           Delivery  Customer1  $11,250.00      NaT  


In [8]:
# Convert SALES $ back to numeric by removing '$' and ',' and converting to float
df['SALES $'] = df['SALES $'].astype(str).str.replace('$', '', regex=False).str.replace(',', '', regex=False).astype(float)

# Calculate sales statistics
total_sales = df['SALES $'].sum()
qty_sum = df['QTY ORD'].sum()
qty_avg = df['QTY ORD'].mean()
qty_max = df['QTY ORD'].max()
qty_min = df['QTY ORD'].min()

# Create or update City_clean column with proper case
df['City_clean'] = df['City'].str.title()

# Calculate total number of orders (unique PO/ORDER#)
total_orders = df['PO/ORDER#'].nunique()

# Print results
print(f"Total Sales: ${total_sales:,.2f}")
print(f"\nQuantity Statistics:")
print(f"Sum: {qty_sum:,}")
print(f"Average: {qty_avg:.2f}")
print(f"Maximum: {qty_max:,}")
print(f"Minimum: {qty_min:,}")
print(f"\nTotal Number of Orders: {total_orders:,}")

# Display first few rows to verify City_clean column
print("\nFirst few rows with new City_clean column:")
print(df[['City', 'City_clean']].head())

Total Sales: $13,749,250.00

Quantity Statistics:
Sum: 109,994
Average: 38.19
Maximum: 1,248
Minimum: 1

Total Number of Orders: 2,880

First few rows with new City_clean column:
            City     City_clean
0  NEW YORK CITY  New York City
1  NEW YORK CITY  New York City
2    Los Angeles    Los Angeles
3    Los Angeles    Los Angeles
4    Los Angeles    Los Angeles


In [9]:
# Create pivot table for orders and quantities by country
country_pivot = pd.pivot_table(df, 
                             values=['PO/ORDER#', 'QTY ORD'],
                             index='Country',
                             aggfunc={'PO/ORDER#': 'count',  # Count of orders
                                    'QTY ORD': 'sum'})      # Sum of quantities

# Rename columns for clarity
country_pivot.columns = ['Number of Orders', 'Total Quantity']

# Sort by Number of Orders in descending order
country_pivot = country_pivot.sort_values('Number of Orders', ascending=False)

# Format the output
print("\nOrder and Quantity Analysis by Country:")
print("=====================================")
print(country_pivot)


Order and Quantity Analysis by Country:
               Number of Orders  Total Quantity
Country                                        
United States              2600          100867
Canada                      184            5651
Mexico                       96            3476


In [10]:
# Create pivot table for orders and quantities by product category
category_pivot = pd.pivot_table(df, 
                             values=['PO/ORDER#', 'QTY ORD'],
                             index='Categories',
                             aggfunc={'PO/ORDER#': 'count',  # Count of orders
                                    'QTY ORD': 'sum'})      # Sum of quantities

# Rename columns for clarity
category_pivot.columns = ['Number of Orders', 'Total Quantity']

# Sort by Number of Orders in descending order
category_pivot = category_pivot.sort_values('Number of Orders', ascending=False)

# Format the output
print("\nOrder and Quantity Analysis by Product Category:")
print("=====================================")
print(category_pivot)


Order and Quantity Analysis by Product Category:
                  Number of Orders  Total Quantity
Categories                                        
Fresh Packaged                 878           36569
Healthy Beverage               799           30728
Organic Beauty                 672           25498
Organic Pet                    346           12440
Healthy                         72            2676
Organic Frozen                  66            1303
Organic Baby                    47             780
