# Capstone Project: Supply Chain Monitoring and Optimization Platform

## Week 2 – Data Collection & Preprocessing in Python

✅ Week 2 – Data Collection & Preprocessing in Python

🔧 Tools: Python (Pandas, NumPy, Requests)

Capstone Tasks:

- Use `requests` to fetch data from a sample API, or load it from a local JSON/CSV file.
- Clean the data using pandas (drop nulls, format dates, etc.).
- Perform basic calculations using NumPy (e.g., delays, stock levels).
- Display the cleaned and processed data.

Deliverables:

- A Python script for collecting and processing supply chain data.
- A cleaned pandas DataFrame with the processed outputs.

## Step 1: Upload your CSV files to Colab

In [None]:
# Colab-only step: open the file-upload prompt so the source CSVs are available
# to the runtime. The loading cell further down reads orders.csv, suppliers.csv
# and inventory.csv by relative path, so those three files are expected here.
from google.colab import files
# The return value is stored in `uploaded`; none of the cells visible in this
# notebook reference it afterwards.
uploaded = files.upload()

Saving inventory.csv to inventory.csv
Saving orders.csv to orders.csv
Saving suppliers.csv to suppliers.csv


## Step 2: Import Libraries

In [None]:
import pandas as pd
import numpy as np
import requests  # optional if fetching from online API

## Step 3: Load the CSV Files

In [None]:
# Read the three uploaded CSV files into DataFrames.
# The file names are listed in the same order as the targets on the left.
orders_df, suppliers_df, inventory_df = (
    pd.read_csv(csv_name)
    for csv_name in ('orders.csv', 'suppliers.csv', 'inventory.csv')
)

In [None]:
# Preview the first rows of the orders table as loaded, before any cleaning.
orders_df.head()

Unnamed: 0,order_id,product_id,supplier_id,quantity,order_Date,delivery_date,status
0,1,3,1,40,2025-07-03,2025-07-07,Shipped
1,2,7,4,15,2025-07-06,2025-07-11,Delivered
2,3,2,2,25,2025-07-01,2025-07-06,Pending
3,4,10,5,10,2025-07-10,2025-07-14,Cancelled
4,5,1,3,50,2025-07-02,2025-07-07,Delivered


In [None]:
# Preview the first rows of the suppliers table as loaded, before any cleaning.
suppliers_df.head()

Unnamed: 0,supplier_id,name,contact_info,location
0,1,ABC Traders,abc@gmail.com,Mumbai
1,2,Global Supplies,global@supplies.com,Delhi
2,3,Metro Distributors,metro@distributors.com,Chennai
3,4,FastMart,contact@fastmart.com,Hyderabad
4,5,QuickSupply,support@quicksupply.com,Bangalore


In [None]:
# Preview the first rows of the inventory table as loaded, before any cleaning.
inventory_df.head()

Unnamed: 0,product_id,product_name,quantity_in_stock,reorder_level
0,1,Laptop,50,10
1,2,Mouse,200,30
2,3,Keyboard,150,25
3,4,Monitor,80,15
4,5,Printer,60,10


## Step 4: Clean the Data

In [None]:
# Show the raw headers first so naming quirks (e.g. mixed case) are visible
# before they are normalised below.
for caption, frame in (
    ("Orders columns:", orders_df),
    ("Suppliers columns:", suppliers_df),
    ("Inventory columns:", inventory_df),
):
    print(caption, frame.columns)

# Normalise every table the same way: trim and lower-case the headers, then
# remove (in place) each row that holds at least one null value.
for frame in (orders_df, suppliers_df, inventory_df):
    frame.columns = frame.columns.str.strip().str.lower()
    frame.dropna(inplace=True)

# Parse both order date columns; values that cannot be parsed become NaT
# (errors='coerce') instead of raising.
for date_col in ('order_date', 'delivery_date'):
    orders_df[date_col] = pd.to_datetime(orders_df[date_col], errors='coerce')

# Discard orders where either date failed to parse.
orders_df.dropna(subset=['order_date', 'delivery_date'], inplace=True)

# Switch from the lower-case headers to the PascalCase names used by the
# calculation and display cells.
orders_df.rename(
    columns=dict(
        order_id='OrderID',
        product_id='ProductID',
        supplier_id='SupplierID',
        quantity='Quantity',
        order_date='OrderDate',
        delivery_date='DeliveryDate',
        status='Status',
    ),
    inplace=True,
)

suppliers_df.rename(
    columns=dict(
        supplier_id='SupplierID',
        name='SupplierName',
        contact_info='ContactInfo',
        location='Location',
    ),
    inplace=True,
)

inventory_df.rename(
    columns=dict(
        product_id='ProductID',
        product_name='ProductName',
        quantity_in_stock='StockQuantity',
        reorder_level='ReorderLevel',
    ),
    inplace=True,
)


Orders columns: Index(['order_id', 'product_id', 'supplier_id', 'quantity', 'order_Date',
       'delivery_date', 'status'],
      dtype='object')
Suppliers columns: Index(['supplier_id', 'name', 'contact_info', 'location'], dtype='object')
Inventory columns: Index(['product_id', 'product_name', 'quantity_in_stock', 'reorder_level'], dtype='object')


In [None]:
# Heading and the first rows of the cleaned orders table, one item per line.
print("\nCleaned Orders DataFrame:", orders_df.head(), sep="\n")


Cleaned Orders DataFrame:
   OrderID  ProductID  SupplierID  Quantity  OrderDate DeliveryDate     Status
0        1          3           1        40 2025-07-03   2025-07-07    Shipped
1        2          7           4        15 2025-07-06   2025-07-11  Delivered
2        3          2           2        25 2025-07-01   2025-07-06    Pending
3        4         10           5        10 2025-07-10   2025-07-14  Cancelled
4        5          1           3        50 2025-07-02   2025-07-07  Delivered


In [None]:
# Heading and the first rows of the cleaned suppliers table, one item per line.
print("\nCleaned Suppliers DataFrame:", suppliers_df.head(), sep="\n")


Cleaned Suppliers DataFrame:
   SupplierID        SupplierName              ContactInfo   Location
0           1         ABC Traders            abc@gmail.com     Mumbai
1           2     Global Supplies      global@supplies.com      Delhi
2           3  Metro Distributors   metro@distributors.com    Chennai
3           4            FastMart     contact@fastmart.com  Hyderabad
4           5         QuickSupply  support@quicksupply.com  Bangalore


In [None]:
# Heading and the first rows of the cleaned inventory table, one item per line.
print("\nCleaned Inventory DataFrame:", inventory_df.head(), sep="\n")


Cleaned Inventory DataFrame:
   ProductID ProductName  StockQuantity  ReorderLevel
0          1      Laptop             50            10
1          2       Mouse            200            30
2          3    Keyboard            150            25
3          4     Monitor             80            15
4          5     Printer             60            10


## Step 5: Perform Basic Calculations

In [46]:
# Number of days above which an order is flagged as delayed.
# NOTE(review): DelayDays below is the whole order-to-delivery span, not the
# overrun against a promised date, so with a threshold of 1 every order in the
# recorded sample output is flagged (10 of 10). Confirm the intended rule; the
# value is kept at 1 here so results do not change.
DELAY_THRESHOLD_DAYS = 1

# 1. Days between order and delivery (pandas timedelta -> whole days)
orders_df['DelayDays'] = (orders_df['DeliveryDate'] - orders_df['OrderDate']).dt.days

# 2. Clamp negative spans (delivery date earlier than order date) to 0
orders_df['DelayDays'] = np.where(orders_df['DelayDays'] < 0, 0, orders_df['DelayDays'])

# 3. Flag orders whose span exceeds the threshold (1 = delayed, 0 = not delayed)
orders_df['IsDelayed'] = np.where(orders_df['DelayDays'] > DELAY_THRESHOLD_DAYS, 1, 0)

# 4. Totals and share of delayed orders
total_orders = len(orders_df)
delayed_orders = np.sum(orders_df['IsDelayed'])
# Guard the division: if cleaning removed every order, 0 / 0 would otherwise
# produce nan together with a NumPy RuntimeWarning.
percent_delayed = (delayed_orders / total_orders) * 100 if total_orders else 0.0

print(f"\nTotal Orders: {total_orders}")
print(f"Delayed Orders: {delayed_orders}")
print(f"Percentage of Delayed Orders: {percent_delayed:.2f}%")

# 5. Inventory stock level statistics using NumPy
if 'StockQuantity' not in inventory_df.columns:
    print("\n'StockQuantity' column not found in inventory_df.")
elif inventory_df.empty:
    # np.min / np.max raise ValueError on a zero-size array, so skip the
    # statistics instead of crashing when no inventory rows survived cleaning.
    print("\nNo inventory rows available; skipping stock level statistics.")
else:
    stock_array = inventory_df['StockQuantity'].astype(int).to_numpy()

    mean_stock = np.mean(stock_array)
    min_stock = np.min(stock_array)
    max_stock = np.max(stock_array)
    # np.std defaults to the population standard deviation (ddof=0)
    std_stock = np.std(stock_array)

    print("\nStock Level Statistics:")
    print(f"   Mean Stock: {mean_stock:.2f}")
    print(f"   Min Stock: {min_stock}")
    print(f"   Max Stock: {max_stock}")
    print(f"   Standard Deviation: {std_stock:.2f}")


Total Orders: 10
Delayed Orders: 10
Percentage of Delayed Orders: 100.00%

Stock Level Statistics:
   Mean Stock: 108.50
   Min Stock: 30
   Max Stock: 315
   Standard Deviation: 84.85


## Step 6: Display Cleaned Data

In [47]:
# Columns shown in each report section.
order_view = ['OrderID', 'ProductID', 'SupplierID', 'DelayDays', 'IsDelayed']
stock_view = ['ProductName', 'StockQuantity', 'ReorderLevel']

# Orders together with the delay figures derived in the previous step.
print("\n--- Cleaned Orders Data ---", orders_df[order_view], sep="\n")

# Current stock next to each product's reorder level.
print("\n--- Inventory Levels ---", inventory_df[stock_view], sep="\n")

# Mark products whose stock is strictly below their reorder level.
# NOTE(review): because the comparison is '<', stock exactly equal to the
# reorder level is reported as 'No' — confirm that this is intended.
below_reorder_level = inventory_df['StockQuantity'] < inventory_df['ReorderLevel']
inventory_df['NeedsReorder'] = np.where(below_reorder_level, 'Yes', 'No')

# Same inventory view, now including the reorder flag.
print("\n--- Reorder Check ---", inventory_df[stock_view + ['NeedsReorder']], sep="\n")


--- Cleaned Orders Data ---
   OrderID  ProductID  SupplierID  DelayDays  IsDelayed
0        1          3           1          4          1
1        2          7           4          5          1
2        3          2           2          5          1
3        4         10           5          4          1
4        5          1           3          5          1
5        6          8           8          5          1
6        7          5           6          4          1
7        8          9          10          5          1
8        9          6           7          4          1
9       10          4           9          6          1

--- Inventory Levels ---
    ProductName  StockQuantity  ReorderLevel
0        Laptop             50            10
1         Mouse            200            30
2      Keyboard            150            25
3       Monitor             80            15
4       Printer             60            10
5     USB Cable            315            50
6  External HD

In [48]:
from google.colab import files

# Pair each cleaned table with the file name it is exported under.
exports = (
    (orders_df, "cleaned_orders.csv"),
    (inventory_df, "cleaned_inventory.csv"),
    (suppliers_df, "cleaned_suppliers.csv"),
)

# Write every CSV first (the DataFrame index is not written) ...
for frame, csv_name in exports:
    frame.to_csv(csv_name, index=False)

# ... then hand each file to Colab's download helper, in the same order.
for _, csv_name in exports:
    files.download(csv_name)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>