In [5]:
from web3 import Web3
import pandas as pd

# Step 1: Retrieve your Milestone 1 output to start the retrieval process
print("📥 Starting Week 6: Data Retrieval and Processing")
print("🔗 Connecting to blockchain...")

# Connect to the local Ganache JSON-RPC endpoint
ganache_url = "http://127.0.0.1:7545"
web3 = Web3(Web3.HTTPProvider(ganache_url))

if web3.is_connected():
    print("✅ Connected to Ganache successfully!")
else:
    print("❌ Connection failed. Make sure Ganache is running.")
    # Raise SystemExit directly rather than calling exit(): exit() is a
    # convenience injected by the `site` module for interactive sessions, and
    # with no argument it reports status 0 (success). A non-zero status lets
    # any calling shell or scheduler see that the run failed, and raising
    # guarantees nothing below this line runs without a connection.
    raise SystemExit(1)

# Contract details (from your Week 5 deployment)
def _abi_param(sol_type, name=""):
    """Build one ABI parameter entry whose internalType equals its type."""
    return {"internalType": sol_type, "name": name, "type": sol_type}


def _abi_function(name, inputs, outputs, mutability):
    """Build one ABI entry of type "function"."""
    return {
        "inputs": inputs,
        "name": name,
        "outputs": outputs,
        "stateMutability": mutability,
        "type": "function",
    }


contract_address = web3.to_checksum_address("0x77CD879a6985B97C7deBFD7a15e46dAde5aE32bC")

# Your contract ABI (same as Week 5), expressed through the builders above
contract_abi = [
    # State-changing functions
    _abi_function("authorizeDevice", [_abi_param("address", "device")], [], "nonpayable"),
    _abi_function("revokeDevice", [_abi_param("address", "device")], [], "nonpayable"),
    _abi_function(
        "storeData",
        [
            _abi_param("string", "deviceID"),
            _abi_param("string", "dataType"),
            _abi_param("string", "value"),
        ],
        [],
        "nonpayable",
    ),
    # The constructor entry has no name and no outputs, so it is spelled out
    {"inputs": [], "stateMutability": "nonpayable", "type": "constructor"},
    # Read-only (view) functions
    _abi_function("authorizedDevices", [_abi_param("address")], [_abi_param("bool")], "view"),
    _abi_function(
        "getDataByIndex",
        [_abi_param("string", "deviceID"), _abi_param("uint256", "index")],
        [_abi_param("string"), _abi_param("string"), _abi_param("uint256")],
        "view",
    ),
    _abi_function(
        "getDataCount",
        [_abi_param("string", "deviceID")],
        [_abi_param("uint256")],
        "view",
    ),
    _abi_function("owner", [], [_abi_param("address")], "view"),
]

# Bind the address and ABI into a contract handle used for all later calls
contract = web3.eth.contract(address=contract_address, abi=contract_abi)
print("✅ Connected to Smart Contract")

# Step 2: Get the total number of stored records
print("\n📊 Calculating total stored records...")

# Load original CSV to get device IDs (from Week 2)
df_original = pd.read_csv("Logistics-Data.csv")
device_ids = df_original["package_id"].unique()

# Query getDataCount once per device and keep the answers. The same counts
# drive both the printed total and the retrieval loop below, so the two can
# never disagree and each device costs one count call instead of two.
record_counts = {
    device_id: contract.functions.getDataCount(str(device_id)).call()
    for device_id in device_ids
}
total_records = sum(record_counts.values())

print(f"Total IoT records stored: {total_records}")

# Step 3: Fetch all stored IoT data and structure it in a DataFrame
print("\n📥 Retrieving all IoT records...")

# One dict per retrieved record; becomes one DataFrame row each
data = []

# Walk every device and pull each of its records by index
for device_id, device_count in record_counts.items():
    for i in range(device_count):
        try:
            # getDataByIndex returns (data_type, value, timestamp)
            data_type, value, timestamp = contract.functions.getDataByIndex(
                str(device_id), i
            ).call()

            # Structure the data like the PDF example
            data.append({
                "timestamp": timestamp,      # blockchain timestamp
                "device_id": device_id,      # package ID (PKG85046, etc.)
                "data_type": data_type,      # sensor type (temperature, humidity, shock)
                "data_value": value          # sensor reading
            })
        except Exception as e:
            # Best effort: report the failed record and keep going
            print(f"❌ Error retrieving record {i} for {device_id}: {e}")

# Convert to a DataFrame (like Excel spreadsheet). The columns are named
# explicitly so the frame has the expected schema even when nothing was
# retrieved; otherwise later column lookups would raise KeyError.
df = pd.DataFrame(data, columns=["timestamp", "device_id", "data_type", "data_value"])

# Step 4: Convert timestamp to readable format
print("\n🕐 Converting timestamps...")
# The stored timestamps are interpreted as seconds since the Unix epoch
df["timestamp"] = pd.to_datetime(df["timestamp"], unit="s")

# Step 5: Data preprocessing - extract numerical values
print("\n🔧 Processing data...")

# Extract numeric values from sensor readings (removes units like °C, %, etc.).
# The optional leading "-" keeps the sign of negative readings; without it
# "-5.2" would be parsed as 5.2. expand=False returns a Series (one capture
# group), which is the natural shape for a single new column.
df["numeric_value"] = (
    df["data_value"].str.extract(r"(-?\d+\.?\d*)", expand=False).astype(float)
)

# Handle missing values - replace with 0 if conversion failed. Only the
# derived numeric column is filled, so gaps in any other column are not
# silently overwritten with 0.
df["numeric_value"] = df["numeric_value"].fillna(0)

# Display first few records (like PDF example)
print("\n📄 First few processed records:")
print(df.head())

# Step 6: Persist the processed frame, then report on what was written
print("\n💾 Saving cleaned data...")

# One CSV holds every cleaned IoT record (following PDF format); the
# DataFrame index is not written
df.to_csv("cleaned_iot_data.csv", index=False)
print("✅ Cleaned IoT data saved successfully as cleaned_iot_data.csv")

# Gather the summary figures first so the report below is pure formatting
processed_total = len(df)
device_total = df["device_id"].nunique()
type_names = ", ".join(df["data_type"].unique())
earliest = df["timestamp"].min()
latest = df["timestamp"].max()

# Final summary
print("\n📊 Data Processing Summary:")
print(f"   📦 Total records processed: {processed_total}")
print(f"   📱 Unique devices: {device_total}")
print(f"   📈 Data types: {type_names}")
print(f"   📅 Date range: {earliest} to {latest}")

print("\n✅ 📁 Output file: cleaned_iot_data.csv")


# Step 7: Preview the cleaned data structure
print("\n📋 Cleaned data structure:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() appended a stray "None" line.
df.info()
print("\n📊 Sample numeric values by data type:")
# Show the min-max range of the extracted numeric readings per sensor type
for data_type in df['data_type'].unique():
    type_data = df[df['data_type'] == data_type]['numeric_value']
    print(f"   {data_type}: {type_data.min():.1f} - {type_data.max():.1f}")

📥 Starting Week 6: Data Retrieval and Processing
🔗 Connecting to blockchain...
✅ Connected to Ganache successfully!
✅ Connected to Smart Contract

📊 Calculating total stored records...
Total IoT records stored: 300

📥 Retrieving all IoT records...

🕐 Converting timestamps...

🔧 Processing data...

📄 First few processed records:
            timestamp device_id    data_type data_value  numeric_value
0 2025-06-06 18:17:06  PKG85046  temperature       26.0           26.0
1 2025-06-06 18:17:06  PKG85046     humidity       51.6           51.6
2 2025-06-06 18:17:06  PKG85046        shock        0.2            0.2
3 2025-06-06 18:17:07  PKG59811  temperature       11.7           11.7
4 2025-06-06 18:17:07  PKG59811     humidity       84.9           84.9

💾 Saving cleaned data...
✅ Cleaned IoT data saved successfully as cleaned_iot_data.csv

📊 Data Processing Summary:
   📦 Total records processed: 300
   📱 Unique devices: 100
   📈 Data types: temperature, humidity, shock
   📅 Date range: 2025-0