<a href="https://colab.research.google.com/github/Apekshaa2908/From-Clicks-to-Deliveries-Maximizing-E-commerce-Performance-with-Real-Time-Data-Integration/blob/main/From_Clicks_to_Deliveries.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [20]:
# 1.Data Generation Purpose

%%writefile data_generation.py
import random

def generate_clickstream_data():
    items = [
        {"Item ID": "ITEM001", "Item Name": "Mobile Phone", "Click Count": random.randint(100, 1000)},
        {"Item ID": "ITEM002", "Item Name": "Laptop", "Click Count": random.randint(50, 500)},
        {"Item ID": "ITEM003", "Item Name": "Camera", "Click Count": random.randint(30, 300)},
    ]
    return items

def generate_truck_data():
    trucks = [
        {
            "truck_id": "TRK001",
            "gps_location": {
                "latitude": random.uniform(34.0, 36.0),
                "longitude": random.uniform(-118.0, -115.0),
                "altitude": random.uniform(50.0, 100.0),
                "speed": random.uniform(40.0, 70.0)
            },
            "vehicle_speed": random.uniform(40.0, 70.0),
            "engine_diagnostics": {
                "engine_rpm": random.randint(2000, 3000),
                "fuel_level": random.uniform(50.0, 100.0),
                "temperature": random.uniform(80.0, 100.0),
                "oil_pressure": random.uniform(30.0, 50.0),
                "battery_voltage": random.uniform(13.5, 14.0)
            },
            "odometer_reading": random.uniform(80000.0, 130000.0),
            "fuel_consumption": random.uniform(10.0, 20.0),
            "vehicle_health_and_maintenance": {
                "brake_status": random.choice(["Good", "Needs Inspection"]),
                "tire_pressure": {
                    "front_left": random.uniform(30.0, 35.0),
                    "front_right": random.uniform(30.0, 35.0),
                    "rear_left": random.uniform(34.0, 36.0),
                    "rear_right": random.uniform(34.0, 36.0)
                },
                "transmission_status": "Operational"
            },
            "environmental_conditions": {
                "temperature": random.uniform(18.0, 30.0),
                "humidity": random.uniform(20.0, 60.0),
                "atmospheric_pressure": random.uniform(1008.0, 1013.25)
            }
        },
        # Add more trucks as needed
    ]
    return trucks



Overwriting data_generation.py


In [None]:
%%writefile data_generation.py
import random

def generate_clickstream_data():
    items = [
        {"Item ID": "ITEM001", "Item Name": "Mobile Phone", "Click Count": random.randint(100, 1000)},
        {"Item ID": "ITEM002", "Item Name": "Laptop", "Click Count": random.randint(50, 500)},
        {"Item ID": "ITEM003", "Item Name": "Camera", "Click Count": random.randint(30, 300)},
    ]
    return items

def generate_truck_data(num_trucks=3):
    trucks = []
    for i in range(num_trucks):
        truck = {
            "truck_id": f"TRK{i+1:03d}",
            "gps_location": {
                "latitude": random.uniform(34.0, 41.0),
                "longitude": random.uniform(-118.0, -74.0),
                "altitude": round(random.uniform(10.0, 610.0), 1),
                "speed": round(random.uniform(40.0, 70.0), 1)
            },
            "vehicle_speed": round(random.uniform(40.0, 70.0), 1),
            "engine_diagnostics": {
                "engine_rpm": random.randint(2000, 3000),
                "fuel_level": round(random.uniform(50.0, 100.0), 1),
                "temperature": round(random.uniform(80.0, 100.0), 1),
                "oil_pressure": round(random.uniform(30.0, 50.0), 1),
                "battery_voltage": round(random.uniform(13.5, 14.0), 1)
            },
            "odometer_reading": round(random.uniform(80000.0, 130000.0), 1),
            "fuel_consumption": round(random.uniform(10.0, 20.0), 1),
            "vehicle_health_and_maintenance": {
                "brake_status": random.choice(["Good", "Needs Inspection"]),
                "tire_pressure": {
                    "front_left": round(random.uniform(30.0, 35.0), 1),
                    "front_right": round(random.uniform(30.0, 35.0), 1),
                    "rear_left": round(random.uniform(34.0, 36.0), 1),
                    "rear_right": round(random.uniform(34.0, 36.0), 1)
                },
                "transmission_status": "Operational"
            },
            "environmental_conditions": {
                "temperature": round(random.uniform(18.0, 30.0), 1),
                "humidity": round(random.uniform(20.0, 70.0), 1),
                "atmospheric_pressure": round(random.uniform(1008.0, 1013.25), 2)
            }
        }
        trucks.append(truck)
    return trucks

In [40]:
# 2. API Creation Purpose
%%writefile api.py
from flask import Flask, jsonify
import boto3
from boto3.dynamodb.conditions import Key

app = Flask(__name__)

# Initialize DynamoDB client
dynamodb = boto3.resource('dynamodb', region_name='ap-southeast-2',
    aws_access_key_id='id',
    aws_secret_access_key='key')

# Truck telemetry data from DynamoDB
@app.route('/api/trucks', methods=['GET'])
def get_truck_data():
    table = dynamodb.Table('TruckTelemetryData')
    response = table.scan()  # Or use more efficient querying based on specific fields like timestamp, truck_id, etc.
    return jsonify({"trucks": response['Items']})

# Clickstream data from DynamoDB
@app.route('/api/clickstream', methods=['GET'])
def get_clickstream_data():
    table = dynamodb.Table('ClickstreamDataStream')
    response = table.scan()
    return jsonify({"clickstream": response['Items']})

if __name__ == '__main__':
    app.run(debug=True)



Overwriting api.py


In [23]:
# 3. Data Streaming with AWS Kinesis Purpose
%%writefile kinesis_streaming.py
import boto3
import json
import time
from data_generation import generate_clickstream_data, generate_truck_data

# Initialize Kinesis client
kinesis = boto3.client('kinesis',
                       region_name='ap-southeast-2',
                       aws_access_key_id='id',
                       aws_secret_access_key='key')

def stream_clickstream_data():
    while True:
        clickstream_data = generate_clickstream_data()
        for item in clickstream_data:
            kinesis.put_record(
                StreamName="ClickstreamDataStream",
                Data=json.dumps(item),
                PartitionKey=item['Item ID']
            )
        time.sleep(1)

def stream_truck_data():
    while True:
        truck_data = generate_truck_data()
        for truck in truck_data:
            kinesis.put_record(
                StreamName="TruckDataStream",
                Data=json.dumps(truck),
                PartitionKey=truck['truck_id']
            )
        time.sleep(60)


Writing kinesis_streaming.py


In [None]:
# 4. Data Processing with AWS Lambda Purpose:
## Clickstream
import json
import base64

def lambda_handler(event, context):
    for record in event['Records']:
        payload = json.loads(base64.b64decode(record['kinesis']['data']))
        # Process clickstream data
        print("Processed clickstream data:", payload)

    return {
        'statusCode': 200,
        'body': json.dumps('Clickstream Processing Complete')
    }

## Truckstream
import json
import base64

def lambda_handler(event, context):
    for record in event['Records']:
        payload = json.loads(base64.b64decode(record['kinesis']['data']))
        # Process truck data
        print("Processed truck data:", payload)

    return {
        'statusCode': 200,
        'body': json.dumps('Truck Data Processing Complete')
    }


In [28]:
# 5. Data Storage with Snowflake/DynamoDB Purpose:
import boto3
import time
from decimal import Decimal
from data_generation import generate_truck_data

# Initialize DynamoDB client
dynamodb = boto3.resource('dynamodb',
    region_name='ap-southeast-2',
    aws_access_key_id='id',
    aws_secret_access_key='key')

# Create table (One-time setup)
def create_table():
    table = dynamodb.create_table(
        TableName='TruckTelemetryData',
        KeySchema=[
            {'AttributeName': 'truck_id', 'KeyType': 'HASH'},
            {'AttributeName': 'timestamp', 'KeyType': 'RANGE'}
        ],
        AttributeDefinitions=[
            {'AttributeName': 'truck_id', 'AttributeType': 'S'},
            {'AttributeName': 'timestamp', 'AttributeType': 'N'}
        ],
        ProvisionedThroughput={'ReadCapacityUnits': 5, 'WriteCapacityUnits': 5}
    )
    table.wait_until_exists()  # Wait until the table is created
    return table

# Utility function to convert float to Decimal
def float_to_decimal(value):
    if isinstance(value, float):
        return Decimal(str(value))
    return Decimal(value)

# Insert data with SCD Type 2 logic
def store_truck_data(truck_data):
    table = dynamodb.Table('TruckTelemetryData')
    for truck in truck_data:
        table.put_item(Item={
            'truck_id': truck['truck_id'],
            'timestamp': float_to_decimal(time.time()),  # Convert timestamp to Decimal
            'gps_location': {
                'latitude': float_to_decimal(truck['gps_location']['latitude']),
                'longitude': float_to_decimal(truck['gps_location']['longitude']),
                'altitude': float_to_decimal(truck['gps_location']['altitude']),
                'speed': float_to_decimal(truck['gps_location']['speed'])
            },
            'vehicle_speed': float_to_decimal(truck['vehicle_speed']),
            'engine_diagnostics': {
                'engine_rpm': float_to_decimal(truck['engine_diagnostics']['engine_rpm']),
                'fuel_level': float_to_decimal(truck['engine_diagnostics']['fuel_level']),
                'temperature': float_to_decimal(truck['engine_diagnostics']['temperature']),
                'oil_pressure': float_to_decimal(truck['engine_diagnostics']['oil_pressure']),
                'battery_voltage': float_to_decimal(truck['engine_diagnostics']['battery_voltage'])
            },
            'odometer_reading': float_to_decimal(truck['odometer_reading']),
            'fuel_consumption': float_to_decimal(truck['fuel_consumption']),
            'vehicle_health_and_maintenance': {
                'brake_status': truck['vehicle_health_and_maintenance']['brake_status'],
                'tire_pressure': {
                    'front_left': float_to_decimal(truck['vehicle_health_and_maintenance']['tire_pressure']['front_left']),
                    'front_right': float_to_decimal(truck['vehicle_health_and_maintenance']['tire_pressure']['front_right']),
                    'rear_left': float_to_decimal(truck['vehicle_health_and_maintenance']['tire_pressure']['rear_left']),
                    'rear_right': float_to_decimal(truck['vehicle_health_and_maintenance']['tire_pressure']['rear_right'])
                },
                'transmission_status': truck['vehicle_health_and_maintenance']['transmission_status']
            },
            'environmental_conditions': {
                'temperature': float_to_decimal(truck['environmental_conditions']['temperature']),
                'humidity': float_to_decimal(truck['environmental_conditions']['humidity']),
                'atmospheric_pressure': float_to_decimal(truck['environmental_conditions']['atmospheric_pressure'])
            }
        })

# Example usage
if __name__ == "__main__":
    create_table()
    store_truck_data(generate_truck_data())


ResourceInUseException: An error occurred (ResourceInUseException) when calling the CreateTable operation: Table already exists: TruckTelemetryData

In [42]:
# 6. Streamlit Presentation Purpose:
%%writefile app.py
import streamlit as st
import boto3
import time

# Initialize DynamoDB client with AWS credentials
region_name = 'ap-southeast-2'
aws_access_key_id='id'
aws_secret_access_key='key'


dynamodb = boto3.resource(
    'dynamodb',
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key,
    region_name=region_name
)

# Get reference to the DynamoDB table
table = dynamodb.Table('TruckTelemetryData')

# Streamlit UI components
st.title('Truck Telemetry Data Dashboard')

# Input field for Truck ID
truck_id = st.text_input('Enter Truck ID to view telemetry data:')

# Button to fetch telemetry data
if st.button('Fetch Data'):
    if truck_id:
        # Query DynamoDB table to fetch telemetry data for the given Truck ID
        response = table.query(
            KeyConditionExpression=boto3.dynamodb.conditions.Key('truck_id').eq(truck_id)
        )

        # Display telemetry data if found
        if response['Items']:
            st.write(f"Telemetry Data for Truck ID: {truck_id}")

            # Extract and display data
            for item in response['Items']:
                st.json(item)
        else:
            st.warning(f"No data found for Truck ID: {truck_id}")
    else:
        st.error("Please enter a Truck ID.")

# Display historical data for all trucks
st.title('Historical Truck Telemetry Data')

# Query DynamoDB table to fetch all telemetry data
response = table.scan()

# Display data for all trucks
if response['Items']:
    st.write("All Telemetry Data:")
    for item in response['Items']:
        st.write(f"Truck ID: {item['truck_id']}")
        st.json(item)
else:
    st.warning("No historical data found.")


Overwriting app.py


In [None]:
! wget -q -O - ipv4.icanhazip.com
! streamlit run app.py & npx localtunnel --port 8501