# Data Collection & Preparation (The Foundation)

In [2]:
import pandas as pd

In [3]:
import numpy as np

In [6]:
!pip install sqlalchemy
!pip install geopy

Collecting sqlalchemy
  Downloading sqlalchemy-2.0.43-cp312-cp312-win_amd64.whl.metadata (9.8 kB)
Collecting greenlet>=1 (from sqlalchemy)
  Downloading greenlet-3.2.4-cp312-cp312-win_amd64.whl.metadata (4.2 kB)
Downloading sqlalchemy-2.0.43-cp312-cp312-win_amd64.whl (2.1 MB)
   ---------------------------------------- 0.0/2.1 MB ? eta -:--:--
   ---- ----------------------------------- 0.3/2.1 MB ? eta -:--:--
   -------------- ------------------------- 0.8/2.1 MB 2.4 MB/s eta 0:00:01
   ----------------------------- ---------- 1.6/2.1 MB 2.9 MB/s eta 0:00:01
   ---------------------------------------- 2.1/2.1 MB 2.9 MB/s  0:00:00
Downloading greenlet-3.2.4-cp312-cp312-win_amd64.whl (299 kB)
Installing collected packages: greenlet, sqlalchemy

   ---------------------------------------- 0/2 [greenlet]
   ---------------------------------------- 0/2 [greenlet]
   -------------------- ------------------- 1/2 [sqlalchemy]
   -------------------- ------------------- 1/2 [sqlalchemy]
   --

In [7]:
from sqlalchemy import create_engine
from geopy.distance import geodesic

In [18]:
# Step 1: Create a database file
engine = create_engine('sqlite:///logistics.db')

# Step 2: Load the CSV file into a Pandas DataFrame
df = pd.read_csv('logistics_data.csv')

# Step 3: Write the DataFrame to a SQL table named 'routes'
# if_exists='replace' will create a new table every time this script is run
df.to_sql('routes', engine, if_exists='replace', index=False)

print("Data successfully loaded into the 'routes' table.")

# You can even run a simple SQL query to verify
with engine.connect() as conn:
    query = "SELECT * FROM routes LIMIT 2"
    result = pd.read_sql(query, conn)
    print("\nVerifying data with a SQL query:")
    print(result)

Data successfully loaded into the 'routes' table.

Verifying data with a SQL query:
  truck_id start_location end_location  load_weight_kg
0     T001      New Delhi       Mumbai            5000
1     T002      Bangalore      Chennai            3500


In [19]:
# Step 1: Connect to the database and load the data
engine = create_engine('sqlite:///logistics.db')
df = pd.read_sql_table('routes', engine)

# Step 2: Simulate real-time data (weather and traffic)
np.random.seed(42) # For consistent results
df['weather_condition'] = np.random.choice(['Clear', 'Rainy', 'Foggy'], len(df))
df['traffic_level'] = np.random.choice(['Low', 'Medium', 'High'], len(df))
df['time_of_day'] = np.random.choice(['Morning', 'Afternoon', 'Night'], len(df))

# Step 3: Calculate distance using geopy
# We need to get coordinates for each city. Let's use some example coordinates.
# In a real project, you would use a geocoding service.
city_coords = {
    "New Delhi": (28.7041, 77.1025),
    "Mumbai": (19.0760, 72.8777),
    "Bangalore": (12.9716, 77.5946),
    "Chennai": (13.0827, 80.2707),
    "Hyderabad": (17.3850, 78.4867),
    "Pune": (18.5204, 73.8567),
    "Kolkata": (22.5726, 88.3639)
}

def calculate_distance(row):
    start_point = city_coords[row['start_location']]
    end_point = city_coords[row['end_location']]
    return geodesic(start_point, end_point).km

df['distance_km'] = df.apply(calculate_distance, axis=1)

# Step 4: A simple proxy for carbon footprint based on features
# This is our target variable for the machine learning model.
df['base_carbon_kg'] = df['distance_km'] * 0.2 + df['load_weight_kg'] * 0.01  # Example formula
weather_impact = {'Clear': 1.0, 'Rainy': 1.1, 'Foggy': 1.2}
traffic_impact = {'Low': 1.0, 'Medium': 1.2, 'High': 1.5}
time_impact = {'Morning': 1.1, 'Afternoon': 1.0, 'Night': 1.2}

df['total_carbon_kg'] = (
    df['base_carbon_kg'] * df['weather_condition'].map(weather_impact) *
    df['traffic_level'].map(traffic_impact) *
    df['time_of_day'].map(time_impact)
)

# Step 5: Save the preprocessed data for the next step (Model Building)
df.to_csv('processed_logistics_data.csv', index=False)
print("Data preprocessing complete. Saved to 'processed_logistics_data.csv'")
print("\nFinal Processed Data (First 5 rows):")
print(df.head())

Data preprocessing complete. Saved to 'processed_logistics_data.csv'

Final Processed Data (First 5 rows):
  truck_id start_location end_location  load_weight_kg weather_condition  \
0     T001      New Delhi       Mumbai            5000             Foggy   
1     T002      Bangalore      Chennai            3500             Clear   
2     T003      Hyderabad         Pune            6200             Foggy   
3     T004         Mumbai      Kolkata            4800             Foggy   
4     T005      New Delhi    Bangalore            5500             Clear   

  traffic_level time_of_day  distance_km  base_carbon_kg  total_carbon_kg  
0           Low   Afternoon  1149.608388      279.921678       335.906013  
1        Medium       Night   290.543167       93.108633       134.076432  
2        Medium   Afternoon   506.291027      163.258205       235.091816  
3        Medium   Afternoon  1656.861150      379.372230       546.296011  
4        Medium       Night  1742.650905      403.530181

# Predictive Model Building (The Brains)

In [20]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score

In [23]:
# Data Loading & Splitting

# Step 1: Load the preprocessed data
df = pd.read_csv('processed_logistics_data.csv')
print("Loaded processed data.")

# Step 2: Define features (X) and target (y)
# We will drop the original categorical columns after encoding them.
features = ['distance_km', 'load_weight_kg', 'weather_condition', 'traffic_level', 'time_of_day']
X = df[features]
y = df['total_carbon_kg']

# Step 3: Handle categorical data (One-Hot Encoding)
# Machine learning models only understand numbers, so we convert text into numbers.
encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
X_encoded = encoder.fit_transform(X[['weather_condition', 'traffic_level', 'time_of_day']])
X_encoded_df = pd.DataFrame(X_encoded, columns=encoder.get_feature_names_out())

# Drop original categorical columns and add the encoded ones
X = X.drop(columns=['weather_condition', 'traffic_level', 'time_of_day'])
X = pd.concat([X.reset_index(drop=True), X_encoded_df], axis=1)

print("\nFeatures after One-Hot Encoding:")
print(X.head())

# Step 4: Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(f"\nTraining set size: {X_train.shape[0]} rows")
print(f"Testing set size: {X_test.shape[0]} rows")

Loaded processed data.

Features after One-Hot Encoding:
   distance_km  load_weight_kg  weather_condition_Clear  \
0  1149.608388            5000                      0.0   
1   290.543167            3500                      1.0   
2   506.291027            6200                      0.0   
3  1656.861150            4800                      0.0   
4  1742.650905            5500                      1.0   

   weather_condition_Foggy  weather_condition_Rainy  traffic_level_High  \
0                      1.0                      0.0                 0.0   
1                      0.0                      0.0                 0.0   
2                      1.0                      0.0                 0.0   
3                      1.0                      0.0                 0.0   
4                      0.0                      0.0                 0.0   

   traffic_level_Low  traffic_level_Medium  time_of_day_Afternoon  \
0                1.0                   0.0                    1.0   

In [24]:
#Model Training & Evaluation

# Step 5: Initialize and train the Machine Learning model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

print("\nModel training complete.")

# Step 6: Make predictions on the test set
y_pred = model.predict(X_test)

# Step 7: Evaluate the model's performance
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"\nModel Performance Metrics:")
print(f"Mean Absolute Error (MAE): {mae:.2f} kg")
print(f"R-squared Score (R2): {r2:.2f}")


Model training complete.

Model Performance Metrics:
Mean Absolute Error (MAE): 33.17 kg
R-squared Score (R2): -0.18


# Optimization & Recommendation System (The Unique Part)

In [28]:
#Create a Prediction Function
!pip install joblib



In [32]:

import joblib

# 1. Load the trained model and encoder
# Note: Aapko pehle model aur encoder ko save karna hoga.
# Upar ke code mein, model.fit() ke baad ye code add karein:
# joblib.dump(model, 'carbon_model.pkl')
# joblib.dump(encoder, 'onehot_encoder.pkl')

# Assuming you have saved them:
model = joblib.load('carbon_model.pkl')
encoder = joblib.load('onehot_encoder.pkl')

# 2. Define city coordinates and impact factors (same as before)
city_coords = {
    "New Delhi": (28.7041, 77.1025), "Mumbai": (19.0760, 72.8777),
    "Bangalore": (12.9716, 77.5946), "Chennai": (13.0827, 80.2707),
    "Hyderabad": (17.3850, 78.4867), "Pune": (18.5204, 73.8567),
    "Kolkata": (22.5726, 88.3639)
}

weather_impact = {'Clear': 1.0, 'Rainy': 1.1, 'Foggy': 1.2}
traffic_impact = {'Low': 1.0, 'Medium': 1.2, 'High': 1.5}
time_impact = {'Morning': 1.1, 'Afternoon': 1.0, 'Night': 1.2}

def predict_carbon_footprint(start_loc, end_loc, load_kg, weather, traffic, time_of_day):
    # Calculate distance
    dist_km = geodesic(city_coords[start_loc], city_coords[end_loc]).km

    # Create a DataFrame for prediction
    # Column names must match the training data
    input_data = pd.DataFrame([{
        'distance_km': dist_km,
        'load_weight_kg': load_kg,
        'weather_condition': weather,
        'traffic_level': traffic,
        'time_of_day': time_of_day
    }])

    # Apply the same one-hot encoding as during training
    categorical_features = ['weather_condition', 'traffic_level', 'time_of_day']
    encoded_features = encoder.transform(input_data[categorical_features])
    encoded_df = pd.DataFrame(encoded_features, columns=encoder.get_feature_names_out())

    # Combine numerical and encoded features
    final_input = pd.concat([
        input_data[['distance_km', 'load_weight_kg']].reset_index(drop=True),
        encoded_df
    ], axis=1)

    # Predict using the trained model
    prediction = model.predict(final_input)
    return prediction[0]

# Let's test the function
sample_prediction = predict_carbon_footprint("New Delhi", "Mumbai", 5000, "Clear", "High", "Morning")
print(f"\nPredicted carbon footprint for a sample trip: {sample_prediction:.2f} kg")


Predicted carbon footprint for a sample trip: 400.95 kg


In [34]:
# Build the Optimization Logic

# Step 3: Optimization function to find the best route/scenario
def find_best_route_scenario(start_loc, end_loc, load_kg):
    # Define all possible scenarios
    possible_scenarios = []
    weathers = ['Clear', 'Rainy'] # Simplified for example
    traffics = ['Low', 'Medium', 'High']
    times = ['Morning', 'Afternoon', 'Night']

    for weather in weathers:
        for traffic in traffics:
            for time in times:
                carbon_kg = predict_carbon_footprint(
                    start_loc, end_loc, load_kg, weather, traffic, time
                )
                possible_scenarios.append({
                    'start_location': start_loc,
                    'end_location': end_loc,
                    'load_kg': load_kg,
                    'weather': weather,
                    'traffic': traffic,
                    'time_of_day': time,
                    'carbon_kg': carbon_kg
                })

    # Find the scenario with the minimum carbon footprint
    best_scenario = min(possible_scenarios, key=lambda x: x['carbon_kg'])
    return best_scenario

# Let's test the optimization logic
best_option = find_best_route_scenario("New Delhi", "Mumbai", 5000)
print("\n--- Optimized Route Recommendation ---")
print(f"Start Location: {best_option['start_location']}")
print(f"End Location: {best_option['end_location']}")
print(f"Recommended Time: {best_option['time_of_day']} with {best_option['traffic']} traffic and {best_option['weather']} weather.")
print(f"Optimized Carbon Footprint: {best_option['carbon_kg']:.2f} kg")


--- Optimized Route Recommendation ---
Start Location: New Delhi
End Location: Mumbai
Recommended Time: Morning with Low traffic and Rainy weather.
Optimized Carbon Footprint: 345.51 kg


In [38]:
import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler

# PyTorch for Time-Series Forecasting

In [40]:
# 1. Dummy Time-Series Data (Replace with real data)
# Real data will have 'time', 'traffic_level', 'weather_condition'
data = {
    'time': pd.to_datetime(pd.date_range(start='2025-01-01', periods=100, freq='h')),
    'traffic_level': np.random.randint(1, 4, 100),
    'weather_condition': np.random.randint(1, 4, 100)
}
df = pd.DataFrame(data).set_index('time')

# 2. Preprocess Data
scaler_traffic = MinMaxScaler()
scaler_weather = MinMaxScaler()
df['traffic_scaled'] = scaler_traffic.fit_transform(df[['traffic_level']])
df['weather_scaled'] = scaler_weather.fit_transform(df[['weather_condition']])

# 3. Create Sequences for LSTM
def create_sequences(input_data, sequence_length):
    sequences = []
    for i in range(len(input_data) - sequence_length):
        seq = input_data[i:i + sequence_length]
        label = input_data[i + sequence_length]
        sequences.append((seq, label))
    return sequences

sequence_length = 12 # Predict based on last 12 hours
traffic_sequences = create_sequences(df['traffic_scaled'].values, sequence_length)
weather_sequences = create_sequences(df['weather_scaled'].values, sequence_length)

# 4. Define LSTM Model
class LSTMForecaster(nn.Module):
    def __init__(self, input_size=1, hidden_layer_size=50, output_size=1):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size
        self.lstm = nn.LSTM(input_size, hidden_layer_size)
        self.linear = nn.Linear(hidden_layer_size, output_size)
        self.hidden_cell = (torch.zeros(1,1,self.hidden_layer_size),
                            torch.zeros(1,1,self.hidden_layer_size))

    def forward(self, input_seq):
        lstm_out, self.hidden_cell = self.lstm(input_seq.view(len(input_seq), 1, -1), self.hidden_cell)
        predictions = self.linear(lstm_out.view(len(input_seq), -1))
        return predictions[-1]

# 5. Train the models (Traffic and Weather)
def train_model(sequences, scaler):
    model = LSTMForecaster()
    loss_function = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    epochs = 50
    for i in range(epochs):
        for seq, labels in sequences:
            optimizer.zero_grad()
            model.hidden_cell = (torch.zeros(1, 1, model.hidden_layer_size),
                                 torch.zeros(1, 1, model.hidden_layer_size))
            y_pred = model(torch.Tensor(seq).view(-1, 1))
            single_loss = loss_function(y_pred, torch.Tensor([labels]))
            single_loss.backward()
            optimizer.step()
    return model

traffic_model = train_model(traffic_sequences, scaler_traffic)
weather_model = train_model(weather_sequences, scaler_weather)

# 6. Save the models and scalers
torch.save(traffic_model.state_dict(), 'traffic_lstm.pth')
torch.save(weather_model.state_dict(), 'weather_lstm.pth')
joblib.dump(scaler_traffic, 'scaler_traffic.pkl')
joblib.dump(scaler_weather, 'scaler_weather.pkl')

print("PyTorch models and scalers saved successfully!")

PyTorch models and scalers saved successfully!


# TensorFlow for Computer Vision

In [41]:
pip install tensorflow

Collecting tensorflow
  Downloading tensorflow-2.20.0-cp312-cp312-win_amd64.whl.metadata (4.6 kB)
Collecting absl-py>=1.0.0 (from tensorflow)
  Downloading absl_py-2.3.1-py3-none-any.whl.metadata (3.3 kB)
Collecting astunparse>=1.6.0 (from tensorflow)
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=24.3.25 (from tensorflow)
  Downloading flatbuffers-25.2.10-py2.py3-none-any.whl.metadata (875 bytes)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow)
  Downloading gast-0.6.0-py3-none-any.whl.metadata (1.3 kB)
Collecting google_pasta>=0.1.1 (from tensorflow)
  Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting libclang>=13.0.0 (from tensorflow)
  Downloading libclang-18.1.1-py2.py3-none-win_amd64.whl.metadata (5.3 kB)
Collecting opt_einsum>=2.3.2 (from tensorflow)
  Downloading opt_einsum-3.4.0-py3-none-any.whl.metadata (6.3 kB)
Collecting protobuf>=5.28.0 (from tensorflow)
  Downloading protobuf-6.32

In [42]:
import tensorflow as tf
from tensorflow import keras
from keras import layers

In [43]:
# 1. Data Preparation (dummy data, replace with real image data)
def create_dummy_dataset(num_images=100):
    images = np.random.rand(num_images, 64, 64, 3) * 255
    labels = np.random.randint(0, 2, num_images) # 0 for clear, 1 for congested
    return images, labels

X_train, y_train = create_dummy_dataset()
X_test, y_test = create_dummy_dataset(20)

# 2. Define CNN Model
model = keras.Sequential([
    layers.Rescaling(1./255, input_shape=(64, 64, 3)),
    layers.Conv2D(32, 3, activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(64, 3, activation='relu'),
    layers.MaxPooling2D(),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(1, activation='sigmoid') # Sigmoid for binary classification
])

# 3. Compile and Train
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

model.fit(X_train, y_train, epochs=5, validation_data=(X_test, y_test))

# 4. Save the model
model.save('road_condition_classifier.h5')
print("TensorFlow model saved successfully as 'road_condition_classifier.h5'!")

  super().__init__(**kwargs)


Epoch 1/5
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 227ms/step - accuracy: 0.4800 - loss: 1.7371 - val_accuracy: 0.5500 - val_loss: 0.6898
Epoch 2/5
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 83ms/step - accuracy: 0.4200 - loss: 0.7062 - val_accuracy: 0.5500 - val_loss: 0.6925
Epoch 3/5
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step - accuracy: 0.5400 - loss: 0.6870 - val_accuracy: 0.4500 - val_loss: 0.7143
Epoch 4/5
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step - accuracy: 0.5800 - loss: 0.6806 - val_accuracy: 0.4500 - val_loss: 0.7310
Epoch 5/5
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step - accuracy: 0.5800 - loss: 0.6782 - val_accuracy: 0.4500 - val_loss: 0.7358




TensorFlow model saved successfully as 'road_condition_classifier.h5'!


In [44]:
from flask import Flask, request, jsonify
# Load base model from old project
base_carbon_model = joblib.load('base_carbon_model.pkl')
encoder = joblib.load('onehot_encoder.pkl')

# Load PyTorch models
from pytorch_forecaster import LSTMForecaster # Import the class
traffic_lstm = LSTMForecaster()
traffic_lstm.load_state_dict(torch.load('traffic_lstm.pth'))
traffic_lstm.eval()

weather_lstm = LSTMForecaster()
weather_lstm.load_state_dict(torch.load('weather_lstm.pth'))
weather_lstm.eval()

scaler_traffic = joblib.load('scaler_traffic.pkl')
scaler_weather = joblib.load('scaler_weather.pkl')

# Load TensorFlow model
road_condition_classifier = tf.keras.models.load_model('road_condition_classifier.h5')

# City coordinates and impact factors (unchanged)
city_coords = {
    "New Delhi": (28.7041, 77.1025), "Mumbai": (19.0760, 72.8777),
    "Bangalore": (12.9716, 77.5946), "Chennai": (13.0827, 80.2707),
    "Hyderabad": (17.3850, 78.4867), "Pune": (18.5204, 73.8567),
    "Kolkata": (22.5726, 88.3639)
}
weather_impact = {'Clear': 1.0, 'Rainy': 1.1, 'Foggy': 1.2}
traffic_impact = {'Low': 1.0, 'Medium': 1.2, 'High': 1.5}

app = Flask(__name__)

# Main prediction route
@app.route('/predict_carbon', methods=['POST'])
def predict_route():
    data = request.get_json()
    start_loc = data['start_location']
    end_loc = data['end_location']
    load_kg = data['load_weight_kg']

    # New Feature 1: Get real-time predictions from PyTorch models
    # Dummy current data (replace with real-time data from API)
    current_traffic_level = np.random.randint(1, 4)
    current_weather_level = np.random.randint(1, 4)
    
    # Scale the current data
    traffic_scaled = scaler_traffic.transform([[current_traffic_level]])
    weather_scaled = scaler_weather.transform([[current_weather_level]])

    # PyTorch prediction for the next hour
    predicted_traffic_scaled = traffic_lstm(torch.Tensor(traffic_scaled)).item()
    predicted_weather_scaled = weather_lstm(torch.Tensor(weather_scaled)).item()

    predicted_traffic_level = int(round(scaler_traffic.inverse_transform([[predicted_traffic_scaled]])[0][0]))
    predicted_weather_level = int(round(scaler_weather.inverse_transform([[predicted_weather_scaled]])[0][0]))
    
    # New Feature 2: Get road condition from TensorFlow model
    # Dummy image data (replace with a real image from an API)
    dummy_image = np.random.rand(1, 64, 64, 3) * 255
    road_condition_pred = road_condition_classifier.predict(dummy_image)
    road_condition = 'Congested' if road_condition_pred > 0.5 else 'Clear'
    
    # Map numerical predictions back to labels
    traffic_map = {1: 'Low', 2: 'Medium', 3: 'High'}
    weather_map = {1: 'Clear', 2: 'Rainy', 3: 'Foggy'}
    predicted_traffic_label = traffic_map.get(predicted_traffic_level, 'Medium')
    predicted_weather_label = weather_map.get(predicted_weather_level, 'Clear')

    # Calculate base carbon using the old model
    dist_km = geodesic(city_coords[start_loc], city_coords[end_loc]).km
    input_data = pd.DataFrame([[dist_km, load_kg]], columns=['distance_km', 'load_weight_kg'])
    base_carbon_kg = base_carbon_model.predict(input_data)[0]

    # Combine all predictions to get the final carbon footprint
    final_carbon_kg = (
        base_carbon_kg *
        weather_impact.get(predicted_weather_label, 1.0) *
        traffic_impact.get(predicted_traffic_label, 1.0) *
        (1.5 if road_condition == 'Congested' else 1.0) # Apply impact from TensorFlow
    )

    return jsonify({
        "start_location": start_loc,
        "end_location": end_loc,
        "predicted_traffic": predicted_traffic_label,
        "predicted_weather": predicted_weather_label,
        "road_condition": road_condition,
        "optimized_carbon_footprint_kg": round(final_carbon_kg, 2)
    })

if __name__ == '__main__':
    app.run(debug=True)

FileNotFoundError: [Errno 2] No such file or directory: 'base_carbon_model.pkl'