In [27]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler

def convert_time_to_minutes(time_str):
    try:
        # Split the time string (e.g., '27.00:00:00')
        parts = time_str.split(":")
        hours = int(parts[0])
        minutes = int(parts[1])
        seconds = int(parts[2]) if len(parts) > 2 else 0
        total_minutes = hours * 60 + minutes + seconds / 60
        return total_minutes
    except Exception:
        return None  # Return None for invalid values

def load_and_preprocess(file_path):
    data = pd.read_csv(file_path)

    # Convert time-like strings to numeric minutes
    data['Previous Delivery Duration (min)'] = data['Previous Delivery Duration (min)'].apply(convert_time_to_minutes)
    data['Estimated Travel Time (min)'] = data['Estimated Travel Time (min)'].apply(convert_time_to_minutes)

    # Handle missing or invalid values
    data['Previous Delivery Duration (min)'].fillna(data['Previous Delivery Duration (min)'].median(), inplace=True)
    data['Estimated Travel Time (min)'].fillna(data['Estimated Travel Time (min)'].median(), inplace=True)

    # Encode categorical features
    label_encoder = LabelEncoder()
    data['Traffic Conditions'] = label_encoder.fit_transform(data['Traffic Conditions'])  # Light=0, Moderate=1, Heavy=2
    data['Road Closures'] = label_encoder.fit_transform(data['Road Closures'])  # No=0, Yes=1

    # Standardize numerical features
    scaler = StandardScaler()
    numerical_features = ['Previous Delivery Duration (min)', 'Fuel Consumption (L)', 
                          'Distance to Next Delivery (km)', 'Estimated Travel Time (min)', 'Delivery Size (items)']
    data[numerical_features] = scaler.fit_transform(data[numerical_features])

    return data

file_path = r"D:\hadoop\Book2.csv"
data = load_and_preprocess(file_path)
print("Preprocessed Data:\n", data.head())


Preprocessed Data:
           Timestamp  Traffic Conditions  Road Closures  \
0  2024/01/01 00:00                   1              0   
1  2024/01/01 01:00                   0              0   
2  2024/01/01 02:00                   2              1   
3  2024/01/01 03:00                   2              0   
4  2024/01/01 04:00                   1              0   

   Previous Delivery Duration (min)  Fuel Consumption (L)  \
0                               NaN             -0.354405   
1                               NaN              1.709663   
2                               NaN             -1.042428   
3                               NaN             -1.042428   
4                               NaN             -0.354405   

   Distance to Next Delivery (km)  Estimated Travel Time (min)  \
0                        0.175709                          NaN   
1                        0.835029                          NaN   
2                        1.164689                          NaN   


  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count


In [28]:
import heapq

class AStarSearch:
    def __init__(self, distances, traffic_conditions):
        self.distances = distances
        self.traffic_conditions = traffic_conditions

    def heuristic(self, current, goal):
        return abs(ord(current) - ord(goal))  # Simple heuristic for demonstration

    def find_route(self, start, goal):
        open_list = []
        heapq.heappush(open_list, (0, start))
        came_from = {}
        g_score = {start: 0}
        f_score = {start: self.heuristic(start, goal)}

        while open_list:
            _, current = heapq.heappop(open_list)

            if current == goal:
                route = []
                while current in came_from:
                    route.append(current)
                    current = came_from[current]
                route.reverse()
                return route, g_score[goal]

            for neighbor in self.distances[current]:
                tentative_g_score = g_score[current] + self.distances[current][neighbor]
                if tentative_g_score < g_score.get(neighbor, float('inf')):
                    came_from[neighbor] = current
                    g_score[neighbor] = tentative_g_score
                    f_score[neighbor] = tentative_g_score + self.heuristic(neighbor, goal)
                    if neighbor not in [i[1] for i in open_list]:
                        heapq.heappush(open_list, (f_score[neighbor], neighbor))

        return None, float('inf')

# Example usage
distances = {
    'A': {'B': 2, 'C': 5},
    'B': {'A': 2, 'C': 3, 'D': 1},
    'C': {'A': 5, 'B': 3, 'D': 2},
    'D': {'B': 1, 'C': 2}
}
traffic_conditions = {'A': 1, 'B': 2, 'C': 0, 'D': 1}

astar = AStarSearch(distances, traffic_conditions)
route, cost = astar.find_route('A', 'D')
print("A* Search - Optimal Route:", route)
print("A* Search - Total Cost:", cost)


A* Search - Optimal Route: ['B', 'D']
A* Search - Total Cost: 3


In [29]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Assuming 'data' is your dataset with features
X = data[['Previous Delivery Duration (min)', 'Distance to Next Delivery (km)', 
          'Delivery Size (items)', 'Estimated Travel Time (min)', 'Road Closures']]
y_traffic = data['Traffic Conditions']

# Handle missing values in X
imputer = SimpleImputer(strategy='median')  # Fill NaNs with the median of each column
X = imputer.fit_transform(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_traffic, test_size=0.2, random_state=42)

# Define the model and train
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)

# Predict traffic conditions
y_pred = clf.predict(X_test)

# Calculate accuracy
traffic_accuracy = accuracy_score(y_test, y_pred) * 100
print(f"Traffic Conditions Prediction Accuracy: {traffic_accuracy:.2f}%")




Traffic Conditions Prediction Accuracy: 74.12%


In [31]:
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
import numpy as np

# Define target variable
y_fuel = data['Fuel Consumption (L)']
y_fuel.fillna(y_fuel.median(), inplace=True)  # Handle missing values

# Scale the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_fuel, test_size=0.2, random_state=42)

# Define and train the MLP Regressor
mlp = MLPRegressor(
    hidden_layer_sizes=(100, 50, 25),
    activation='relu',
    solver='adam',
    max_iter=1000,
    learning_rate='adaptive',
    random_state=42
)
mlp.fit(X_train, y_train)

# Predict and evaluate
y_pred = mlp.predict(X_test)

fuel_mae = mean_absolute_error(y_test, y_pred)
fuel_mse = mean_squared_error(y_test, y_pred)
fuel_rmse = np.sqrt(fuel_mse)
fuel_r2 = r2_score(y_test, y_pred)

print("MLP Regressor Results:")
print(f"Mean Absolute Error (MAE): {fuel_mae:.2f}")
print(f"Mean Squared Error (MSE): {fuel_mse:.2f}")
print(f"Root Mean Squared Error (RMSE): {fuel_rmse:.2f}")
print(f"R-squared (R²): {fuel_r2:.2f}")


MLP Regressor Results:
Mean Absolute Error (MAE): 0.37
Mean Squared Error (MSE): 0.43
Root Mean Squared Error (RMSE): 0.66
R-squared (R²): 0.04


In [32]:
predicted_traffic = clf.predict(X_test)
predicted_fuel = mlp.predict(X_test)

print("\nPredicted Traffic Conditions (Sample):", predicted_traffic[:5])
print("Predicted Fuel Consumption (Sample):", predicted_fuel[:5])

print("Integration complete.")


Predicted Traffic Conditions (Sample): [1 1 1 1 0]
Predicted Fuel Consumption (Sample): [ 0.07412963  0.15964127 -0.00397078 -0.04620646 -0.33437482]
Integration complete.
