# In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
import random
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Load the rider/order export. `on_bad_lines='skip'` replaces the
# `error_bad_lines=False` flag, which pandas deprecated in 1.3 and removed in
# 2.0; malformed rows are dropped instead of aborting the read.
dataset = pd.read_csv('Rider-Info.csv', on_bad_lines='skip')
# Zero-fill missing and infinite entries in every column, then drop rows that
# are exact duplicates of an earlier row.
dataset = dataset.replace([np.inf, -np.inf, np.nan], 0)
dataset = dataset.drop_duplicates()

# Timestamp columns parsed below when the order timestamp is present.
date_columns = ['order_time', 'allot_time', 'accept_time', 'pickup_time', 'delivered_time', 'cancelled_time']

if 'order_time' not in dataset.columns:
    print("The 'order_time' column does not exist in the DataFrame.")
else:
    # Entries that cannot be parsed become NaT instead of raising.
    for col in date_columns:
        dataset[col] = pd.to_datetime(dataset[col], errors='coerce')

    # Calendar features derived from the pickup and order timestamps.
    dataset['pickup_hour'] = dataset['pickup_time'].dt.hour
    dataset['order_hour'] = dataset['order_time'].dt.hour
    dataset['order_month'] = dataset['order_time'].dt.month
    dataset['order_day'] = dataset['order_time'].dt.dayofweek

# Flag columns: zero-fill missing/infinite entries and store them as integers.
dataset['canceled'] = dataset['canceled'].replace([np.inf, -np.inf, np.nan], 0).astype(int)
dataset['reassigned_order'] = dataset['reassigned_order'].replace([np.inf, -np.inf, np.nan], 0).astype(int)

# Each categorical column gets its own fitted encoder. Calling `transform` on
# 'reassignment_reason' with the encoder fitted on 'reassignment_method'
# raises for any reason label that is not also a method label.
# `label_encoder` keeps the 'reassignment_method' mapping, as before.
# NOTE(review): the earlier zero-fill may leave a mix of strings and 0 in these
# columns; confirm LabelEncoder accepts the resulting values.
label_encoder = LabelEncoder()
dataset['reassignment_method'] = label_encoder.fit_transform(dataset['reassignment_method'])
reason_encoder = LabelEncoder()
dataset['reassignment_reason'] = reason_encoder.fit_transform(dataset['reassignment_reason'])

print(dataset.columns)

# Model inputs: calendar features, reassignment information, distances and
# the rider workload/history columns.
selected_features = ['order_day', 'order_month', 'order_hour', 'pickup_hour', 'reassigned_order', 'reassignment_method', 'reassignment_reason', 'first_mile_distance', 'last_mile_distance', 'alloted_orders', 'delivered_orders', 'undelivered_orders', 'lifetime_order_count', 'session_time']
X_ml = dataset[selected_features]
# The target is 'delivery_time', the column that the fitness function, the
# annealing cost and the later refit all use. The previous target,
# 'delivered_time', is one of the parsed timestamp columns.
y_ml = dataset['delivery_time']

# Hold out 20% of the rows for evaluation; the seed makes the split repeatable.
X_train_ml, X_test_ml, y_train_ml, y_test_ml = train_test_split(X_ml, y_ml, test_size=0.2, random_state=42)

ml_model = RandomForestRegressor(n_estimators=100, random_state=42)
ml_model.fit(X_train_ml, y_train_ml)
# Report the held-out score; outside a notebook a bare expression is discarded.
print(f"Random Forest held-out score: {ml_model.score(X_test_ml, y_test_ml)}")

def initialize_population(population_size):
    """Build the starting population for the genetic algorithm.

    Each member is an independent random sample holding 10% of the rows of
    the module-level ``dataset``.

    Args:
        population_size: Number of batches to draw.

    Returns:
        A list of ``population_size`` DataFrames.
    """
    return [dataset.sample(frac=0.1) for _ in range(population_size)]

def fitness(batch):
    """Score a batch; a smaller total delivery time gives a larger score.

    Args:
        batch: DataFrame with a 'delivery_time' column.

    Returns:
        ``1 / (1 + total)`` where ``total`` is the sum of 'delivery_time'.
    """
    total_delivery_time = batch['delivery_time'].sum()
    return 1 / (1 + total_delivery_time)

def crossover(parent1, parent2):
    """Combine two parent batches with a single-point crossover.

    The child takes the rows of ``parent1`` before a random cut position and
    the rows of ``parent2`` from that position onward.

    Args:
        parent1: DataFrame supplying the leading rows.
        parent2: DataFrame supplying the trailing rows.

    Returns:
        A new DataFrame. When ``parent1`` has fewer than two rows there is no
        valid cut position, so a copy of ``parent1`` is returned.
    """
    if len(parent1) < 2:
        # random.randint(1, len(parent1) - 1) would raise ValueError here.
        return parent1.copy()

    crossover_point = random.randint(1, len(parent1) - 1)
    return pd.concat([parent1.iloc[:crossover_point], parent2.iloc[crossover_point:]])

def mutate(batch):
    """Return a copy of ``batch`` with the contents of two random rows swapped.

    The input is left untouched. The result keeps the input's index labels in
    their original order; only the row contents change places.

    Args:
        batch: DataFrame to mutate.

    Returns:
        A new DataFrame. A batch with fewer than two rows has nothing to swap
        and is returned as a plain copy.
    """
    row_count = len(batch)
    if row_count < 2:
        # random.sample(range(row_count), 2) would raise ValueError here.
        return batch.copy()

    first, second = random.sample(range(row_count), 2)
    # Express the swap as a positional permutation so it does not depend on
    # how DataFrame-to-DataFrame assignment through iloc treats the index.
    order = list(range(row_count))
    order[first], order[second] = order[second], order[first]

    mutated_batch = batch.iloc[order]
    mutated_batch.index = batch.index
    return mutated_batch

def genetic_algorithm(population_size, generations):
    """Evolve order batches and return the fittest batch of the final generation.

    In each generation the two fittest batches become parents. The next
    population holds ``population_size - 2`` crossover children plus a mutated
    copy of each parent.

    Args:
        population_size: Number of batches per generation.
        generations: Number of generations to run.

    Returns:
        The DataFrame with the highest ``fitness`` in the final population.
    """
    population = initialize_population(population_size)

    for _ in range(generations):
        fitness_scores = [fitness(batch) for batch in population]
        # argsort is ascending, so reverse it to take the HIGHEST scores.
        # Taking the first two entries selected the two least-fit batches.
        fittest_first = np.argsort(fitness_scores)[::-1]
        parents = [population[i] for i in fittest_first[:2]]
        offspring = [crossover(parents[0], parents[1]) for _ in range(population_size - 2)]
        offspring += [mutate(parent) for parent in parents]
        population = offspring

    return max(population, key=fitness)

# Refit the Random Forest against 'delivery_time', the column the optimisation
# routines score against. Only the rows of the existing training partition are
# used: fitting on the full dataset would include the X_test_ml rows that the
# baseline MSE is later computed on.
y_train_ml = dataset.loc[X_train_ml.index, 'delivery_time']
ml_model = RandomForestRegressor()
ml_model.fit(X_train_ml, y_train_ml)

def apply_delivery_rules(order1, order2, time_window=10):
    """Decide whether two orders may be handled together.

    The orders qualify when any of these holds:

    1. They share a kitchen or a customer, and their ready times differ by at
       most ``time_window``.
    2. They share a customer, the second order's kitchen is in the first
       order's 'on_the_way' collection, and the second order's ready time is
       within ``time_window`` of the first order's 'time_to_second_kitchen'.
    3. They have different customers, the second order's customer and kitchen
       are both in the first order's 'on_the_way' collection, and the ready
       times differ by at most ``time_window``.

    The earlier chain of branches reduces to exactly these rules: three of
    its branches were byte-identical, and the final "same kitchen" branch was
    already covered by rule 1.

    Args:
        order1: Mapping-like order (dict or DataFrame row) with 'kitchen',
            'customer' and 'ready_time'; 'on_the_way' and
            'time_to_second_kitchen' are read only when rule 1 fails.
        order2: Mapping-like order with 'kitchen', 'customer', 'ready_time'.
        time_window: Largest allowed gap, in the same units as 'ready_time'.
            Defaults to 10, the previously hard-coded value.

    Returns:
        True when the orders may be combined, otherwise False.
    """
    same_kitchen = order1['kitchen'] == order2['kitchen']
    same_customer = order1['customer'] == order2['customer']

    def ready_together():
        # Evaluated lazily so 'ready_time' is read only when a rule needs it.
        return abs(order1['ready_time'] - order2['ready_time']) <= time_window

    if (same_kitchen or same_customer) and ready_together():
        return True

    if same_customer:
        return bool(
            order2['kitchen'] in order1['on_the_way']
            and abs(order2['ready_time'] - order1['time_to_second_kitchen']) <= time_window
        )

    return bool(
        order2['customer'] in order1['on_the_way']
        and order2['kitchen'] in order1['on_the_way']
        and ready_together()
    )

def apply_simulated_annealing(batch, predictions, initial_temperature=100.0,
                              final_temperature=0.1, cooling_rate=0.95):
    """Reorder the rows of ``batch`` in place with simulated annealing.

    Each step swaps two random rows, re-predicts with the module-level
    ``ml_model`` and scores the candidate by the mean squared error between
    the predictions and the 'delivery_time' column. Better candidates are
    always accepted; worse ones with probability ``exp(-delta / temperature)``.

    Args:
        batch: DataFrame holding ``selected_features`` and 'delivery_time';
            overwritten with the final state.
        predictions: Model predictions for ``batch`` in its starting order.
        initial_temperature: Starting temperature (default 100.0).
        final_temperature: The loop stops once the temperature is no longer
            above this value (default 0.1). Must be positive.
        cooling_rate: Factor applied to the temperature after every step
            (default 0.95). Must lie strictly between 0 and 1.

    Raises:
        ValueError: If ``cooling_rate`` or ``final_temperature`` would keep
            the loop from terminating.
    """
    if not 0 < cooling_rate < 1:
        raise ValueError("cooling_rate must be strictly between 0 and 1")
    if final_temperature <= 0:
        raise ValueError("final_temperature must be positive")
    if len(batch) < 2:
        # There is no pair of rows to swap; random.sample would raise.
        return

    current_state = batch.copy()
    current_cost = mean_squared_error(predictions, current_state['delivery_time'])

    # NOTE(review): a swap moves features and target together, so for a model
    # that scores each row independently the cost should not change between
    # neighbours -- confirm that this is the intended move.
    temperature = initial_temperature
    while temperature > final_temperature:
        neighbor_state = current_state.copy()
        indices = random.sample(range(len(neighbor_state)), 2)
        neighbor_state.iloc[indices] = neighbor_state.iloc[indices[::-1]]
        neighbor_predictions = ml_model.predict(neighbor_state[selected_features])

        neighbor_cost = mean_squared_error(neighbor_predictions, neighbor_state['delivery_time'])

        if neighbor_cost < current_cost or random.uniform(0, 1) < np.exp((current_cost - neighbor_cost) / temperature):
            current_state = neighbor_state
            current_cost = neighbor_cost

        temperature *= cooling_rate

    # Write the result back through the caller's DataFrame object.
    batch.iloc[:] = current_state.values

def optimize_delivery():
    """Run the Random Forest based optimisation and return the chosen batch.

    Steps: re-parse the order and pickup timestamps, pick the best batch with
    the genetic algorithm, copy the rider of a later row onto an earlier row
    whenever ``apply_delivery_rules`` accepts the pair, then reorder the batch
    with ``apply_simulated_annealing``.

    Returns:
        The optimised batch as a DataFrame.

    Raises:
        KeyError: If the batch has no 'assigned_rider' column.
    """
    dataset['order_time'] = pd.to_datetime(dataset['order_time'], errors='coerce')
    dataset['pickup_time'] = pd.to_datetime(dataset['pickup_time'], errors='coerce')

    # genetic_algorithm returns ONE DataFrame. Iterating it yields column
    # labels, so the row pairs are walked by position instead.
    best_batch = genetic_algorithm(population_size=10, generations=5)

    # Use positional access throughout: the batch is a random sample, so its
    # index labels are not 0..n-1.
    rider_col = best_batch.columns.get_loc('assigned_rider')
    row_count = len(best_batch)
    for i in range(row_count):
        for j in range(i + 1, row_count):
            if apply_delivery_rules(best_batch.iloc[i], best_batch.iloc[j]):
                best_batch.iloc[i, rider_col] = best_batch.iloc[j, rider_col]

    # ml_model is fitted on unscaled features, so no scaler is applied here
    # (the previously referenced `scaler` is not defined in this file).
    batch_predictions = ml_model.predict(best_batch[selected_features])

    apply_simulated_annealing(best_batch, batch_predictions)
    return best_batch

def apply_simulated_annealing_nn(batch, predictions, initial_temperature=100.0,
                                 final_temperature=0.1, cooling_rate=0.95):
    """Reorder the rows of ``batch`` in place, scored by the neural network.

    Note: a function with this name is defined again further down the file;
    that later definition is the one in effect when the script calls it.

    Each step swaps two random rows, scales the features with ``scaler_nn``,
    predicts with ``nn_model`` and scores the candidate by the mean squared
    error against 'delivery_time'. Better candidates are always accepted;
    worse ones with probability ``exp(-delta / temperature)``.

    Args:
        batch: DataFrame holding ``selected_features`` and 'delivery_time';
            overwritten with the final state.
        predictions: Model predictions for ``batch`` in its starting order.
        initial_temperature: Starting temperature (default 100.0).
        final_temperature: Positive stop threshold (default 0.1).
        cooling_rate: Per-step temperature factor, strictly between 0 and 1
            (default 0.95).

    Raises:
        ValueError: If ``cooling_rate`` or ``final_temperature`` would keep
            the loop from terminating.
    """
    if not 0 < cooling_rate < 1:
        raise ValueError("cooling_rate must be strictly between 0 and 1")
    if final_temperature <= 0:
        raise ValueError("final_temperature must be positive")
    if len(batch) < 2:
        # There is no pair of rows to swap; random.sample would raise.
        return

    current_state = batch.copy()
    current_cost = mean_squared_error(predictions, current_state['delivery_time'])

    temperature = initial_temperature
    while temperature > final_temperature:
        neighbor_state = current_state.copy()
        indices = random.sample(range(len(neighbor_state)), 2)
        neighbor_state.iloc[indices] = neighbor_state.iloc[indices[::-1]]
        neighbor_predictions = nn_model.predict(scaler_nn.transform(neighbor_state[selected_features]))

        neighbor_cost = mean_squared_error(neighbor_predictions, neighbor_state['delivery_time'])

        if neighbor_cost < current_cost or random.uniform(0, 1) < np.exp((current_cost - neighbor_cost) / temperature):
            current_state = neighbor_state
            current_cost = neighbor_cost

        temperature *= cooling_rate

    # Write the result back through the caller's DataFrame object.
    batch.iloc[:] = current_state.values

def optimize_delivery_nn():
    """Run the neural-network based optimisation and return the chosen batch.

    Note: a function with this name is defined again further down the file;
    that later definition is the one in effect when the script calls it.

    Steps: re-parse the order and pickup timestamps, pick the best batch with
    the genetic algorithm, copy the rider of a later row onto an earlier row
    whenever ``apply_delivery_rules`` accepts the pair, then reorder the batch
    with ``apply_simulated_annealing_nn``.

    Returns:
        The optimised batch as a DataFrame.

    Raises:
        KeyError: If the batch has no 'assigned_rider' column.
    """
    dataset['order_time'] = pd.to_datetime(dataset['order_time'], errors='coerce')
    dataset['pickup_time'] = pd.to_datetime(dataset['pickup_time'], errors='coerce')

    # genetic_algorithm returns ONE DataFrame. Iterating it yields column
    # labels, so the row pairs are walked by position instead.
    best_batch = genetic_algorithm(population_size=10, generations=5)

    # Use positional access throughout: the batch is a random sample, so its
    # index labels are not 0..n-1.
    rider_col = best_batch.columns.get_loc('assigned_rider')
    row_count = len(best_batch)
    for i in range(row_count):
        for j in range(i + 1, row_count):
            if apply_delivery_rules(best_batch.iloc[i], best_batch.iloc[j]):
                best_batch.iloc[i, rider_col] = best_batch.iloc[j, rider_col]

    batch_features = scaler_nn.transform(best_batch[selected_features])
    batch_predictions = nn_model.predict(batch_features)

    apply_simulated_annealing_nn(best_batch, batch_predictions)
    return best_batch

# Standardise the feature matrix for the neural network.
# NOTE(review): the scaler is fitted on all rows before the train/test split.
scaler_nn = StandardScaler()
X_nn = scaler_nn.fit_transform(X_ml)

# Same split ratio and seed as the Random Forest split.
X_train_nn, X_test_nn, y_train_nn, y_test_nn = train_test_split(
    X_nn, y_ml, test_size=0.2, random_state=42
)

# Fully connected regressor: two ReLU hidden layers and one linear output unit.
nn_model = Sequential([
    Dense(64, activation='relu', input_dim=X_nn.shape[1]),
    Dense(32, activation='relu'),
    Dense(1, activation='linear'),
])

nn_model.compile(loss='mean_squared_error', optimizer='adam')

# The held-out partition is used as validation data during training.
nn_model.fit(
    X_train_nn,
    y_train_nn,
    validation_data=(X_test_nn, y_test_nn),
    batch_size=32,
    epochs=50,
)

def apply_simulated_annealing_nn(batch, predictions, initial_temperature=100.0,
                                 final_temperature=0.1, cooling_rate=0.95):
    """Reorder the rows of ``batch`` in place, scored by the neural network.

    Each step swaps two random rows, scales the features with ``scaler_nn``,
    predicts with ``nn_model`` and scores the candidate by the mean squared
    error against 'delivery_time'. Better candidates are always accepted;
    worse ones with probability ``exp(-delta / temperature)``.

    Args:
        batch: DataFrame holding ``selected_features`` and 'delivery_time';
            overwritten with the final state.
        predictions: Model predictions for ``batch`` in its starting order.
        initial_temperature: Starting temperature (default 100.0).
        final_temperature: Positive stop threshold (default 0.1).
        cooling_rate: Per-step temperature factor, strictly between 0 and 1
            (default 0.95).

    Raises:
        ValueError: If ``cooling_rate`` or ``final_temperature`` would keep
            the loop from terminating.
    """
    if not 0 < cooling_rate < 1:
        raise ValueError("cooling_rate must be strictly between 0 and 1")
    if final_temperature <= 0:
        raise ValueError("final_temperature must be positive")
    if len(batch) < 2:
        # There is no pair of rows to swap; random.sample would raise.
        return

    current_state = batch.copy()
    current_cost = mean_squared_error(predictions, current_state['delivery_time'])

    # NOTE(review): a swap moves features and target together, so for a model
    # that scores each row independently the cost should not change between
    # neighbours -- confirm that this is the intended move.
    temperature = initial_temperature
    while temperature > final_temperature:
        neighbor_state = current_state.copy()
        indices = random.sample(range(len(neighbor_state)), 2)
        neighbor_state.iloc[indices] = neighbor_state.iloc[indices[::-1]]
        neighbor_predictions = nn_model.predict(scaler_nn.transform(neighbor_state[selected_features]))

        neighbor_cost = mean_squared_error(neighbor_predictions, neighbor_state['delivery_time'])

        if neighbor_cost < current_cost or random.uniform(0, 1) < np.exp((current_cost - neighbor_cost) / temperature):
            current_state = neighbor_state
            current_cost = neighbor_cost

        temperature *= cooling_rate

    # Write the result back through the caller's DataFrame object.
    batch.iloc[:] = current_state.values

def optimize_delivery_nn():
    """Run the neural-network based optimisation and return the chosen batch.

    Steps: re-parse the 'order_date' and 'pickup_time' columns, pick the best
    batch with the genetic algorithm, copy the rider of a later row onto an
    earlier row whenever ``apply_delivery_rules`` accepts the pair, then
    reorder the batch with ``apply_simulated_annealing_nn``.

    Returns:
        The optimised batch as a DataFrame.

    Raises:
        KeyError: If 'order_date' is missing from ``dataset`` or the batch has
            no 'assigned_rider' column.
    """
    # NOTE(review): the other optimise routines parse 'order_time' here;
    # confirm that 'order_date' is the intended column.
    dataset['order_date'] = pd.to_datetime(dataset['order_date'], errors='coerce')
    dataset['pickup_time'] = pd.to_datetime(dataset['pickup_time'], errors='coerce')

    # genetic_algorithm returns ONE DataFrame. Iterating it yields column
    # labels, so the row pairs are walked by position instead.
    best_batch = genetic_algorithm(population_size=10, generations=5)

    # Use positional access throughout: the batch is a random sample, so its
    # index labels are not 0..n-1.
    rider_col = best_batch.columns.get_loc('assigned_rider')
    row_count = len(best_batch)
    for i in range(row_count):
        for j in range(i + 1, row_count):
            if apply_delivery_rules(best_batch.iloc[i], best_batch.iloc[j]):
                best_batch.iloc[i, rider_col] = best_batch.iloc[j, rider_col]

    batch_features = scaler_nn.transform(best_batch[selected_features])
    batch_predictions = nn_model.predict(batch_features)

    apply_simulated_annealing_nn(best_batch, batch_predictions)
    # Returned so that callers can export the optimised batch.
    return best_batch

# Run the neural-network optimisation and keep whatever it returns. The name
# `best_batch` was never bound at module level before, so the CSV export
# below failed with a NameError.
best_batch = optimize_delivery_nn()

# X_test_nn comes from the output of scaler_nn.fit_transform and is therefore
# already standardised; scaling it again would distort the network's inputs.
optimized_predictions_nn = nn_model.predict(X_test_nn)
optimized_mse_nn = mean_squared_error(y_test_nn, optimized_predictions_nn)
print(f"Optimized Neural Network MSE: {optimized_mse_nn}")

baseline_predictions = ml_model.predict(X_test_ml)
baseline_mse = mean_squared_error(y_test_ml, baseline_predictions)
print(f"Baseline Random Forest MSE: {baseline_mse}")

if optimized_mse_nn < baseline_mse:
    print("The Neural Network-based approach outperforms the Random Forest baseline.")
else:
    print("The Random Forest baseline outperforms the Neural Network-based approach.")

nn_model.save('optimized_nn_model.h5')
# Export only when the optimiser actually handed back a batch.
if best_batch is not None:
    best_batch.to_csv('best_batch.csv', index=False)
else:
    print("No optimized batch was returned; skipping best_batch.csv export.")

import joblib
joblib.dump(ml_model, 'random_forest_model.joblib')
joblib.dump(label_encoder, 'label_encoder.joblib')
