# Predict travel time matrices

This section calculates travel time matrices for ride-sharing services from travel times predicted by a pre-trained XGBoost model. We use the OSRM API to compute road distances between locations and then predict travel times at 15-minute intervals.

## 1. Load Sample Locations
Load the sample locations dataset.


In [None]:
import pandas as pd
import numpy as np
import requests
import logging
from datetime import datetime, timedelta
import xgboost as xgb

# Load sample locations
# The file provides one row per location with the columns 'latitude',
# 'longitude' and 'location_index' (see the column listing in the next cell).
locations_df = pd.read_csv('/content/sample_locations.csv')

In [None]:
locations_df.columns

Index(['latitude', 'longitude', 'location_index'], dtype='object')

In [None]:
locations_df

Unnamed: 0,latitude,longitude,location_index
0,41.93476,-87.639854,0
1,41.900223,-87.629105,1
2,41.911972,-87.68364,2
3,41.878864,-87.62519,3
4,41.929047,-87.65131,4
5,41.89959,-87.67472,5
6,41.91475,-87.65401,6
7,41.89833,-87.620766,7
8,41.929077,-87.64629,8
9,41.94287,-87.70133,9


## 2. Calculate Road Distances
Define a function to calculate road distances using the OSRM API and create a distance matrix.


In [None]:
# Function to get road distance using OSRM API
def get_road_distance_osrm(pickup_latitude, pickup_longitude, dropoff_latitude, dropoff_longitude):
    """Return the driving distance in miles between two coordinates via the OSRM demo server.

    Args:
        pickup_latitude: Latitude of the origin.
        pickup_longitude: Longitude of the origin.
        dropoff_latitude: Latitude of the destination.
        dropoff_longitude: Longitude of the destination.

    Returns:
        The distance of the first returned route in miles, or ``None`` when the
        request fails, the response is not valid JSON, or OSRM reports a code
        other than ``'Ok'``. Failures are logged with ``logging.error``.
    """
    # OSRM expects coordinates as "longitude,latitude".
    url = f"http://router.project-osrm.org/route/v1/driving/{pickup_longitude},{pickup_latitude};{dropoff_longitude},{dropoff_latitude}?overview=false"
    try:
        # A timeout keeps one stalled request from hanging the whole matrix build.
        response = requests.get(url, timeout=30)
        # Parse before checking the HTTP status: OSRM reports routing errors
        # as a JSON body carrying its own 'code' field.
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as exc:
        logging.error(f"Error: OSRM request failed: {exc}")
        return None

    if data.get('code') != 'Ok':
        logging.error(f"Error: {data.get('code')}")
        return None

    distance_meters = data['routes'][0]['distance']
    return distance_meters * 0.000621371  # Convert meters to miles

# Build the pairwise road-distance matrix in miles: row = origin, column = destination.
num_locations = len(locations_df)
distance_matrix = np.zeros((num_locations, num_locations))

for i in range(num_locations):
    origin_lat = locations_df.loc[i, 'latitude']
    origin_long = locations_df.loc[i, 'longitude']
    for j in range(num_locations):
        if i == j:
            continue  # Distance to self is 0 (already set by np.zeros)
        distance_matrix[i, j] = get_road_distance_osrm(
            origin_lat,
            origin_long,
            locations_df.loc[j, 'latitude'],
            locations_df.loc[j, 'longitude'],
        )

In [None]:
distance_matrix_df = pd.DataFrame(distance_matrix)
distance_matrix_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
0,0.0,2.905779,4.060659,4.317721,1.003825,4.263786,2.19201,3.070194,0.779261,3.841316,...,5.057028,1.944332,6.000021,1.536278,4.309581,3.385353,5.758742,6.626114,2.186418,1.747668
1,2.760379,0.0,3.392686,1.818442,2.699298,2.887511,1.896362,0.773296,2.549112,5.590102,...,3.110708,2.479643,4.053638,1.248645,3.120214,2.717318,3.794961,5.081448,2.926347,3.409028
2,4.064885,3.455258,0.0,4.76256,3.310913,1.302207,1.976892,3.954467,3.320296,2.981214,...,2.876326,2.560173,3.544922,2.992212,1.094856,0.929509,5.046837,2.784426,2.274342,3.065596
3,4.962207,1.797875,5.055288,0.0,5.167259,4.286031,3.693367,2.117011,5.017074,7.253202,...,2.787222,4.27671,3.339248,3.590095,4.637665,4.379982,2.506114,6.332143,5.451412,5.876989
4,1.003825,2.761248,3.305632,4.387811,0.0,3.508758,1.187751,3.211742,0.258677,3.157807,...,4.302,0.940942,5.24493,1.731637,3.554491,2.630263,5.003652,5.871086,1.183028,0.85569
5,4.522276,2.910377,1.303326,4.219606,3.519197,0.0,2.434283,3.20236,3.777687,3.797882,...,1.825712,3.017626,2.49437,3.449665,0.296767,1.3869,3.827645,2.759446,2.727446,3.5187
6,2.190892,1.96086,1.970927,3.587423,1.187751,2.174053,0.0,2.46007,1.446303,3.576798,...,3.4245,0.685248,4.36743,1.497815,2.219848,1.295621,4.021948,4.536381,1.226089,1.79278
7,3.007995,0.507784,3.895685,1.727039,3.076408,3.328933,2.3993,0.0,2.926222,6.09304,...,3.422822,2.982643,4.365753,1.499244,3.432329,3.220317,4.124474,5.393562,3.429347,3.786138
8,0.778143,2.612057,3.314952,4.23862,0.258677,3.518078,1.446303,3.062551,0.0,3.779924,...,4.311321,1.199495,5.254251,1.582446,3.563873,2.639646,5.013035,5.880407,1.440711,1.114367
9,3.841626,5.657707,2.978107,6.95774,3.157807,4.466042,3.576798,6.156979,3.779924,0.0,...,5.864997,3.187012,6.807927,5.194724,3.589536,3.041549,7.242079,4.111674,2.821024,2.804558


In [None]:
# Save distance matrix
np.save('distance_matrix.npy', distance_matrix)

## 3. Prepare Dataset for prediction
Define a function to prepare the dataset by adding temporal features and a function to prepare the data for prediction by adding distance and temporal features.


In [None]:
# Function to prepare dataset
def prepare_dataset(df):
    """Add the temporal model features to ``df`` in place and return it.

    ``df['trip_start_timestamp']`` is converted with ``pd.to_datetime``; the
    function then adds cyclic encodings of the hour, weekday and minute plus
    weekend / rush-hour indicator columns (0 or 1).
    """
    df['trip_start_timestamp'] = pd.to_datetime(df['trip_start_timestamp'])
    stamp = df['trip_start_timestamp'].dt
    hour, weekday, minute = stamp.hour, stamp.dayofweek, stamp.minute

    # NOTE(review): dayofweek spans 0-6 (seven values) yet the period used is 6,
    # and the "quarter" encoding uses the raw minute with a period of 15. These
    # presumably mirror how the pre-trained model's features were built, so they
    # are kept as-is -- confirm against the training code before changing them.
    for prefix, values, period in (('hour', hour, 24), ('day', weekday, 6), ('quarter', minute, 15)):
        angle = 2 * np.pi * values / period
        df[f'{prefix}_cos'] = np.cos(angle)
        df[f'{prefix}_sin'] = np.sin(angle)

    weekend_mask = weekday >= 5
    morning_mask = (hour >= 6) & (hour < 9)
    evening_mask = (hour >= 16) & (hour < 19)
    df['is_weekend'] = weekend_mask.astype(int)
    df['is_morning_rush'] = morning_mask.astype(int)
    df['is_evening_rush'] = evening_mask.astype(int)
    return df

# Function to prepare data for prediction

def prepare_data_for_prediction(locations_df, distance_matrix, timestamp):
    """Build one feature row per ordered (pickup, dropoff) location pair.

    Rows are produced in row-major order (pickup index outer, dropoff index
    inner) and include the diagonal pairs. Every row carries the same
    ``timestamp``; temporal features are then added by ``prepare_dataset``.
    """
    location_count = len(locations_df)
    rows = [
        {
            'pickup_latitude': locations_df.loc[origin, 'latitude'],
            'pickup_longitude': locations_df.loc[origin, 'longitude'],
            'dropoff_latitude': locations_df.loc[destination, 'latitude'],
            'dropoff_longitude': locations_df.loc[destination, 'longitude'],
            'trip_distance_miles': distance_matrix[origin, destination],
            'trip_start_timestamp': timestamp,
        }
        for origin in range(location_count)
        for destination in range(location_count)
    ]
    return prepare_dataset(pd.DataFrame(rows))


## 4. Predict and Save Travel Times
Define a function to predict travel times and save each travel time matrix to a CSV file.


In [None]:
import os
# Function to predict travel times for a given interval and save each matrix to CSV
def predict_and_save_travel_times(model, scaler, locations_df, distance_matrix, start_time, interval_minutes, total_duration_minutes, output_folder):
    """Predict a travel-time matrix per time step and write each one to CSV.

    Args:
        model: Object whose ``predict`` accepts an ``xgb.DMatrix``.
        scaler: Object whose ``transform`` accepts the feature columns.
        locations_df: Locations table; its length defines the matrix size.
        distance_matrix: Square matrix of road distances indexed like ``locations_df``.
        start_time: ``datetime`` of the first matrix.
        interval_minutes: Step between consecutive matrices; must be positive.
        total_duration_minutes: Length of the window; ``start_time + duration`` is exclusive.
        output_folder: Directory for the ``travel_time_matrix_HHMM.csv`` files
            (created if missing).

    Raises:
        ValueError: If ``interval_minutes`` is not positive, or the number of
            predictions does not equal ``len(locations_df) ** 2``.
    """
    if interval_minutes <= 0:
        # A non-positive step would never advance current_time (infinite loop).
        raise ValueError(f'interval_minutes must be positive, got {interval_minutes}')

    # Ensure the output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Derive the matrix size from the argument rather than a notebook global.
    num_locations = len(locations_df)
    expected_size = num_locations * num_locations

    feature_columns = [
        'pickup_latitude', 'pickup_longitude', 'dropoff_latitude',
        'dropoff_longitude', 'trip_distance_miles',
        'hour_cos', 'hour_sin', 'day_cos', 'day_sin', 'quarter_cos',
        'quarter_sin', 'is_weekend', 'is_morning_rush', 'is_evening_rush'
    ]

    step = timedelta(minutes=interval_minutes)
    current_time = start_time
    end_time = start_time + timedelta(minutes=total_duration_minutes)

    while current_time < end_time:
        data = prepare_data_for_prediction(locations_df, distance_matrix, current_time)
        scaled_features = scaler.transform(data[feature_columns])
        dmatrix_features = xgb.DMatrix(scaled_features, feature_names=feature_columns)
        travel_times = model.predict(dmatrix_features)

        # Ensure there is exactly one prediction per (pickup, dropoff) pair
        if travel_times.size != expected_size:
            raise ValueError(f'Predicted travel times size {travel_times.size} does not match expected size {expected_size}')

        travel_time_matrix = travel_times.reshape(num_locations, num_locations)

        # Set diagonal to 0 for self-travel times
        np.fill_diagonal(travel_time_matrix, 0)

        # Save the travel time matrix to a CSV file with the hour and minute in the filename.
        # NOTE: only HHMM is encoded, so windows longer than 24 hours overwrite earlier files.
        timestamp_str = current_time.strftime('%H%M')
        filename = os.path.join(output_folder, f'travel_time_matrix_{timestamp_str}.csv')
        travel_time_df = pd.DataFrame(travel_time_matrix)
        travel_time_df.to_csv(filename, index=False)

        logging.info(f'Saved travel time matrix for {current_time} to {filename}')

        current_time += step


### **Create the updated travel time matrices**

In [None]:
import joblib
# Load the travel time scaler and model
# NOTE: joblib.load unpickles the files, which can execute arbitrary code --
# only load artifacts that come from a trusted source.
travel_time_scaler = joblib.load('xgboost_tt_scaler_2.pkl')
travel_time_model = joblib.load('xgboost_tt_model_2.pkl')

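Note: the scaler and model above are loaded from pickled files, so only load them from a trusted source. See the scikit-learn notes on the security and maintainability limitations of persisted models: https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations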


In [None]:
# Predict travel times for a selected window and save one CSV matrix per time step
import os

start_time = datetime.strptime('2024-06-28 08:00:00', '%Y-%m-%d %H:%M:%S')
interval_minutes = 15
total_duration_minutes = 240  # 4 hours
output_folder = '/content/tt_matrices'  # the cells below read travel_time_matrix_HHMM.csv from here

# The notebook defines predict_and_save_travel_times (which writes CSV files and
# returns nothing); no predict_and_store_travel_times exists, so call the defined one.
predict_and_save_travel_times(travel_time_model, travel_time_scaler, locations_df, distance_matrix, start_time, interval_minutes, total_duration_minutes, output_folder)

# Display the first travel time matrix as an example (read back from the file just written)
first_matrix_timestamp = start_time
first_matrix_path = os.path.join(output_folder, f"travel_time_matrix_{first_matrix_timestamp.strftime('%H%M')}.csv")
first_travel_time_matrix = pd.read_csv(first_matrix_path).to_numpy()
print(f"First travel time matrix for {first_matrix_timestamp}:\n", first_travel_time_matrix)

First travel time matrix for 2024-06-28 08:00:00:
 [[   0.        542.1421    738.1459    772.9184    195.06772   758.6506
   418.9464    566.7726    167.6464    734.00116   213.20447   522.1075
   850.9176    122.6246    124.2158   1082.1475    880.92163    98.00264
   301.33072  1055.2412    941.0432    376.30948  1084.3436    312.21902
   759.72833   632.4282   1020.7241   1157.7777    427.44278   358.00763 ]
 [ 574.317       0.        611.4754    324.80475   538.5053    512.79816
   354.65646   135.83223   530.49536  1017.1494    685.9153   1003.3499
   382.252     554.20734   565.7835    652.2109    499.35028   527.67426
   577.5256    603.6556    555.2188    450.65335   734.92175   231.89352
   554.96106   482.74677   696.22437   937.7364    531.14215   643.04144 ]
 [ 715.37225   600.2462      0.        777.4712    572.92615   248.55296
   361.43668   684.6425    572.0227    538.36536   828.52094   778.66046
   810.0382    683.72375   648.77075   501.89722   695.4645    685.5915


In [None]:
mat=pd.read_csv('/content/tt_matrices/travel_time_matrix_0800.csv')
mat

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
0,0.0,542.1421,738.1459,772.9184,195.06772,758.6506,418.9464,566.7726,167.6464,734.00116,...,941.0432,376.30948,1084.3436,312.21902,759.72833,632.4282,1020.7241,1157.7777,427.44278,358.00763
1,574.317,0.0,611.4754,324.80475,538.5053,512.79816,354.65646,135.83223,530.49536,1017.1494,...,555.2188,450.65335,734.92175,231.89352,554.96106,482.74677,696.22437,937.7364,531.14215,643.04144
2,715.37225,600.2462,0.0,777.4712,572.92615,248.55296,361.43668,684.6425,572.0227,538.36536,...,503.19916,467.94006,636.21295,546.4787,271.6014,216.3135,881.32275,498.47382,410.00238,542.8587
3,920.50793,327.4763,896.26337,0.0,986.4193,741.30444,669.64496,367.01837,962.80286,1300.8174,...,492.18024,760.4066,580.4131,619.5855,811.6148,775.61145,465.45898,1108.7645,930.4875,1061.1318
4,262.25287,546.36017,624.3898,834.0632,0.0,661.9472,276.19904,613.03516,109.290695,607.4795,...,823.0446,239.03656,1005.9517,361.06775,662.6274,498.82242,950.0804,1043.8782,293.9637,210.93425
5,855.3886,527.7377,237.13063,704.08386,678.67645,0.0,457.86087,577.9452,737.9616,734.44617,...,345.99792,559.9703,442.64832,617.2326,177.13353,256.66263,693.50104,506.43164,476.48575,678.3737
6,445.32462,363.83157,357.2932,623.1883,246.22227,375.78497,0.0,446.26096,286.86606,683.4513,...,610.6032,141.61095,799.00934,288.88403,402.36664,268.6837,720.6149,848.83514,270.26352,368.01126
7,584.48694,116.866356,691.5048,300.87766,580.76013,585.63135,452.3684,0.0,567.04065,1121.0657,...,610.4939,540.076,779.6843,277.7201,615.39417,577.1097,714.0031,986.00964,621.3977,725.8037
8,178.84561,520.2509,622.02704,789.6821,109.42517,658.9738,306.76254,596.1869,0.0,724.4212,...,821.0527,274.53333,1023.7141,338.88245,659.4589,495.21304,952.0434,1038.7172,310.62213,257.17004
9,734.52356,1015.2168,568.8082,1257.2163,573.0308,828.9326,694.036,1089.2032,711.97736,0.0,...,1062.885,591.1176,1258.0298,937.91907,681.89886,577.2604,1297.8286,743.74915,532.1894,519.15735


In [None]:
mat2=pd.read_csv('/content/tt_matrices/travel_time_matrix_0830.csv')
mat2

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
0,0.0,544.60986,739.76996,772.33234,194.44266,759.7918,422.61856,567.7442,168.37994,732.5186,...,944.37085,378.55106,1083.5426,314.37955,760.86957,635.409,1023.6171,1154.0916,430.85535,369.7624
1,594.5121,0.0,613.52637,324.46188,541.0716,515.08105,356.50745,134.95952,533.5962,1011.7566,...,556.8636,452.75528,736.4916,232.76013,557.4992,484.74973,696.1477,939.0498,533.0547,645.5869
2,715.95984,599.1335,0.0,776.5282,576.3463,265.0513,380.62344,688.8128,575.60455,540.0299,...,518.2068,484.43454,640.7807,561.4066,281.4429,230.24275,882.8014,513.66455,426.49704,548.02454
3,920.29736,326.67575,897.0371,0.0,983.4876,740.6434,669.248,366.366,964.9242,1292.6575,...,492.0421,759.4947,581.1115,618.1124,810.85156,774.61993,465.5022,1104.7762,929.75073,1058.2341
4,268.42184,548.9859,627.21985,833.4199,0.0,664.559,275.40717,614.42,95.32886,607.21454,...,823.3404,239.27597,1004.9189,364.22852,665.40765,502.2095,952.2727,1039.2681,292.15726,211.01782
5,856.3372,528.0311,239.63573,705.5438,680.67523,0.0,459.04126,578.7852,739.66724,728.7772,...,345.45767,561.6003,443.03214,617.7217,139.40677,259.4555,692.5855,508.28397,477.50415,680.7112
6,466.5956,366.7488,362.41043,621.7168,245.39337,378.48056,0.0,446.57593,289.50806,681.95135,...,612.03815,140.8184,800.10706,290.84433,405.23065,268.71878,721.91534,846.7146,269.15335,380.70737
7,603.96924,102.19672,691.7032,300.73563,581.7764,586.5464,453.67645,0.0,569.729,1112.4946,...,611.919,540.88104,780.77985,277.89447,616.86,578.19257,714.1016,984.6233,622.49316,726.99634
8,186.40462,523.37366,625.5151,793.9935,95.924324,662.3555,309.71024,598.15234,0.0,724.08026,...,822.9661,273.9466,1026.5392,342.6008,663.0091,498.86508,955.7216,1034.8771,313.50714,255.09511
9,740.08655,1011.70306,587.3941,1250.0309,579.3818,829.4704,690.9844,1091.5977,711.6472,0.0,...,1060.93,594.6667,1255.3242,934.99164,679.574,581.42834,1294.4204,742.86444,550.1745,524.4118


In [None]:
mat3=pd.read_csv('/content/tt_matrices/travel_time_matrix_1030.csv')
mat3

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
0,0.0,538.1624,762.6593,781.36115,179.98955,774.16614,424.66043,568.3795,159.95497,738.5763,...,959.7331,382.71484,1115.573,311.34918,778.0784,645.7053,1038.5724,1179.051,440.98215,355.96823
1,538.21674,0.0,629.20764,330.3944,547.48267,538.25275,361.95096,137.71637,532.49976,1032.4858,...,576.39307,465.14948,764.97314,228.70894,574.57794,515.0778,706.99524,953.25275,558.6258,653.49347
2,732.73553,600.0243,0.0,784.4942,583.06213,228.75955,374.35648,701.03253,581.392,556.0966,...,530.4189,487.8429,661.8808,543.22815,177.23645,176.12383,898.6358,515.3212,436.09357,566.3601
3,936.55383,330.54974,934.45825,0.0,1018.7336,761.17725,687.18616,373.8276,991.9092,1326.7362,...,527.65015,774.4001,598.9201,627.6653,824.78467,789.76465,486.24432,1166.4069,950.8892,1078.3864
4,178.48442,546.6075,637.53516,831.4579,0.0,685.2768,239.27399,618.72485,76.59652,620.2869,...,840.68304,196.7234,1037.0194,357.41534,686.4791,529.25964,963.21814,1039.2446,258.3295,167.54889
5,856.31006,534.97437,219.57373,723.8709,695.7798,0.0,473.7124,590.8102,753.5123,749.70996,...,349.9796,578.67694,475.76804,626.2127,123.810005,239.48215,710.4218,531.0003,519.7619,703.45123
6,430.82925,368.53375,376.241,636.4532,207.6478,394.36755,0.0,452.95932,268.55362,702.73724,...,632.2059,131.67616,813.17175,281.83395,422.91437,225.78549,736.82245,844.6097,235.66234,368.6325
7,572.10547,101.17292,712.7724,305.1046,588.4062,598.76746,466.92245,0.0,572.0437,1149.7671,...,628.386,555.5709,792.4587,276.01843,633.12665,595.4237,735.1461,996.6493,638.90186,743.3155
8,171.1956,511.84546,635.4242,799.55695,78.223526,682.028,290.46848,596.4608,0.0,732.91473,...,839.7382,240.50403,1043.9801,334.70248,683.0353,526.07336,966.15497,1039.9825,296.8933,193.39383
9,726.31476,1016.8131,563.0411,1270.0026,577.7319,821.1244,688.964,1100.0321,710.1355,0.0,...,1070.0887,589.7169,1262.3654,957.5916,680.0217,580.29315,1302.7347,754.8232,531.85345,529.3829


# Create a sample to simulate trips for a journey

In [None]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random

# Define the start and end times
start_time = datetime.strptime('2024-06-28 08:00:00', '%Y-%m-%d %H:%M:%S')
end_time = datetime.strptime('2024-06-28 11:45:00', '%Y-%m-%d %H:%M:%S')



# Function to generate timestamps with varying frequencies
def generate_timestamps(start_time, end_time, total_trips):
    """Draw random trip start times in consecutive 15-minute slots.

    Each slot starting before ``end_time`` receives a random number of trips
    (5-20 while the slot's hour is 8, otherwise 4-10), each offset by 0-14 whole
    minutes from the slot start. Generation stops once ``total_trips`` timestamps
    exist or the slots run out, so the result holds *at most* ``total_trips``
    entries and is not sorted.
    """
    slot_length = timedelta(minutes=15)
    generated = []
    slot_start = start_time

    while slot_start < end_time and len(generated) < total_trips:
        # Rush hour (08:xx) gets the higher demand range.
        low, high = (5, 20) if slot_start.hour == 8 else (4, 10)
        demand = random.randint(low, high)

        still_needed = total_trips - len(generated)
        for _ in range(min(demand, still_needed)):
            offset = random.randint(0, 14)  # Any minute within the 15-minute interval
            generated.append(slot_start + timedelta(minutes=offset))

        slot_start += slot_length

    return generated[:total_trips]  # Never more than total_trips

# Request up to 200 timestamps; fewer are produced when the time window is
# exhausted first (the run recorded below yielded 108 trips).
timestamps = generate_timestamps(start_time, end_time, 200)

# Function to generate random trip data
def generate_trip_data(timestamps, locations_df):
    """Create one random trip (distinct pickup and dropoff) per timestamp.

    Args:
        timestamps: Iterable of trip start times; one trip is generated for each.
        locations_df: Table with 'latitude' and 'longitude' columns. Pickup and
            dropoff indices are row positions in this table.

    Returns:
        DataFrame with pickup/dropoff indices and coordinates plus
        'trip_start_timestamp', in the order of ``timestamps``.

    Raises:
        ValueError: If fewer than two locations are available, since a distinct
            pickup/dropoff pair could never be drawn.
    """
    # Use the table that was passed in rather than a notebook-level global.
    num_locations = len(locations_df)
    if num_locations < 2:
        raise ValueError('At least two locations are required to generate trips')

    # Positional lookups, so the indices stay aligned with matrix rows/columns
    # whatever index labels the DataFrame carries.
    latitudes = locations_df['latitude'].to_numpy()
    longitudes = locations_df['longitude'].to_numpy()

    trips = []
    for timestamp in timestamps:
        pickup_index = random.randint(0, num_locations - 1)
        dropoff_index = random.randint(0, num_locations - 1)
        while dropoff_index == pickup_index:
            dropoff_index = random.randint(0, num_locations - 1)
        trips.append({
            'pickup_index': pickup_index,
            'pickup_latitude': latitudes[pickup_index],
            'pickup_longitude': longitudes[pickup_index],
            'dropoff_index': dropoff_index,
            'dropoff_latitude': latitudes[dropoff_index],
            'dropoff_longitude': longitudes[dropoff_index],
            'trip_start_timestamp': timestamp,
        })
    return pd.DataFrame(trips)

# Generate trip data
trip_data = generate_trip_data(timestamps, locations_df)

# Display the first few rows of the generated trip data
trip_data.head()


Unnamed: 0,pickup_index,pickup_latitude,pickup_longitude,dropoff_index,dropoff_latitude,dropoff_longitude,trip_start_timestamp
0,22,41.870415,-87.67509,27,41.90007,-87.72092,2024-06-28 08:02:00
1,4,41.929047,-87.65131,16,41.879066,-87.657005,2024-06-28 08:03:00
2,24,41.901207,-87.67635,1,41.900223,-87.629105,2024-06-28 08:10:00
3,15,41.877384,-87.68066,9,41.94287,-87.70133,2024-06-28 08:05:00
4,0,41.93476,-87.639854,27,41.90007,-87.72092,2024-06-28 08:06:00


In [None]:
trip_data.shape

(108, 7)

In [None]:
trip_data = trip_data.sort_values(by="trip_start_timestamp").reset_index(drop=True)

In [None]:
trip_data.head(30)

Unnamed: 0,pickup_index,pickup_latitude,pickup_longitude,dropoff_index,dropoff_latitude,dropoff_longitude,trip_start_timestamp
0,3,41.878864,-87.62519,18,41.928947,-87.6609,2024-06-28 08:00:00
1,6,41.91475,-87.65401,4,41.929047,-87.65131,2024-06-28 08:00:00
2,22,41.870415,-87.67509,27,41.90007,-87.72092,2024-06-28 08:02:00
3,4,41.929047,-87.65131,16,41.879066,-87.657005,2024-06-28 08:03:00
4,15,41.877384,-87.68066,9,41.94287,-87.70133,2024-06-28 08:05:00
5,0,41.93476,-87.639854,27,41.90007,-87.72092,2024-06-28 08:06:00
6,24,41.901207,-87.67635,1,41.900223,-87.629105,2024-06-28 08:10:00
7,16,41.879066,-87.657005,22,41.870415,-87.67509,2024-06-28 08:18:00
8,18,41.928947,-87.6609,0,41.93476,-87.639854,2024-06-28 08:18:00
9,15,41.877384,-87.68066,11,41.95753,-87.66661,2024-06-28 08:18:00


In [None]:
trip_data.tail()

Unnamed: 0,pickup_index,pickup_latitude,pickup_longitude,dropoff_index,dropoff_latitude,dropoff_longitude,trip_start_timestamp
103,18,41.928947,-87.6609,19,41.870605,-87.62217,2024-06-28 11:30:00
104,7,41.89833,-87.620766,6,41.91475,-87.65401,2024-06-28 11:37:00
105,26,41.863422,-87.653755,20,41.878666,-87.67165,2024-06-28 11:39:00
106,16,41.879066,-87.657005,7,41.89833,-87.620766,2024-06-28 11:40:00
107,19,41.870605,-87.62217,7,41.89833,-87.620766,2024-06-28 11:40:00


In [None]:
# Write the simulated trips to a comma-separated file (no index column).
# NOTE: DataFrame.to_csv returns None when given a path, so trip_data_csv is None.
trip_data_csv= trip_data.to_csv('sample_trips.csv', index=False)



# **Optimize routes**

In [None]:
import zipfile
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

# Load distance matrix (in miles)
physical_distance_matrix = np.load('/content/distance_matrix.npy')

# Load generated trip data. sep=None lets pandas detect the delimiter, so both
# the comma-separated file written by this notebook and a semicolon-separated
# re-export (e.g. after editing in a spreadsheet) load correctly.
trip_data = pd.read_csv('/content/sample_trips.csv', sep=None, engine='python')

# Convert trip_start_timestamp to datetime. Try the day-first spreadsheet
# format first; fall back to pandas' default parsing for the ISO timestamps
# that this notebook writes itself.
try:
    trip_data['trip_start_timestamp'] = pd.to_datetime(trip_data['trip_start_timestamp'], format='%d/%m/%Y %H:%M')
except ValueError:
    trip_data['trip_start_timestamp'] = pd.to_datetime(trip_data['trip_start_timestamp'])

# Path to the zip archive holding the travel_time_matrix_HHMM.csv files
zip_file_path = '/content/tt_matrices.zip'

# Function to read CSV files from a zip archive and store as NumPy arrays in a list
def read_matrices_from_zip(zip_file_path):
    """Load every ``.csv`` member of a zip archive as a NumPy matrix.

    Members are processed in lexicographic name order. The first row of each
    file is treated as the column-label header and is not part of the returned
    matrix.

    Args:
        zip_file_path: Path to the zip archive.

    Returns:
        List of 2-D NumPy arrays, one per CSV member.
    """
    matrices = []
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        csv_files = sorted(name for name in zip_ref.namelist() if name.endswith('.csv'))
        for csv_file in csv_files:
            with zip_ref.open(csv_file) as file:
                # Let pandas consume the header row itself, so the data columns
                # keep a numeric dtype even when the labels are not numbers.
                matrices.append(pd.read_csv(file, header=0).to_numpy())
    return matrices

# Call the function to read matrices from the zip file
travel_time_matrices = read_matrices_from_zip(zip_file_path)

# Define the algorithm functions
def fitness(individual, travel_time_matrix, distance_matrix, trip_requests):
    """Score a fleet assignment; lower is better.

    ``individual`` is a list of vehicle routes, each a list of
    ``(pickup, dropoff, trip_start)`` tuples. Every non-empty route starts and
    ends at location 0 and its clock starts at 2024-06-28 08:15:00.

    Returns:
        ``(total_distance + time_penalty, penalty_used, total_distance,
        total_travel_time)``.

    Penalties: 1000 per trip that appears again after already being served,
    1000 per requested trip that no vehicle serves, and the lateness in minutes
    whenever a pickup happens more than 2 minutes after the requested start.
    """
    # NOTE(review): the score adds miles and penalty minutes directly; the
    # leg from location 0 to the first pickup adds distance/time but does not
    # advance the clock; vehicles never wait for a trip's start time -- confirm
    # these are intended modelling choices.
    requested = {
        (int(row['pickup_index']), int(row['dropoff_index']), row['trip_start_timestamp'])
        for _, row in trip_requests.iterrows()
    }
    handled = set()
    fleet_distance = 0
    fleet_travel_time = 0
    penalty = 0
    penalised = False
    grace = timedelta(minutes=2)

    for vehicle_route in individual:
        if not vehicle_route:
            continue

        clock = datetime.strptime("2024-06-28 08:15:00", '%Y-%m-%d %H:%M:%S')
        route_distance = 0
        route_travel_time = 0

        for position, (pickup, dropoff, trip_start) in enumerate(vehicle_route):
            trip_key = (pickup, dropoff, trip_start)
            if trip_key in handled:
                penalised = True
                penalty += 1000  # Assign a high penalty for duplicated trips
                continue
            handled.add(trip_key)

            # Approach leg: from location 0 for the first stop, otherwise from
            # the dropoff of the preceding entry in the route.
            came_from = 0 if position == 0 else vehicle_route[position - 1][1]
            route_distance += distance_matrix[came_from][pickup]
            approach_time = travel_time_matrix[came_from][pickup]
            route_travel_time += approach_time
            if position > 0:
                clock += timedelta(seconds=float(approach_time))

            if clock > trip_start + grace:
                penalised = True
                penalty += (clock - trip_start).total_seconds() / 60

            # Passenger leg: pickup -> dropoff.
            ride_time = travel_time_matrix[pickup][dropoff]
            clock += timedelta(seconds=float(ride_time))
            route_distance += distance_matrix[pickup][dropoff]
            route_travel_time += ride_time

        # Return leg to location 0 from the route's final dropoff.
        final_stop = vehicle_route[-1][1]
        route_distance += distance_matrix[final_stop][0]
        route_travel_time += travel_time_matrix[final_stop][0]
        fleet_distance += route_distance
        fleet_travel_time += route_travel_time

    # Penalty for unserved trips
    penalty += len(requested - handled) * 1000

    return fleet_distance + penalty, penalised, fleet_distance, fleet_travel_time

def create_initial_population(pop_size, trip_requests, num_vehicles):
    """Create ``pop_size`` random assignments of every trip to a vehicle.

    Each individual is a list of ``num_vehicles`` routes; a route is a list of
    ``(pickup_index, dropoff_index, trip_start_timestamp)`` tuples ordered by
    start timestamp.
    """
    records = trip_requests.to_dict('records')
    population = []
    for _ in range(pop_size):
        fleet = [[] for _ in range(num_vehicles)]
        for record in random.sample(records, len(records)):
            chosen_vehicle = random.randint(0, num_vehicles - 1)
            trip = (
                int(record['pickup_index']),
                int(record['dropoff_index']),
                record['trip_start_timestamp'],
            )
            fleet[chosen_vehicle].append(trip)
        # Keep every vehicle's trips in chronological order.
        for route in fleet:
            route.sort(key=lambda trip: trip[2])
        population.append(fleet)
    return population

def crossover(parent1, parent2, num_vehicles):
    """Build a child by taking each vehicle's route from one parent at random.

    For every vehicle index the route comes from ``parent1`` or ``parent2`` with
    equal probability. Routes are copied, so later in-place changes to the child
    (sorting, mutation, truncation) never alter the parents.

    Returns:
        A list of ``num_vehicles`` routes, each ordered by trip start timestamp.
    """
    child = []
    for i in range(num_vehicles):
        donor = parent1 if random.random() < 0.5 else parent2
        # sorted() returns a new list: the child gets its own chronologically
        # ordered copy instead of aliasing the parent's route object.
        child.append(sorted(donor[i], key=lambda trip: trip[2]))
    return child

def mutate(individual, mutation_rate):
    """Randomly perturb routes in place, then restore chronological order.

    For each route, with probability ``mutation_rate`` (and only if the route is
    non-empty), the first trip is swapped with a randomly chosen trip of the
    same route. Afterwards every route is re-sorted by trip start timestamp.
    Returns ``None``.
    """
    # NOTE(review): because every route is re-sorted right after the swap, the
    # swap can only change the order of trips that share a timestamp -- the
    # mutation has almost no effect on the search. Confirm whether a different
    # operator (e.g. moving a trip to another vehicle) was intended.
    for route in individual:
        roll = random.random()
        if roll < mutation_rate and route:
            pick = random.randint(0, len(route) - 1)
            route[pick], route[0] = route[0], route[pick]
    for route in individual:
        route.sort(key=lambda trip: trip[2])

def adjust(individual, distance_matrix, vehicle_capacity):
    """Truncate, in place, every route holding more than ``vehicle_capacity`` trips.

    Trips beyond the first ``vehicle_capacity`` entries of a route are dropped.
    ``distance_matrix`` is accepted but not used. Returns ``None``.
    """
    for route in individual:
        overflow = len(route) - vehicle_capacity
        if overflow > 0:
            del route[vehicle_capacity:]

def genetic_algorithm(travel_time_matrix, distance_matrix, trip_requests, pop_size, num_generations, mutation_rate, crossover_rate, elitism_rate, num_vehicles, vehicle_capacity):
    """Search for a low-cost assignment of trips to vehicles.

    Each generation evaluates the population with ``fitness``, carries over the
    best ``int(elitism_rate * pop_size)`` individuals, and fills the rest with
    children bred from the better half (``crossover`` -> ``mutate`` -> ``adjust``).
    ``crossover_rate`` is accepted for interface compatibility but is not used.

    Returns:
        ``(best_individual, best_fitness, penalty_used, total_distance)`` for the
        best individual evaluated in any generation. With ``num_generations == 0``
        nothing is evaluated and ``(None, inf, False, 0)`` is returned.
    """
    population = create_initial_population(pop_size, trip_requests, num_vehicles)
    for individual in population:
        adjust(individual, distance_matrix, vehicle_capacity)

    best_individual = None
    best_fitness = float('inf')
    best_penalty = False
    best_distance = 0

    elite_count = int(elitism_rate * pop_size)

    for generation in range(num_generations):
        fitness_scores = [fitness(individual, travel_time_matrix, distance_matrix, trip_requests) for individual in population]
        fitness_values = [score[0] for score in fitness_scores]

        # Rank by index so ties in fitness never fall back to comparing the
        # individuals themselves; the stable sort keeps the first minimum first.
        ranking = sorted(range(len(population)), key=fitness_values.__getitem__)
        sorted_population = [population[idx] for idx in ranking]

        champion = ranking[0]
        current_best_fitness = fitness_values[champion]
        current_best_penalty = fitness_scores[champion][1]
        if current_best_fitness < best_fitness:
            best_fitness = current_best_fitness
            # Snapshot taken from the population that was just evaluated (not
            # the next generation), so the returned routes match the score.
            best_individual = [list(route) for route in population[champion]]
            best_penalty = current_best_penalty
            best_distance = fitness_scores[champion][2]

        # Elites are copied so children that share route lists with their
        # parents cannot modify them in place.
        next_population = [[list(route) for route in elite] for elite in sorted_population[:elite_count]]

        while len(next_population) < pop_size:
            parent1, parent2 = random.sample(sorted_population[:pop_size//2], 2)
            child = crossover(parent1, parent2, num_vehicles)
            mutate(child, mutation_rate)
            adjust(child, distance_matrix, vehicle_capacity)
            next_population.append(child)

        population = next_population

        print(f"Generation {generation} | Best Fitness: {current_best_fitness} | Penalty Used: {current_best_penalty}")

    return best_individual, best_fitness, best_penalty, best_distance

def run_interval(trip_requests, travel_time_matrices, distance_matrix, interval_index):
    """Run the genetic algorithm for one interval.

    Uses ``travel_time_matrices[interval_index]`` as the travel-time matrix and
    the module-level GA settings (``population_size``, ``num_generations``,
    ``mutation_rate``, ``crossover_rate``, ``elitism_rate``, ``num_vehicles``,
    ``vehicle_capacity``), which must be defined before this is called.

    Returns:
        ``(best_route, best_cost, penalty_used, total_distance)`` as produced by
        ``genetic_algorithm``.
    """
    route, cost, penalised, miles = genetic_algorithm(
        travel_time_matrix=travel_time_matrices[interval_index],
        distance_matrix=distance_matrix,
        trip_requests=trip_requests,
        pop_size=population_size,
        num_generations=num_generations,
        mutation_rate=mutation_rate,
        crossover_rate=crossover_rate,
        elitism_rate=elitism_rate,
        num_vehicles=num_vehicles,
        vehicle_capacity=vehicle_capacity,
    )
    return route, cost, penalised, miles

# Parameters for the genetic algorithm
# These module-level names are read by run_interval when it is called.
population_size = 200
num_generations = 100
mutation_rate = 0.08
crossover_rate = 0.7  # passed through to genetic_algorithm
elitism_rate = 0.1  # share of the population carried over unchanged each generation
num_vehicles = 10
vehicle_capacity = 3  # maximum number of trips kept per vehicle route (see adjust)

# Define the interval boundaries
interval_start = pd.to_datetime("2024-06-28 08:15:00")
interval_end = pd.to_datetime("2024-06-28 08:30:00")

# Filter the trip requests within the interval
# NOTE(review): both bounds are inclusive, so a trip starting exactly at
# interval_end would also be selected by the following interval -- confirm.
trip_requests_interval = trip_data[(trip_data['trip_start_timestamp'] >= interval_start) & (trip_data['trip_start_timestamp'] <= interval_end)]

# Run the interval optimization
# NOTE(review): index 0 selects the first matrix in sorted file-name order. If
# the archive starts at travel_time_matrix_0800.csv, that is the 08:00 matrix
# rather than the one for this 08:15 interval -- confirm the intended index.
best_route2, best_cost2, penalty_used2, total_distance2 = run_interval(
    trip_requests_interval, travel_time_matrices, physical_distance_matrix, 0
)

print("Interval Results:")
print("Best Route:", best_route2)
print("Best Cost:", best_cost2)
print("Penalty Used:", penalty_used2)
print("Total Distance:", total_distance2)


Generation 0 | Best Fitness: 296.0102141555334 | Penalty Used: True
Generation 1 | Best Fitness: 296.0102141555334 | Penalty Used: True
Generation 2 | Best Fitness: 296.0102141555334 | Penalty Used: True
Generation 3 | Best Fitness: 296.0102141555334 | Penalty Used: True
Generation 4 | Best Fitness: 296.0102141555334 | Penalty Used: True
Generation 5 | Best Fitness: 296.0102141555334 | Penalty Used: True
Generation 6 | Best Fitness: 296.0102141555334 | Penalty Used: True
Generation 7 | Best Fitness: 296.0102141555334 | Penalty Used: True
Generation 8 | Best Fitness: 296.0102141555334 | Penalty Used: True
Generation 9 | Best Fitness: 296.0102141555334 | Penalty Used: True
Generation 10 | Best Fitness: 296.0102141555334 | Penalty Used: True
Generation 11 | Best Fitness: 296.0102141555334 | Penalty Used: True
Generation 12 | Best Fitness: 296.0102141555334 | Penalty Used: True
Generation 13 | Best Fitness: 296.0102141555334 | Penalty Used: True
Generation 14 | Best Fitness: 296.0102141555

In [None]:
def calculate_total_distance_and_time(route, travel_time_matrix, distance_matrix):
    """Summarise distance, driving time and a stop log for every vehicle route.

    For each vehicle the clock starts at 2024-06-28 08:15:00. The first trip
    contributes only its pickup -> dropoff leg; every later trip also
    contributes the leg from the previous dropoff to its pickup. Travel times
    are read as seconds.

    Returns:
        ``(total_distances, total_times, vehicle_routes)`` -- three lists with
        one entry per vehicle: the summed distance, the summed travel time as a
        ``timedelta``, and a list of
        ``"pickup/dropoff, requested HH:MM:SS, clock-after-dropoff HH:MM:SS"``
        strings. Empty routes yield ``0``, ``timedelta(0)`` and ``[]``.
    """
    total_distances, total_times, vehicle_routes = [], [], []

    for vehicle_route in route:
        if not vehicle_route:
            total_distances.append(0)
            total_times.append(timedelta(0))
            vehicle_routes.append([])
            continue

        miles = 0
        elapsed = timedelta(0)
        clock = datetime.strptime("2024-06-28 08:15:00", '%Y-%m-%d %H:%M:%S')
        stop_log = []

        for position, (pickup, dropoff, trip_start) in enumerate(vehicle_route):
            # Legs driven for this trip, in order: the reposition from the
            # previous dropoff (skipped for the first trip), then the trip itself.
            legs = [] if position == 0 else [(vehicle_route[position - 1][1], pickup)]
            legs.append((pickup, dropoff))

            for origin, destination in legs:
                miles += distance_matrix[origin][destination]
                leg_time = timedelta(seconds=float(travel_time_matrix[origin][destination]))
                elapsed += leg_time
                clock += leg_time

            stop_log.append(f'{pickup}/{dropoff}, {trip_start.strftime("%H:%M:%S")}, {clock.strftime("%H:%M:%S")}')

        total_distances.append(miles)
        total_times.append(elapsed)
        vehicle_routes.append(stop_log)

    return total_distances, total_times, vehicle_routes




In [None]:
# Compute per-vehicle distance, travel time and stop log for the best solution
total_distances, total_times, vehicle_routes = calculate_total_distance_and_time(
    best_route2, travel_time_matrices[0], physical_distance_matrix
)

# Print one summary section per vehicle
for i, (vehicle_miles, vehicle_duration, vehicle_stops) in enumerate(zip(total_distances, total_times, vehicle_routes)):
    print(f"Vehicle {i + 1}:")
    print(f"  Total Distance: {vehicle_miles:.2f} miles")
    print(f"  Total Travel Time: {vehicle_duration}")
    print("  Optimal Route:")
    for trip in vehicle_stops:
        print(f"    {trip}")

Vehicle 1:
  Total Distance: 7.17 miles
  Total Travel Time: 0:22:29.574920
  Optimal Route:
    0/4, 08:27:00, 08:19:22
    2/15, 08:28:00, 08:37:29
Vehicle 2:
  Total Distance: 10.98 miles
  Total Travel Time: 0:35:10.287330
  Optimal Route:
    25/2, 08:23:00, 08:18:36
    26/4, 08:27:00, 08:50:10
Vehicle 3:
  Total Distance: 10.32 miles
  Total Travel Time: 0:31:19.734190
  Optimal Route:
    19/15, 08:20:00, 08:27:34
    24/17, 08:29:00, 08:46:19
Vehicle 4:
  Total Distance: 0.00 miles
  Total Travel Time: 0:00:00
  Optimal Route:
Vehicle 5:
  Total Distance: 14.44 miles
  Total Travel Time: 0:42:40.346500
  Optimal Route:
    25/2, 08:23:00, 08:18:36
    19/29, 08:25:00, 08:57:40
Vehicle 6:
  Total Distance: 25.65 miles
  Total Travel Time: 1:17:18.906490
  Optimal Route:
    18/0, 08:18:00, 08:20:01
    22/21, 08:25:00, 08:53:07
    19/29, 08:25:00, 09:32:18
Vehicle 7:
  Total Distance: 14.39 miles
  Total Travel Time: 0:47:02.857240
  Optimal Route:
    4/26, 08:24:00, 08:31:32