In [4]:
import pandas as pd
import numpy as np
import random
import googlemaps

In [5]:
gmaps1 = googlemaps.Client(key='AIzaSyDy3LNlFeCmPoJfGjxG4GhXewyhHHw3Ux8')
gmaps2 = googlemaps.Client(key='AIzaSyC4dICfe5c823PPYcjeCefHV7C6uxsntpQ')

In [6]:
# 0: normal, 1: mild, 2: dangerous
weather_idx_to_label = {0: 'normal', 1: 'mild', 2: 'dangerous'}
weather = [0, 1, 2]
weather_probs = [14, 5, 1]
weather_delay = [0.0, 0.3, 1.0]

# 0: 'truck', 1: 'train', 2: 'flight'
transport_mode_idx_to_label = {0: 'truck', 1: 'train', 2: 'flight'}
transport_mode = [0, 1, 2]
transport_mode_probs = [10, 7, 3]
transport_mode_delay = [0.0, 0.3, 0.7]

delay_time = [2, 6, 10, 14, 20, 24, 30, 36, 42, 48]
delay_time_probs = [2, 4, 6, 4, 2, 3, 2, 2, 1, 1]

shipping_centres = ['Mumbai', 'Banglore', 'Hyderabad', 'Chennai', 'Ahmedabad', 'Jaipur', 'Gurgaon', 'Pune', 'Delhi', 'Kolkata', 'Allahabad']
delivery_centres = ['Nagpur', 'Jabalpur', 'Pune', 'Lucknow', 'Kanpur', 'Nagpur', 'Indore', 
                    'Thane', 'Bhopal', 'Visakhapatnam', 'Patna', 'Vadodara', 'Ghaziabad', 
                    'Ludhiana', 'Agra', 'Nashik', 'Faridabad', 'Meerut', 'Rajkot', 'Varanasi', 
                    'Srinagar', 'Aurangabad', 'Dhanbad', 'Amritsar', 'Allahabad', 'Ranchi', 
                    'Haora', 'Coimbatore', 'Jabalpur', 'Gwalior', 'Vijayawada', 'Jodhpur', 
                    'Madurai', 'Raipur', 'Kota', 'Guwahati', 'Chandigarh', 'Solapur', 'Bareilly', 
                    'Mysore', 'Gurgaon', 'Amritsar', 'Jalandhar']

multiset = lambda values, probs: [values[j] for j in range(len(probs)) for i in range(probs[j])]

weather_ditro = multiset(weather, weather_probs)
transport_mode_distro = multiset(transport_mode, transport_mode_probs)
delay_time_distro = multiset(delay_time, delay_time_probs)
random.shuffle(weather_ditro)
random.shuffle(transport_mode_distro)
random.shuffle(delay_time_distro)

In [7]:
from tqdm import tqdm

city_matrix = {}

for sh_city in shipping_centres:
    for de_city in delivery_centres:
        city_matrix[sh_city] = {}

i = 0
for sh_city in shipping_centres:
    print(sh_city)
    for de_city in delivery_centres:
        try:
            if i % 2 == 0:
                response = gmaps1.distance_matrix(sh_city, de_city)
            else: 
                response = gmaps2.distance_matrix(sh_city, de_city)
            response = response['rows'][0]['elements'][0]
            dist_kms = response['distance']['value'] / 1000
            time_hrs = response['duration']['value'] / 3600
            city_matrix[sh_city][de_city] = [dist_kms, time_hrs]
            i += 1
        except Exception as e:
            print(e)
            print([sh_city, de_city])

Mumbai
Banglore
Hyderabad
Chennai
Ahmedabad
Jaipur
Gurgaon
Pune
Delhi
Kolkata
Allahabad


In [8]:
def make_dataset(num_examples):
    
    X = []
    y = []
    
    for i in tqdm(range(num_examples), total=num_examples, desc='Examples'):
        try:
            weather_val = random.choice(weather_ditro)
            transport_mode_val = random.choice(transport_mode_distro)
            delay_time_val = random.choice(delay_time_distro)

            shipping_centre = random.choice(shipping_centres)
            delivery_centre = random.choice(delivery_centres)

            dist_kms, time_hrs = city_matrix[shipping_centre][delivery_centre]

            time_hrs += time_hrs * weather_delay[weather_val]
            time_hrs -= time_hrs * transport_mode_delay[transport_mode_val]
            time_hrs += delay_time_val

            X.append([weather_val, transport_mode_val, delay_time_val, dist_kms])
            y.append(time_hrs)
        except Exception as e:
            print(str(e))
            print(response)
            print(shipping_centre)
            print(delivery_centre)
            
    return X, y

X, y = make_dataset(4000)

Examples: 100%|████████████████████████████████████████████████████████████| 4000/4000 [00:00<00:00, 160426.24it/s]


In [9]:
print(X[0])
print(y[0])

[1, 1, 10, 1603.83]
38.657922222222226


In [13]:
def convert_to_readable(X, y):
    print('Weather: {}'.format(weather_idx_to_label[X[0]]))
    print('Transport Mode: {}'.format(transport_mode_idx_to_label[X[1]]))
    print('Delay Time: {} Hrs'.format(X[2]))
    print('Distance (Google API): {:.2f} Kms'.format(X[3]))
    print('Estimated Delivery Time: {:.2f} Hrs'.format(y))
    
index = 203
convert_to_readable(X[index], y[index])

Weather: normal
Transport Mode: truck
Delay Time: 14 Hrs
Distance (Google API): 473.19 Kms
Estimated Delivery Time: 23.08 Hrs


In [14]:
import pickle

with open('city_matrix.pkl', 'wb') as file:
    pickle.dump(city_matrix, file)

with open('dell_dataset.pkl', 'wb') as file:
    pickle.dump((X, y), file)

In [30]:
X_train, X_test, y_train, y_test = X[: -1500], X[-1500: ], y[: -1500], y[-1500: ]

In [33]:
from sklearn.linear_model import LinearRegression, Ridge, BayesianRidge
from sklearn.metrics import mean_squared_error, r2_score

regressor = LinearRegression()
regressor.fit(X_train, y_train)
r2_score = regressor.score(X_test, y_test)
prediction = regressor.predict(X_test)
rmse = mean_squared_error(y_test, prediction)
print(r2_score, rmse)

0.9366125692195637 19.293873966855838


In [35]:
print(y_test[:5])
print(prediction[:5])

print(np.mean(np.array(np.array(y_test) - np.array(prediction))))

[11.353333333333333, 32.60358333333333, 17.464055555555557, 54.36833333333333, 25.43472222222222]
[13.66436382 31.36922635 15.67647321 50.01440055 26.28097733]
-0.07794275660695804


In [26]:
print('Actual Data: {}'.format(city_matrix['Mumbai']['Lucknow']))
regressor.predict([[2, 1, 20, 1446.5]])

Actual Data: [1446.493, 24.7875]


array([50.64317884])

In [27]:
with open('linear_regressor.pkl', 'wb') as file:
    pickle.dump(regressor, file)