# Import required libraries

In [34]:
import math
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor

from sklearn.metrics import mean_absolute_error, mean_squared_error, median_absolute_error, max_error, r2_score
import time

Helper functions: risk scoring, privacy-parameter (epsilon) derivation, Laplace noise injection, and mobility-model determination

In [35]:
#Risk Assessment
def risk_assessment(value_a, value_b, value_c):
    """Return the weighted risk score of three impact-area values.

    :param value_a: Impact value for the 'Safety' area (weight 3)
    :param value_b: Impact value for the 'Operation' area (weight 2)
    :param value_c: Impact value for the 'Physical' area (weight 1)
    :return: value_a * 3 + value_b * 2 + value_c * 1
    """
    # Impact-area ranking: a higher rank contributes more to the score
    weights = {'Safety': 3, 'Operation': 2, 'Physical': 1}
    scored = zip((value_a, value_b, value_c), ('Safety', 'Operation', 'Physical'))

    # Accumulate from 0 in the order Safety, Operation, Physical
    return sum(value * weights[area] for value, area in scored)

#Privacy Level Conversion
def epsilon_derivation(Upsilon_1, epsilon_0):
    """Convert a privacy level into a privacy parameter.

    Evaluates ``1 / (1 - Upsilon_1 * (1 - 3 * exp(-epsilon_0)))``.

    :param Upsilon_1: Privacy level (the risk score in this notebook)
    :param epsilon_0: Base privacy parameter fed to the exponential term
    :return: The derived privacy parameter
    """
    decay = math.exp(-epsilon_0)
    denominator = 1 - Upsilon_1 * (1 - 3 * decay)
    return 1 / denominator

#print(round(epsilon_derivation(privacy_level, 1),1))

#Add Laplace Noise of Differential Privacy
def add_laplace_noise(data, epsilon, sensitivity, rng=None):
    """
    Add zero-mean Laplace noise to the data
    :param data: Data to be perturbed; must expose ``.shape`` and support
        element-wise addition (e.g. a NumPy array or a pandas Series)
    :param epsilon: Privacy parameter; must be greater than 0
    :param sensitivity: Sensitivity of the data; must not be negative
    :param rng: Optional ``numpy.random.Generator`` used to draw the noise so
        that results can be reproduced. When omitted, NumPy's global generator
        is used, exactly as before this parameter existed.
    :return: Perturbed data (``data`` plus one noise sample per element)
    :raises ValueError: If ``epsilon`` is not positive or ``sensitivity`` is negative
    """
    # `not epsilon > 0` also rejects NaN, which would otherwise yield NaN noise
    if not epsilon > 0:
        raise ValueError("epsilon must be greater than 0, got %r" % (epsilon,))
    if sensitivity < 0:
        raise ValueError("sensitivity must not be negative, got %r" % (sensitivity,))

    sampler = np.random if rng is None else rng
    # Noise scale is sensitivity / epsilon: a smaller epsilon gives larger noise
    laplace_noise = sampler.laplace(loc=0, scale=sensitivity / epsilon, size=data.shape)
    return data + laplace_noise

sensitivity = 0.1  # Sensitivity of the data


#Mobility Model Determination
def mobility_model(df):
    """Derive the privacy parameter (epsilon) from how far a trajectory moves.

    The mean great-circle distance between consecutive rows decides the
    mobility class, which sets the three impact-area values passed to
    ``risk_assessment``; the resulting score is converted with
    ``epsilon_derivation`` and rounded to one decimal place.

    * mean distance > 1 km        -> 'Random Mobility', impact values 3/3/3
    * 0 < mean distance <= 0.2 km -> 'Flow Mobility', impact values 1/1/1
    * anything else (0, the gap (0.2, 1], or NaN when fewer than two rows are
      given) -> prints "Mobility: Not determined" and keeps impact values 0/0/0

    :param df: DataFrame with numeric 'latitude' and 'longitude' columns in
        degrees, ordered along the trajectory
    :return: Privacy parameter rounded to one decimal place
    """
    earth_radius_km = 6371

    # Convert the coordinate columns to radians once
    latitude = np.radians(df['latitude'].to_numpy(dtype=float))
    longitude = np.radians(df['longitude'].to_numpy(dtype=float))

    # Spherical law of cosines between each point and its successor
    lat1, lat2 = latitude[:-1], latitude[1:]
    delta_lon = longitude[1:] - longitude[:-1]
    cos_angle = np.sin(lat1) * np.sin(lat2) + np.cos(lat1) * np.cos(lat2) * np.cos(delta_lon)

    # Rounding can push cos_angle marginally outside [-1, 1] for identical or
    # near-identical points; arccos would then return NaN and poison the mean.
    distances_km = np.arccos(np.clip(cos_angle, -1.0, 1.0)) * earth_radius_km

    # Mean of the calculated distances between coordinates; NaN when there is
    # no consecutive pair, which falls through to "Not determined" below
    average = np.mean(distances_km) if distances_km.size else float('nan')

    # Mobility determination
    impact_area_a = impact_area_b = impact_area_c = 0
    if average > 1:
        # Random Mobility
        impact_area_a = impact_area_b = impact_area_c = 3
    elif 0 < average <= 0.2:
        # Flow Mobility
        impact_area_a = impact_area_b = impact_area_c = 1
    else:
        print("Mobility: Not determined")

    # Determine the privacy level using the risk score
    privacy_level = risk_assessment(impact_area_a, impact_area_b, impact_area_c)

    # Determine and return the privacy parameter level
    return round(epsilon_derivation(privacy_level, 1), 1)

Training the model using Decision Tree Regressor

In [36]:
#Training the model using Decision Tree Regressor
start_time = time.time()

print ('Training the model using Decision Tree Regressor')
# Load the dataset
df = pd.read_csv("derby_location_data.csv")

# The targets are plain copies of the coordinate columns. Only the copies are
# dropped from the feature matrix below, so the features still contain the
# values being predicted and near-perfect scores are expected.
df["Latitude"] = df["latitude"]
df["Longitude"] = df["longitude"]

# Split the data into train and test sets
train_data = df.sample(frac=0.8, random_state=0)
test_data = df.drop(train_data.index)

# Train a decision tree regressor
X_train = train_data.drop(columns=["Latitude", "Longitude"])
y_train = train_data[["Latitude", "Longitude"]]

# random_state pins the choice between equally good splits so that re-running
# the cell rebuilds the same tree
model = DecisionTreeRegressor(random_state=0).fit(X_train, y_train)

# Predict the Latitude and Longitude using the test data
X_test = test_data.drop(columns=["Latitude", "Longitude"])
y_test = test_data[["Latitude", "Longitude"]]
predictions = model.predict(X_test)

# Calculate evaluation metrics for the test data
mae = mean_absolute_error(y_test, predictions)
mse = mean_squared_error(y_test, predictions)
median_ae = median_absolute_error(y_test, predictions)
accuracy = r2_score(y_test, predictions)  # reported as "Accuracy"; this is R^2

print("Mean Absolute Error (Test data):", mae)
print("Median Absolute Error (Test data):", median_ae)
print("Mean Squared Error (Test data):", mse)
print("Accuracy (Test data):", accuracy)


print("Time taken for process: %s seconds" % (time.time() - start_time))

Training the model using Decision Tree Regressor
Mean Absolute Error (Test data): 0.00012262956271129918
Median Absolute Error (Test data): 8.155750000016226e-05
Mean Squared Error (Test data): 3.081034642556213e-08
Accuracy (Test data): 0.9999933394296455
Time taken for process: 6.585157155990601 seconds


Testing Decision Tree Regressor model on new dataset

In [37]:
start_time = time.time()

#Testing Decision Tree Regressor model on new dataset
print ('Testing Decision Tree Regressor model on new dataset')
# Load the new dataset
new_data = pd.read_csv("BM.csv")

# Derive the privacy parameter from the trajectory before it is perturbed
epsilon = mobility_model(new_data)
print ('Epsilon level is: ', epsilon)

# Seed NumPy's global generator so the Laplace noise, and therefore every
# metric below, is the same each time this cell is re-run
np.random.seed(0)

# Add Laplace noise to the data
sensitivity = 0.1  # Sensitivity of the data
new_data["latitude"] = add_laplace_noise(new_data["latitude"], epsilon, sensitivity)
new_data["longitude"] = add_laplace_noise(new_data["longitude"], epsilon, sensitivity)

new_pred_data = new_data[["latitude", "longitude"]]

# Predict the Latitude and Longitude using the new data
new_predictions = model.predict(new_pred_data)

# Calculate evaluation metrics for the new data
# NOTE(review): the reference values are the perturbed coordinates themselves,
# not the coordinates as loaded from BM.csv - confirm this is the intended baseline.
new_mae = mean_absolute_error(new_pred_data, new_predictions)
new_mse = mean_squared_error(new_pred_data, new_predictions)
new_median_ae = median_absolute_error(new_pred_data, new_predictions)
new_accuracy = r2_score(new_pred_data, new_predictions)  # R^2, printed as "Accuracy"


print("Mean Absolute Error (Test data):", new_mae)
print("Median Absolute Error (Test data):", new_median_ae)
print("Mean Squared Error (Test data):", new_mse)
print("Accuracy (Test data):", new_accuracy)


print("Time taken for process: %s seconds" % (time.time() - start_time))

Testing Decision Tree Regressor model on new dataset
Epsilon level is:  0.6
Mean Absolute Error (Test data): 0.12043501605654994
Median Absolute Error (Test data): 0.032608351340146946
Mean Squared Error (Test data): 0.05393141997093676
Accuracy (Test data): 0.40617841953603334
Time taken for process: 0.03618812561035156 seconds


Training the model using Linear Regression

In [38]:
start_time = time.time()

#Training the model using Linear Regression
print ('Training the model using Linear Regression')

# Read the location data and duplicate the coordinate columns as targets
df = pd.read_csv("derby_location_data.csv")
df = df.assign(Latitude=df["latitude"], Longitude=df["longitude"])

# 80/20 split: sample the training rows, the remaining rows form the test set
train_data = df.sample(frac=0.8, random_state=0)
test_data = df.drop(train_data.index)

# Features are every column except the two target copies
y_train = train_data[["Latitude", "Longitude"]]
X_train = train_data.drop(["Latitude", "Longitude"], axis=1)
model = LinearRegression()
model.fit(X_train, y_train)

# Predict the held-out rows
y_test = test_data[["Latitude", "Longitude"]]
X_test = test_data.drop(["Latitude", "Longitude"], axis=1)
predictions = model.predict(X_test)

# Evaluation metrics for the test data (accuracy holds the R^2 score)
mae, mse, median_ae, accuracy = (
    metric(y_test, predictions)
    for metric in (mean_absolute_error, mean_squared_error, median_absolute_error, r2_score)
)

print("Mean Absolute Error (Test data):", mae)
print("Median Absolute Error (Test data):", median_ae)
print("Mean Squared Error (Test data):", mse)
print("Accuracy (Test data):", accuracy)


print("Time taken for process: %s seconds" % (time.time() - start_time))

Training the model using Linear Regression
Mean Absolute Error (Test data): 1.1118906223304085e-16
Median Absolute Error (Test data): 1.1102230246251565e-16
Mean Squared Error (Test data): 3.702222988629772e-32
Accuracy (Test data): 1.0
Time taken for process: 0.891796350479126 seconds


Testing Linear Regression model on new dataset

In [39]:
start_time = time.time()

#Testing Linear Regression model on new dataset
print ('Testing Linear Regression model on new dataset')

# Load the new dataset
new_data = pd.read_csv("BM.csv")

# Derive the privacy parameter from the trajectory before it is perturbed
epsilon = mobility_model(new_data)
print ('Epsilon level is: ', epsilon)

# Seed NumPy's global generator so the Laplace noise, and therefore every
# metric below, is the same each time this cell is re-run
np.random.seed(0)

# Add Laplace noise to the data
sensitivity = 0.1  # Sensitivity of the data
new_data["latitude"] = add_laplace_noise(new_data["latitude"], epsilon, sensitivity)
new_data["longitude"] = add_laplace_noise(new_data["longitude"], epsilon, sensitivity)

new_pred_data = new_data[["latitude", "longitude"]]

# Predict the Latitude and Longitude using the new data
new_predictions = model.predict(new_pred_data)

# Calculate evaluation metrics for the new data
# NOTE(review): the reference values are the perturbed coordinates themselves,
# not the coordinates as loaded from BM.csv - confirm this is the intended baseline.
new_mae = mean_absolute_error(new_pred_data, new_predictions)
new_mse = mean_squared_error(new_pred_data, new_predictions)
new_median_ae = median_absolute_error(new_pred_data, new_predictions)
new_accuracy = r2_score(new_pred_data, new_predictions)  # R^2, printed as "Accuracy"


print("Mean Absolute Error (Test data):", new_mae)
print("Median Absolute Error (Test data):", new_median_ae)
print("Mean Squared Error (Test data):", new_mse)
print("Accuracy (Test data):", new_accuracy)


print("Time taken for process: %s seconds" % (time.time() - start_time))

Testing Linear Regression model on new dataset
Epsilon level is:  0.6
Mean Absolute Error (Test data): 6.913661562438475e-16
Median Absolute Error (Test data): 4.996003610813204e-16
Mean Squared Error (Test data): 1.666244554067677e-30
Accuracy (Test data): 1.0
Time taken for process: 0.02692699432373047 seconds


In [40]:
start_time = time.time()

#Training the model using K-Nearest Neighbors Regressor
print ('Training the model using K-Nearest Neighbors Regressor')

# Read the location data and duplicate the coordinate columns as targets
df = pd.read_csv("derby_location_data.csv")
df = df.assign(Latitude=df["latitude"], Longitude=df["longitude"])

# 80/20 split: sample the training rows, the remaining rows form the test set
train_data = df.sample(frac=0.8, random_state=0)
test_data = df.drop(train_data.index)

# Features are every column except the two target copies
y_train = train_data[["Latitude", "Longitude"]]
X_train = train_data.drop(["Latitude", "Longitude"], axis=1)

model = KNeighborsRegressor()
model.fit(X_train, y_train)

# Predict the held-out rows
y_test = test_data[["Latitude", "Longitude"]]
X_test = test_data.drop(["Latitude", "Longitude"], axis=1)
predictions = model.predict(X_test)

# Evaluation metrics for the test data (accuracy holds the R^2 score)
mae, mse, median_ae, accuracy = (
    metric(y_test, predictions)
    for metric in (mean_absolute_error, mean_squared_error, median_absolute_error, r2_score)
)

print("Mean Absolute Error (Test data):", mae)
print("Median Absolute Error (Test data):", median_ae)
print("Mean Squared Error (Test data):", mse)
print("Accuracy (Test data):", accuracy)

print("Time taken for process: %s seconds" % (time.time() - start_time))

Training the model using K-Nearest Neighbors Regressor
Mean Absolute Error (Test data): 6.528697854515475e-05
Median Absolute Error (Test data): 5.401659999881847e-05
Mean Squared Error (Test data): 6.865980238423773e-09
Accuracy (Test data): 0.9999985135161187
Time taken for process: 4.717034816741943 seconds


In [41]:
start_time = time.time()

#Testing K-Nearest Neighbors Regressor model on new dataset
print ('Testing K-Nearest Neighbors Regressor model on new dataset')

# Load the new dataset
new_data = pd.read_csv("BM.csv")

# Derive the privacy parameter from the trajectory before it is perturbed
epsilon = mobility_model(new_data)
print ('Epsilon level is: ', epsilon)

# Seed NumPy's global generator so the Laplace noise, and therefore every
# metric below, is the same each time this cell is re-run
np.random.seed(0)

# Add Laplace noise to the data
sensitivity = 0.1  # Sensitivity of the data
new_data["latitude"] = add_laplace_noise(new_data["latitude"], epsilon, sensitivity)
new_data["longitude"] = add_laplace_noise(new_data["longitude"], epsilon, sensitivity)

new_pred_data = new_data[["latitude", "longitude"]]

# Predict the Latitude and Longitude using the new data
new_predictions = model.predict(new_pred_data)

# Calculate evaluation metrics for the new data
# NOTE(review): the reference values are the perturbed coordinates themselves,
# not the coordinates as loaded from BM.csv - confirm this is the intended baseline.
new_mae = mean_absolute_error(new_pred_data, new_predictions)
new_mse = mean_squared_error(new_pred_data, new_predictions)
new_median_ae = median_absolute_error(new_pred_data, new_predictions)
new_accuracy = r2_score(new_pred_data, new_predictions)  # R^2, printed as "Accuracy"


print("Mean Absolute Error (Test data):", new_mae)
print("Median Absolute Error (Test data):", new_median_ae)
print("Mean Squared Error (Test data):", new_mse)
print("Accuracy (Test data):", new_accuracy)


print("Time taken for process: %s seconds" % (time.time() - start_time))

Testing K-Nearest Neighbors Regressor model on new dataset
Epsilon level is:  0.6
Mean Absolute Error (Test data): 0.08523564936365396
Median Absolute Error (Test data): 0.0018887014757379417
Mean Squared Error (Test data): 0.028750984130780054
Accuracy (Test data): 0.4300739898081863
Time taken for process: 0.0358431339263916 seconds


In [42]:

#Training the model using Multi-layer Perceptron regressor

start_time = time.time()

print ('Training the model using Multi-layer Perceptron regressor Neural Network')

#Load the dataset
df = pd.read_csv("derby_location_data.csv")

# The targets are plain copies of the coordinate columns; only the copies are
# dropped from the feature matrix below, so the features still contain the
# values being predicted.
df["Latitude"] = df["latitude"]
df["Longitude"] = df["longitude"]

#Split the data into train and test sets
train_data = df.sample(frac=0.8, random_state=0)
test_data = df.drop(train_data.index)

#Train a neural network
X_train = train_data.drop(columns=["Latitude", "Longitude"])
y_train = train_data[["Latitude", "Longitude"]]

# random_state fixes the weight initialisation and batch sampling so that
# re-running the cell reproduces the same fit and the same metrics
model = MLPRegressor(random_state=0).fit(X_train, y_train)

#Predict the Latitude and Longitude using the test data
X_test = test_data.drop(columns=["Latitude", "Longitude"])
y_test = test_data[["Latitude", "Longitude"]]
predictions = model.predict(X_test)

#Calculate evaluation metrics for the test data
mae = mean_absolute_error(y_test, predictions)
mse = mean_squared_error(y_test, predictions)
median_ae = median_absolute_error(y_test, predictions)
accuracy = r2_score(y_test, predictions)  # reported as "Accuracy"; this is R^2

print("Mean Absolute Error (Test data):", mae)
print("Median Absolute Error (Test data):", median_ae)
print("Mean Squared Error (Test data):", mse)
print("Accuracy (Test data):", accuracy)
print("Time taken for process: %s seconds" % (time.time() - start_time))

Training the model using Multi-layer Perceptron regressor Neural Network
Mean Absolute Error (Test data): 0.015355547583054432
Median Absolute Error (Test data): 0.015351751682410542
Mean Squared Error (Test data): 0.0002950072491282125
Accuracy (Test data): 0.9557096991811475
Time taken for process: 78.17834424972534 seconds


In [43]:
start_time = time.time()

#Testing Multi-layer Perceptron regressor model on new dataset
print ('Testing Multi-layer Perceptron regressor model on new dataset')

# Load the new dataset
new_data = pd.read_csv("BM.csv")

# Derive the privacy parameter from the trajectory before it is perturbed
epsilon = mobility_model(new_data)
print ('Epsilon level is: ', epsilon)

# Seed NumPy's global generator so the Laplace noise, and therefore every
# metric below, is the same each time this cell is re-run
np.random.seed(0)

# Add Laplace noise to the data
sensitivity = 0.1  # Sensitivity of the data
new_data["latitude"] = add_laplace_noise(new_data["latitude"], epsilon, sensitivity)
new_data["longitude"] = add_laplace_noise(new_data["longitude"], epsilon, sensitivity)

new_pred_data = new_data[["latitude", "longitude"]]

# Predict the Latitude and Longitude using the new data
new_predictions = model.predict(new_pred_data)

# Calculate evaluation metrics for the new data
# NOTE(review): the reference values are the perturbed coordinates themselves,
# not the coordinates as loaded from BM.csv - confirm this is the intended baseline.
new_mae = mean_absolute_error(new_pred_data, new_predictions)
new_mse = mean_squared_error(new_pred_data, new_predictions)
new_median_ae = median_absolute_error(new_pred_data, new_predictions)
new_accuracy = r2_score(new_pred_data, new_predictions)  # R^2, printed as "Accuracy"


print("Mean Absolute Error (Test data):", new_mae)
print("Median Absolute Error (Test data):", new_median_ae)
print("Mean Squared Error (Test data):", new_mse)
print("Accuracy (Test data):", new_accuracy)


print("Time taken for process: %s seconds" % (time.time() - start_time))

Testing Multi-layer Perceptron regressor model on new dataset
Epsilon level is:  0.6
Mean Absolute Error (Test data): 0.015362806081342027
Median Absolute Error (Test data): 0.014829729078780718
Mean Squared Error (Test data): 0.00029311384739786326
Accuracy (Test data): 0.9943997502057357
Time taken for process: 0.01695561408996582 seconds


In [44]:
start_time = time.time()
print ('Training the model using Multi-layer Perceptron regressor')

# Read the location data and duplicate the coordinate columns as targets
df = pd.read_csv("derby_location_data.csv")
df = df.assign(Latitude=df["latitude"], Longitude=df["longitude"])

# 80/20 split: sample the training rows, the remaining rows form the test set
train_data = df.sample(frac=0.8, random_state=0)
test_data = df.drop(train_data.index)

# Features are every column except the two target copies
y_train = train_data[["Latitude", "Longitude"]]
X_train = train_data.drop(["Latitude", "Longitude"], axis=1)

# Three hidden layers of 10 units, at most 50 iterations, fixed seed
model = MLPRegressor(hidden_layer_sizes=(10,10,10), max_iter=50, random_state=0)
model.fit(X_train, y_train)

# Predict the held-out rows
y_test = test_data[["Latitude", "Longitude"]]
X_test = test_data.drop(["Latitude", "Longitude"], axis=1)
predictions = model.predict(X_test)

# Evaluation metrics for the test data (accuracy holds the R^2 score)
mae, mse, median_ae, accuracy = (
    metric(y_test, predictions)
    for metric in (mean_absolute_error, mean_squared_error, median_absolute_error, r2_score)
)

print("Mean Absolute Error (Test data):", mae)
print("Median Absolute Error (Test data):", median_ae)
print("Mean Squared Error (Test data):", mse)
print("Accuracy (Test data):", accuracy)
print("Time taken for process: %s seconds" % (time.time() - start_time))

Training the model using Multi-layer Perceptron regressor
Mean Absolute Error (Test data): 0.004385575660970478
Median Absolute Error (Test data): 0.004343769829961985
Mean Squared Error (Test data): 3.5342553213956823e-05
Accuracy (Test data): 0.9951747042439838
Time taken for process: 85.83809638023376 seconds


In [45]:
start_time = time.time()

#Testing Second Multi-layer Perceptron regressor model on new dataset
print ('Testing Second Multi-layer Perceptron regressor model on new dataset')

# Load the new dataset
new_data = pd.read_csv("BM.csv")

# Derive the privacy parameter from the trajectory before it is perturbed
epsilon = mobility_model(new_data)
print ('Epsilon level is: ', epsilon)

# Seed NumPy's global generator so the Laplace noise, and therefore every
# metric below, is the same each time this cell is re-run
np.random.seed(0)

# Add Laplace noise to the data
sensitivity = 0.1  # Sensitivity of the data
new_data["latitude"] = add_laplace_noise(new_data["latitude"], epsilon, sensitivity)
new_data["longitude"] = add_laplace_noise(new_data["longitude"], epsilon, sensitivity)

new_pred_data = new_data[["latitude", "longitude"]]

# Predict the Latitude and Longitude using the new data
new_predictions = model.predict(new_pred_data)

# Calculate evaluation metrics for the new data
# NOTE(review): the reference values are the perturbed coordinates themselves,
# not the coordinates as loaded from BM.csv - confirm this is the intended baseline.
new_mae = mean_absolute_error(new_pred_data, new_predictions)
new_mse = mean_squared_error(new_pred_data, new_predictions)
new_median_ae = median_absolute_error(new_pred_data, new_predictions)
new_accuracy = r2_score(new_pred_data, new_predictions)  # R^2, printed as "Accuracy"


print("Mean Absolute Error (Test data):", new_mae)
print("Median Absolute Error (Test data):", new_median_ae)
print("Mean Squared Error (Test data):", new_mse)
print("Accuracy (Test data):", new_accuracy)


print("Time taken for process: %s seconds" % (time.time() - start_time))

Testing Second Multi-layer Perceptron regressor model on new dataset
Epsilon level is:  0.6
Mean Absolute Error (Test data): 0.005335664448461079
Median Absolute Error (Test data): 0.004883544046386246
Mean Squared Error (Test data): 4.392187708443809e-05
Accuracy (Test data): 0.9989737689313165
Time taken for process: 0.028921127319335938 seconds
