<a href="https://colab.research.google.com/github/Rohit-Annamaneni-1207/DN_project_route_trace/blob/main/models_training_testing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from geopy.geocoders import Nominatim

**PREPROCESSING FOR NAIVE BAYES, DECISION TREE, RANDOM FOREST AND NEURAL NETWORK**

In [None]:
def latitude_longitude(place, geolocator=None):
  """Look up the coordinates of a place name.

  Args:
    place: Place name passed to the geocoder's ``geocode`` method.
    geolocator: Optional object exposing ``geocode(place)``. When omitted, a
      ``Nominatim`` client is created for this call; pass one in to reuse a
      client across calls or to substitute an offline/cached lookup.

  Returns:
    ``[latitude, longitude]`` of the location returned by the geocoder.

  Raises:
    ValueError: If the geocoder returns no match for ``place``.
  """
  if geolocator is None:
    # Nominatim's usage policy asks for an application-specific user agent,
    # and recent geopy releases reject the library default.
    geolocator = Nominatim(user_agent="DN_project_route_trace")
  location = geolocator.geocode(place)
  if location is None:
    # geocode() yields None when nothing matches; fail with a clear message
    # instead of an AttributeError on `None.latitude`.
    raise ValueError(f"Could not geocode place: {place!r}")
  return [location.latitude, location.longitude]

def predict_inverse(model, encoders, src_city, dest, hop_no, locate=None):
  """Predict the city at a given hop and return it with its coordinates.

  Args:
    model: Fitted estimator whose ``predict`` accepts rows of
      ``[src_city_code, dest_code, hop_no]``.
    encoders: Sequence ``[le_src_city, le_dest, le_hop_city]`` of fitted
      label encoders (source city, destination, hop city).
    src_city: Source city name known to the source-city encoder.
    dest: Destination name known to the destination encoder.
    hop_no: Hop number (numeric).
    locate: Optional callable mapping a city name to ``[lat, lon]``.
      Defaults to ``latitude_longitude``.

  Returns:
    ``[city, latitude, longitude]`` for the predicted hop city.
  """
  le_src_city = encoders[0]
  le_dest = encoders[1]
  le_hop_city = encoders[2]

  # Build the feature row as numbers. Packing the raw strings and the hop
  # number into one np.array first would give a string-typed array, so the
  # model would receive text such as '1' instead of numeric codes.
  features = np.array(
      [[le_src_city.transform([src_city])[0],
        le_dest.transform([dest])[0],
        hop_no]],
      dtype=float,
  )

  predicted_code = model.predict(features)[0]
  city = le_hop_city.inverse_transform([predicted_code])[0]

  if locate is None:
    locate = latitude_longitude
  return [city] + list(locate(city))





In [None]:
# Earlier snapshot of the dataset, superseded by the updated file loaded below:
# https://raw.githubusercontent.com/Rohit-Annamaneni-1207/DN_project_route_trace/main/route_data.csv
url = "https://raw.githubusercontent.com/Rohit-Annamaneni-1207/DN_project_route_trace/main/route_data_updated.csv"
df = pd.read_csv(url, encoding="utf-8")

In [None]:
# Preview the loaded route dataset.
df

Unnamed: 0.1,Unnamed: 0,session_no,src,src_city,src_region,src_country,dest,Hop_No,avg_rtt,IP_Address,City,Region_Name,Country
0,0,0,10.0.16.155,Nandigāma,Andhra Pradesh,IN,google,1,4,203.129.246.53,Nandigāma,Andhra Pradesh,IN
1,1,0,10.0.16.155,Nandigāma,Andhra Pradesh,IN,google,2,4,203.129.218.109,Hubli,Karnataka,IN
2,2,0,10.0.16.155,Nandigāma,Andhra Pradesh,IN,google,3,6,61.1.117.54,Bengaluru,Karnataka,IN
3,3,0,10.0.16.155,Nandigāma,Andhra Pradesh,IN,google,4,24,117.216.207.115,Tanjore,Tamil Nadu,IN
4,4,0,10.0.16.155,Nandigāma,Andhra Pradesh,IN,google,5,26,142.250.160.26,Chennai,Tamil Nadu,IN
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1652,1,74,10.0.3.194,Delhi,Delhi,IN,amazon,2,18,116.119.72.96,Gurgaon,Haryana,IN
1653,2,74,10.0.3.194,Delhi,Delhi,IN,amazon,3,33,182.79.142.123,Hyderābād,Telangana,IN
1654,3,74,10.0.3.194,Delhi,Delhi,IN,amazon,4,48,23.56.136.206,Mumbai,Maharashtra,IN
1655,4,74,10.0.3.194,Delhi,Delhi,IN,amazon,5,100,104.70.118.197,Mumbai,Maharashtra,IN


In [None]:
# Inspect the hop number and destination columns side by side.
df[["Hop_No","dest"]]


Unnamed: 0,Hop_No,dest
0,1,google
1,2,google
2,3,google
3,4,google
4,5,google
...,...,...
1652,2,amazon
1653,3,amazon
1654,4,amazon
1655,5,amazon


In [None]:
# Inspect the hop number column on its own.
df["Hop_No"]

0       1
1       2
2       3
3       4
4       5
       ..
1652    2
1653    3
1654    4
1655    5
1656    6
Name: Hop_No, Length: 1657, dtype: int64

In [None]:
# Destination names as a raw NumPy array.
df["dest"].values

array(['google', 'google', 'google', ..., 'amazon', 'amazon', 'amazon'],
      dtype=object)

In [None]:
from sklearn.preprocessing import LabelEncoder

# Feature matrix: source city, destination and hop number for every row.
X = df[["src_city", "dest", "Hop_No"]].values

# Swap the destination names in column 1 for integer codes.
le_dest = LabelEncoder()
X[:, 1] = le_dest.fit_transform(X[:, 1])
X

array([['Nandigāma', 1, 1],
       ['Nandigāma', 1, 2],
       ['Nandigāma', 1, 3],
       ...,
       ['Delhi', 0, 4],
       ['Delhi', 0, 5],
       ['Delhi', 0, 6]], dtype=object)

In [None]:
# One encoder for the source-city feature and one for the hop-city target.
le_src_city, le_hop_city = LabelEncoder(), LabelEncoder()

# Target vector: the "City" column of the dataset.
y = df["City"].to_numpy()
y

array(['Nandigāma', 'Hubli', 'Bengaluru', ..., 'Mumbai', 'Mumbai',
       'Mumbai'], dtype=object)

In [None]:
# Fit both encoders on the raw DataFrame columns rather than on `y` / `X[:, 0]`,
# which this cell overwrites. Re-running the cell would otherwise refit the
# encoders on the already-encoded integers and lose the city names in `classes_`.
y = le_hop_city.fit_transform(df["City"].values)
X[:, 0] = le_src_city.fit_transform(df["src_city"].values)
y

array([12,  8,  4, ..., 11, 11, 11])

In [None]:
# Feature matrix after both categorical columns have been label-encoded.
X

array([[1, 1, 1],
       [1, 1, 2],
       [1, 1, 3],
       ...,
       [0, 0, 4],
       [0, 0, 5],
       [0, 0, 6]], dtype=object)

In [None]:
# Fix the shuffle seed so the split, and every score computed from it, is
# reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
len(X_train)

1325

In [None]:
# Number of rows held out for testing.
len(X_test)

332

**NAIVE BAYES MODEL**

In [None]:
from sklearn.naive_bayes import GaussianNB

# Fit a Gaussian Naive Bayes model on the training split. fit() returns the
# estimator itself, so the trailing expression still displays it.
classifier = GaussianNB().fit(X_train, y_train)
classifier


GaussianNB()

In [None]:
# Mean accuracy of the Naive Bayes model on the held-out split.
classifier.score(X_test, y_test)

0.4578313253012048

In [None]:
# Persist the fitted Naive Bayes classifier to the file "NB_model".
import joblib
joblib.dump(classifier, "NB_model")



['NB_model']

In [None]:
# Save each encoder's learned classes so the label mappings can be rebuilt later.
for path, encoder in (
    ("src_city_classes.npy", le_src_city),
    ("hop_city_classes.npy", le_hop_city),
    ("dest_classes.npy", le_dest),
):
  np.save(path, encoder.classes_)



In [None]:
# Rebuild an encoder from the saved destination classes to check the round trip.
test_encoder = LabelEncoder()
# allow_pickle=True: the saved labels are strings, presumably stored as an object array.
test_encoder.classes_ = np.load("dest_classes.npy", allow_pickle=True)

In [None]:
# Decode label 1 with the reloaded destination encoder.
t = test_encoder.inverse_transform([1])
t

array(['google'], dtype=object)

In [None]:
# Reuse the same encoder object with the saved hop-city classes and decode a few labels.
test_encoder.classes_ = np.load("hop_city_classes.npy", allow_pickle=True)
t = test_encoder.inverse_transform([7,5,3,13])

In [None]:
# Show the decoded hop cities.
t

array(['Gurgaon', 'Chennai', 'Ashburn', 'Nelamangala'], dtype=object)

In [None]:
# Decode the first ten held-out targets back to city names.
t = test_encoder.inverse_transform(y_test[:10])
t

array(['Chennai', 'Delhi', 'Singapore', 'San Francisco', 'Hubli',
       'Singapore', 'Delhi', 'Hyderābād', 'Hubli', 'Singapore'],
      dtype=object)

**DECISION TREE MODEL**

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Seed the tree: features are permuted at each split, so ties between equally
# good splits can otherwise resolve differently from run to run.
dtc = DecisionTreeClassifier(random_state=42)
dtc.fit(X_train, y_train)

DecisionTreeClassifier()

In [None]:
# Mean accuracy of the decision tree on the held-out split.
dtc.score(X_test, y_test)

0.9036144578313253

In [None]:
# Persist the fitted decision tree to the file "DTC_model".
import joblib
joblib.dump(dtc, "DTC_model")

['DTC_model']

**RANDOM FOREST MODEL**

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Seed the forest so bootstrap sampling and feature selection, and therefore
# the fitted model and its score, are repeatable.
rfc = RandomForestClassifier(random_state=42)
rfc.fit(X_train, y_train)


RandomForestClassifier()

In [None]:
# Mean accuracy of the random forest on the held-out split.
rfc.score(X_test, y_test)

0.9006024096385542

In [None]:
# Random-forest prediction for hop 5 of a Nandigāma -> amazon route: [city, lat, lon].
predict_inverse(rfc, [le_src_city, le_dest, le_hop_city], 'Nandigāma', 'amazon', 5)



['Ashburn', 39.030019100000004, -77.46964646557657]

In [None]:
# Persist the fitted random forest to the file "RFC_model".
import joblib
joblib.dump(rfc, "RFC_model")

['RFC_model']

# **Neural network**

In [None]:
from sklearn.neural_network import MLPClassifier

# Seed weight initialisation and batch shuffling so the fitted network and the
# score below are repeatable.
NN_model = MLPClassifier(max_iter=2000, random_state=42)
NN_model.fit(X_train, y_train)
NN_model.score(X_test, y_test)

0.8945783132530121

In [None]:
# Neural-network prediction for hop 5 of a Nandigāma -> amazon route: [city, lat, lon].
predict_inverse(NN_model, [le_src_city, le_dest, le_hop_city], 'Nandigāma', 'amazon', 5)

  X = check_array(X, **check_params)


['Singapore', 1.357107, 103.8194992]

In [None]:
# Persist the fitted neural network to the file "NN_model".
import joblib
joblib.dump(NN_model, "NN_model")

['NN_model']

In [None]:
# First encoded feature row of the held-out split.
X_test[0]

array([0, 1, 6], dtype=object)

In [None]:
# Encoded hop-city label for the first held-out row.
y_test[0]

5

In [None]:
# Neural-network prediction for hop 6 of a Nandigāma -> amazon route: [city, lat, lon].
predict_inverse(NN_model, [le_src_city, le_dest, le_hop_city], 'Nandigāma', 'amazon', 6)

  X = check_array(X, **check_params)


['Pallāvaram', 12.989815700000001, 80.10098654184341]