In [1]:
import sklearn
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
from PyRadioLoc.Utils.GeoUtils import GeoUtils

In [2]:
def get_distance_array(y_pred, y_test):
    """Return the per-row distance between true and predicted points.

    Rows of ``y_test`` and ``y_pred`` are paired positionally (pairing stops at
    the shorter input). For each pair, the first two values of the true row and
    the first two values of the predicted row are handed to
    ``GeoUtils.distanceInKm`` in that order.

    Returns a list with one ``GeoUtils.distanceInKm`` result per pair.
    """
    return [
        GeoUtils.distanceInKm(actual[0], actual[1], predicted[0], predicted[1])
        for actual, predicted in zip(y_test, y_pred)
    ]

def get_quality_metrics(y_pred, y_test):
    """Print min, max, mean and standard deviation of the localization error.

    The per-row distances come from ``get_distance_array`` and are multiplied
    by 1000 before printing with an ``m`` suffix. Nothing is returned; the
    statistics are only written to standard output.
    """
    errors = get_distance_array(y_pred, y_test)
    # All four statistics are computed before anything is printed.
    report = (
        ("min:            %0.2f m", min(errors) * 1000),
        ("max:            %0.2f m", max(errors) * 1000),
        ("mean:           %0.2f m", np.mean(errors) * 1000),
        ("std deviation:  %0.2f m", np.std(errors) * 1000),
    )
    for template, value in report:
        print(template % value)
    
def write_to_csv(y_pred_lat, y_pred_lon, test_id, file_path):
    """Write the three inputs side by side as columns of one CSV file.

    ``y_pred_lat``, ``y_pred_lon`` and ``test_id`` are concatenated column-wise
    in that order and saved to ``file_path`` without the index column.
    """
    side_by_side = [y_pred_lat, y_pred_lon, test_id]
    pd.concat(side_by_side, axis=1).to_csv(path_or_buf=file_path, index=False)

In [4]:
# Training features of the 80% outdoor split, without the lat, lon and idx columns.
X_train = pd.read_csv("databasesTCC/Outdoor_X_train_80%.csv").drop(
    columns=["lat", "lon", "idx"]
)
X_train.head()

Unnamed: 0,dist_1,dist_2,dist_3,ang_1,cos_1,sin_1,tg_1,ang_2,cos_2,sin_2,...,ang_3,cos_3,sin_3,tg_3,delay_1,delay_2,delay_3,delay_12,delay_13,delay_23
0,1.623132,0.504054,1.266725,158.234761,-0.928711,0.370804,-0.399268,-155.889163,-0.912757,-0.408503,...,98.686639,-0.15103,0.988529,-6.545237,7,2,5,5,2,-3
1,0.946625,0.8921,0.682786,-175.257525,-0.996576,-0.082677,0.082961,-83.307306,0.116544,-0.993186,...,56.915837,0.54587,0.83787,1.534924,4,4,3,0,1,1
2,0.813049,1.083588,0.677145,-162.727543,-0.954904,-0.296916,0.310938,-75.518359,0.25007,-0.968228,...,37.151782,0.797038,0.603929,0.757716,3,5,3,-2,0,2
3,0.827905,0.49907,1.382476,138.771078,-0.752082,0.659069,-0.876326,-31.68349,0.850962,-0.525226,...,59.893937,0.501602,0.865098,1.72467,4,2,6,2,-2,-4
4,0.702384,0.73135,1.042135,165.550108,-0.968366,0.249533,-0.257685,-59.863447,0.502063,-0.864831,...,52.39437,0.610223,0.79223,1.298263,3,3,4,0,-1,-1


In [6]:
def y_RSSIs(file):
    """Read a CSV of RSSI targets and split it into one frame per RSSI column.

    Parameters
    ----------
    file : path or file-like object accepted by ``pandas.read_csv``. It must
        contain the nine columns ``rssi_1_1`` ... ``rssi_3_3``; a missing one
        makes ``DataFrame.drop`` raise ``KeyError``.

    Returns
    -------
    tuple of nine DataFrames, ordered rssi_1_1, rssi_1_2, rssi_1_3, rssi_2_1,
    ..., rssi_3_3. Each one is the full table with the other eight RSSI
    columns removed; any non-RSSI columns in the file are kept in every frame.
    """
    rssi_columns = ["rssi_{}_{}".format(a, b) for a in (1, 2, 3) for b in (1, 2, 3)]
    targets = pd.read_csv(file)

    per_column = []
    for kept in rssi_columns:
        unwanted = [name for name in rssi_columns if name != kept]
        per_column.append(targets.drop(unwanted, axis=1))
    return tuple(per_column)

In [8]:
# Training targets of the 80% outdoor split: one frame per RSSI column.
# The directory name is spelled "databasesTCC" like in the other loading cells;
# the previous "DatabasesTCC" spelling only resolves on case-insensitive filesystems.
(
    y_train_rssi_1_1, y_train_rssi_1_2, y_train_rssi_1_3,
    y_train_rssi_2_1, y_train_rssi_2_2, y_train_rssi_2_3,
    y_train_rssi_3_1, y_train_rssi_3_2, y_train_rssi_3_3,
) = y_RSSIs("databasesTCC/Outdoor_y_train_80%.csv")
y_train_rssi_1_1.head()

Unnamed: 0,rssi_1_1
0,-87.185714
1,-85.33
2,-90.572857
3,-96.702857
4,-91.864286


In [10]:
# Test features of the 20% outdoor split. The idx column is kept aside in
# idx_test before it is removed together with lat and lon.
X_test = pd.read_csv("databasesTCC/Outdoor_X_test_20%.csv")
idx_test = X_test["idx"]
X_test = X_test.drop(columns=["lat", "lon", "idx"])
X_test.head()

Unnamed: 0,dist_1,dist_2,dist_3,ang_1,cos_1,sin_1,tg_1,ang_2,cos_2,sin_2,...,ang_3,cos_3,sin_3,tg_3,delay_1,delay_2,delay_3,delay_12,delay_13,delay_23
0,1.381194,1.169714,0.327354,-166.429703,-0.972083,-0.234638,0.241377,-104.621674,-0.252435,-0.967614,...,94.650653,-0.08108,0.996708,-12.292874,6,5,1,1,5,4
1,0.664883,0.676311,1.429879,132.738945,-0.678659,0.734453,-1.082213,-28.185835,0.88142,-0.472333,...,52.781288,0.604859,0.796332,1.316558,3,3,6,0,-3,-3
2,0.983289,0.359384,1.459081,136.413773,-0.724338,0.689445,-0.951829,-21.180773,0.932445,-0.361312,...,65.55164,0.413873,0.910335,2.199551,4,2,6,2,-2,-4
3,1.232941,0.412945,1.072575,160.519728,-0.942756,0.333482,-0.353731,-106.172558,-0.278531,-0.960427,...,81.758935,0.143338,0.989674,6.904462,5,2,5,3,0,-3
4,1.062763,0.390264,1.127756,156.463782,-0.916808,0.399329,-0.435564,-79.219895,0.18704,-0.982352,...,72.359742,0.30304,0.952978,3.144731,5,2,5,3,0,-3


# LightGBM

In [11]:
import lightgbm as lgb


In [12]:
def LightGBM_TCC(X_train, y_train, X_test):
    """Fit a LightGBM booster on standardized features and predict ``X_test``.

    Parameters
    ----------
    X_train : training features.
    y_train : training labels, passed to ``lgb.Dataset`` as ``label``.
    X_test : features to predict on; must be transformable by the scaler
        fitted on ``X_train``.

    Returns
    -------
    The result of ``Booster.predict`` on the scaled test features.
    """
    from sklearn.preprocessing import StandardScaler

    # The scaler is fitted on the training features only and then reused
    # unchanged for the test features.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # No objective is set, so LightGBM's default objective applies.
    booster_params = {
        'learning_rate': 1.0,
        'boosting_type': 'gbdt',
        'metric': 'mae',
        'sub_feature': 0.8,
        'num_leaves': 255,
        'min_data': 1,
    }
    train_set = lgb.Dataset(X_train_scaled, label=y_train)
    booster = lgb.train(booster_params, train_set, num_boost_round=10000)
    return booster.predict(X_test_scaled)

In [13]:
def y_pred_write_File(X_train, y_train_rssi_1_1, y_train_rssi_1_2, y_train_rssi_1_3, y_train_rssi_2_1, y_train_rssi_2_2, y_train_rssi_2_3, y_train_rssi_3_1, y_train_rssi_3_2, y_train_rssi_3_3, X_test, Metodo_Num):
    """Predict all nine RSSI targets for ``X_test`` and return them side by side.

    ``LightGBM_TCC`` is called once per target with the shared ``X_train`` and
    ``X_test``, in the order 1_1, 1_2, 1_3, 2_1, ..., 3_3.

    Parameters
    ----------
    X_train : training features, forwarded unchanged to ``LightGBM_TCC``.
    y_train_rssi_<a>_<b> : training target that produces column ``RSSI_<a>_<b>``.
    X_test : features to predict on, forwarded unchanged to ``LightGBM_TCC``.
    Metodo_Num : kept in the signature for existing callers; it is not used.
        Despite the function name, nothing is written to disk here.

    Returns
    -------
    pandas.DataFrame with the columns ``RSSI_1_1`` ... ``RSSI_3_3``.
    """
    targets_by_column = (
        ('RSSI_1_1', y_train_rssi_1_1),
        ('RSSI_1_2', y_train_rssi_1_2),
        ('RSSI_1_3', y_train_rssi_1_3),
        ('RSSI_2_1', y_train_rssi_2_1),
        ('RSSI_2_2', y_train_rssi_2_2),
        ('RSSI_2_3', y_train_rssi_2_3),
        ('RSSI_3_1', y_train_rssi_3_1),
        ('RSSI_3_2', y_train_rssi_3_2),
        ('RSSI_3_3', y_train_rssi_3_3),
    )

    # First run every model, then wrap each prediction in a one-column frame.
    predictions = [
        (column_name, LightGBM_TCC(X_train, y_train, X_test))
        for column_name, y_train in targets_by_column
    ]
    predicted_columns = [
        pd.DataFrame({column_name: values}) for column_name, values in predictions
    ]
    return pd.concat(predicted_columns, axis=1)

# Geração dos RSSIs do CDB com ML do que já foi construído:

In [20]:
# Read the CDB table, remove lat and lon, and use the remaining columns as the
# features to predict on. df_meds_CDB is then rebound to the predicted RSSI
# frame, which is saved and previewed.
df_meds_CDB = pd.read_csv("./CDB/CDB_20.csv").drop(columns=["lat", "lon"])
df_meds_CDB = y_pred_write_File(
    X_train,
    y_train_rssi_1_1, y_train_rssi_1_2, y_train_rssi_1_3,
    y_train_rssi_2_1, y_train_rssi_2_2, y_train_rssi_2_3,
    y_train_rssi_3_1, y_train_rssi_3_2, y_train_rssi_3_3,
    df_meds_CDB,
    2,
)
df_meds_CDB.to_csv("./CDB/CDB_20_LightGBM.csv", index=False)
df_meds_CDB.head()

Unnamed: 0,RSSI_1_1,RSSI_1_2,RSSI_1_3,RSSI_2_1,RSSI_2_2,RSSI_2_3,RSSI_3_1,RSSI_3_2,RSSI_3_3
0,-95.257429,-101.256458,-90.994183,-82.774777,-88.158168,-98.760354,-93.745712,-86.044939,-70.074586
1,-95.257429,-101.256458,-90.994291,-82.774777,-88.158559,-98.760354,-93.745712,-81.075098,-70.072108
2,-95.257429,-101.256444,-90.994291,-82.774777,-88.158104,-97.724096,-93.748641,-81.075098,-70.072108
3,-95.257429,-101.256466,-90.994291,-82.774802,-88.124843,-97.724096,-93.748641,-65.473641,-70.072108
4,-94.395072,-101.260548,-91.17127,-82.774799,-88.055176,-97.708748,-93.749146,-65.473651,-87.845521


# Outdoor 20% Test

In [14]:
# Predict the nine RSSI targets for the 20% outdoor test features.
y_pred = y_pred_write_File(
    X_train,
    y_train_rssi_1_1, y_train_rssi_1_2, y_train_rssi_1_3,
    y_train_rssi_2_1, y_train_rssi_2_2, y_train_rssi_2_3,
    y_train_rssi_3_1, y_train_rssi_3_2, y_train_rssi_3_3,
    X_test,
    2,
)
y_pred.head()

Unnamed: 0,RSSI_1_1,RSSI_1_2,RSSI_1_3,RSSI_2_1,RSSI_2_2,RSSI_2_3,RSSI_3_1,RSSI_3_2,RSSI_3_3
0,-94.850147,-97.363747,-94.88663,-93.881618,-93.850548,-90.411214,-77.658575,-65.838825,-93.222784
1,-100.118568,-84.908569,-97.900002,-85.949997,-98.837502,-97.332855,-99.506668,-104.389999,-94.407501
2,-96.685925,-85.961372,-92.728738,-78.489341,-90.204732,-85.760205,-90.308272,-95.264671,-98.909569
3,-92.438261,-90.242308,-93.070974,-75.959987,-91.323229,-87.823991,-86.051805,-93.248043,-97.25492
4,-86.227142,-87.246282,-89.323364,-67.540008,-79.858324,-87.267293,-89.662398,-93.29994,-97.600005


In [17]:
# Load the targets of the 20% outdoor test split (all columns in one frame).
y_test = pd.read_csv("databasesTCC/Outdoor_y_test_20%.csv")
y_test.head()

Unnamed: 0,rssi_1_1,rssi_1_2,rssi_1_3,rssi_2_1,rssi_2_2,rssi_2_3,rssi_3_1,rssi_3_2,rssi_3_3
0,-94.588571,-97.102857,-94.058571,-96.128333,-92.275,-90.411429,-77.951667,-73.19,-94.7525
1,-103.01,-84.498571,-97.591429,-95.01,-98.8375,-97.332857,-100.65,-101.62,-94.4075
2,-95.202857,-86.28,-93.03,-53.33,-84.965,-81.08,-90.826667,-94.342,-98.9125
3,-92.422619,-90.242143,-93.064643,-80.359286,-87.1375,-86.910952,-91.338333,-93.248,-97.255
4,-86.227143,-86.231429,-87.885714,-72.83,-80.56625,-85.737143,-90.191667,-90.704,-97.6


In [84]:
def Results(y_test, y_pred):
    """Score predictions against the true values with scikit-learn metrics.

    Returns a four-element list:
    ``[mean absolute error, mean squared error, sqrt of the MSE, R^2 score]``.
    """
    from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, y_pred)
    return [mae, mse, rmse, r2]

In [85]:
def ResultsMeters(y_test, y_pred):
    """Convert both frames to NumPy arrays and print the distance statistics.

    The arrays are handed to ``get_quality_metrics`` with the true values
    first and the predictions second, and its result is returned as is.
    """
    # The positions are the reverse of get_quality_metrics' parameter names
    # (y_pred, y_test); the order is kept as it has always been called.
    return get_quality_metrics(y_test.to_numpy(), y_pred.to_numpy())
    

# Outdoor 20% test Result

In [66]:
# Results returns [MAE, MSE, sqrt(MSE), R^2], in that order.
results = Results(y_test, y_pred)
results

[3.174927002410336e-05,
 2.254089963970837e-09,
 4.747725733412617e-05,
 0.999811700899088]

In [68]:
# Prints min/max/mean/std of the per-row distances via get_quality_metrics.
# get_quality_metrics has no return statement, so ResultsMeters returns None:
# `metros` is None and the bare `metros` line displays nothing.
# NOTE(review): y_test and y_pred hold RSSI columns here, yet get_distance_array
# feeds the first two values of each row to GeoUtils.distanceInKm, which
# presumably expects latitude/longitude - confirm this metric is meaningful.
metros = ResultsMeters(y_test, y_pred)
metros

min:            0.20 m
max:            29.68 m
mean:           5.68 m
std deviation:  4.76 m


# Outdoor 10% test LightGBM Fingerprinting:

In [78]:
# Outdoor fingerprinting predictions read from disk, without the pontoId column.
y_pred_OutdoorFP = pd.read_csv("FinalResult/Resultados_Metodo_1.csv").drop(
    columns=["pontoId"]
)
y_pred_OutdoorFP.head()

Unnamed: 0,lat,lon
0,-8.05465,-34.952289
1,-8.054025,-34.951749
2,-8.05465,-34.952289
3,-8.054025,-34.951749
4,-8.053312,-34.949375


In [79]:
# NOTE(review): y_test is loaded from Outdoor_y_test_20%.csv, whose preview
# shows rssi_* columns, while y_pred_OutdoorFP shows lat/lon columns. Unless
# y_test is rebound elsewhere, the distances below compare different
# quantities - confirm which y_test this cell is meant to use.
# ResultsMeters returns None, so `metros` is None and displays nothing.
metros = ResultsMeters(y_test, y_pred_OutdoorFP)
metros

min:            19.48 m
max:            1151.93 m
mean:           559.21 m
std deviation:  263.15 m


# Indoor 10% test LightGBM

In [86]:
# Indoor 10% test features. The idx column is kept aside in idx_indoor before
# idx, lat and lon are removed.
X_test_indoor = pd.read_csv("databases/indoor_X_test_10%.csv")
idx_indoor = X_test_indoor['idx']
X_test_indoor = X_test_indoor.drop(columns=["idx", "lat", "lon"])
X_test_indoor.head()

Unnamed: 0,rssi_1_1,rssi_1_2,rssi_1_3,rssi_2_1,rssi_2_2,rssi_2_3,rssi_3_1,rssi_3_2,rssi_3_3,delta_1_12,...,ang_3,cos_3,sin_3,tg_3,delay_1,delay_2,delay_3,delay_12,delay_13,delay_23
0,-101.196667,-115.0,-96.04,-115.0,-96.79,-96.1,-94.42,-90.22,-115.0,13.803333,...,91.721709,-0.030045,0.999549,-33.268415,6,4,3,2,3,1
1,-100.83,-115.0,-98.56,-115.0,-94.796667,-97.648571,-99.255,-96.904,-115.0,14.17,...,94.509673,-0.078627,0.996904,-12.678838,6,3,3,3,3,0
2,-98.748,-97.85,-98.61,-98.49,-90.71,-90.82,-100.03,-95.91,-115.0,-0.898,...,91.51843,-0.026499,0.999649,-37.724728,6,3,3,3,3,0
3,-115.0,-115.0,-115.0,-115.0,-115.0,-115.0,-91.107143,-93.163333,-115.0,0.0,...,92.652203,-0.046273,0.998929,-21.587655,6,4,3,2,3,1
4,-98.748,-99.67,-98.61,-97.8,-88.115,-90.684286,-100.03,-91.585,-115.0,0.922,...,91.51843,-0.026499,0.999649,-37.724728,6,3,3,3,3,0


In [87]:
#Test Indoor 10%
# FIXME(review): this call does not match y_pred_write_File as defined in this
# notebook, which takes 12 positional parameters (X_train, nine RSSI targets,
# X_test, Metodo_Num); only 6 arguments are passed here. y_train_lat and
# y_train_lon are also not assigned in any visible cell, so on a fresh kernel
# this cell raises before producing the output shown below.
y_pred_indoor = y_pred_write_File(X_train, y_train_lat, y_train_lon, X_test_indoor, 3, idx_indoor)
y_pred_indoor.head()

Unnamed: 0,lat,lon
0,-8.055488,-34.951628
1,-8.05572,-34.951331
2,-8.055462,-34.951251
3,-8.055522,-34.951651
4,-8.05547,-34.951259


In [88]:
# Load the targets of the indoor 10% test split.
y_test_indoor = pd.read_csv("databases/indoor_y_test_10%.csv")
y_test_indoor.head()

Unnamed: 0,lat,lon
0,-8.055506,-34.951693
1,-8.055834,-34.951362
2,-8.055494,-34.951316
3,-8.055605,-34.951728
4,-8.055494,-34.951316


In [89]:
# Results returns [MAE, MSE, sqrt(MSE), R^2], in that order.
results_indoor = Results(y_test_indoor, y_pred_indoor)
results_indoor

[7.32755443748502e-05,
 6.629763120829612e-09,
 8.142335734191764e-05,
 0.7364731959189301]

In [90]:
# Prints min/max/mean/std of the per-row distances via get_quality_metrics.
# ResultsMeters returns None (get_quality_metrics has no return statement), so
# `metros_indoor` is None and the bare name below displays nothing.
metros_indoor = ResultsMeters(y_test_indoor, y_pred_indoor)
metros_indoor

min:            3.59 m
max:            17.10 m
mean:           12.32 m
std deviation:  3.14 m
