# Import libraries

In [167]:
import pickle
import random
import random as rnd
import time
from pathlib import Path

import pandas as pd
import plotly.express as px
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Simulate geographic data for Bogotá, Colombia

In [None]:

def generar_coordenadas_bogota(n=10, seed=None):
    """
    Generate ``n`` random delivery points inside a bounding box in Bogotá.

    Args:
        n (int): Number of points to generate. Values <= 0 yield an empty
            frame that still carries the four columns.
        seed (int | None): Optional seed. When given, a private
            ``random.Random(seed)`` generator is used, so repeated calls with
            the same seed return identical frames. When ``None`` (default) the
            shared module-level generator of ``random`` is used.

    Returns:
        DataFrame: One row per point with the columns ``id`` (0..n-1),
        ``lat`` and ``lon`` (decimal degrees rounded to 7 places) and
        ``delivery_day`` (integer drawn from 0..6, both ends included).
    """
    # Approximate bounding box in decimal degrees, listed low -> high.
    # An alternative, wider box previously tried here was
    # lat 4.48..4.83 / lon -74.20..-73.99.
    lat_min, lat_max = 4.678025, 4.755422
    lon_min, lon_max = -74.135649, -74.075022

    # A dedicated generator keeps seeded calls from disturbing global state.
    rng = random if seed is None else random.Random(seed)

    # Tuple elements are evaluated left to right: lat, lon, then day.
    coordenadas = [
        (
            r,
            round(rng.uniform(lat_min, lat_max), 7),
            round(rng.uniform(lon_min, lon_max), 7),
            rng.randint(0, 6),
        )
        for r in range(n)
    ]

    return pd.DataFrame(coordenadas, columns=['id', 'lat', 'lon', 'delivery_day'])

In [149]:
def crear_mapa(df):
    """
    Render an interactive Plotly Express map of the given coordinates.

    The caller's frame is not modified: the float coercion of the coordinate
    columns is applied to a copy that is used only for plotting.

    Args:
        df (DataFrame): Must contain the columns 'lat', 'lon', 'id' and
            'delivery_day'. 'lat' and 'lon' must be convertible to float.

    Returns:
        None. The figure is displayed through ``fig.show()``.
    """
    # Coerce coordinates to float on a copy so the input frame stays untouched.
    plot_df = df.assign(
        lat=df['lat'].astype(float),
        lon=df['lon'].astype(float),
    )

    # Points are coloured by 'delivery_day'; hovering shows the point id.
    fig = px.scatter_mapbox(
        plot_df,
        lat="lat",
        lon="lon",
        hover_name="id",
        hover_data=["id"],
        color="delivery_day",
        zoom=10,
        height=500,
        width=400,
        color_continuous_scale=px.colors.cyclical.IceFire,
    )

    # Light base map, with the margins removed so the map fills the figure.
    fig.update_layout(
        mapbox_style="carto-positron",
        margin={"r": 0, "t": 0, "l": 0, "b": 0},
    )
    fig.show()

In [None]:

# Draw 300 synthetic points, preview the first rows and plot them on the map.
coordenadas = generar_coordenadas_bogota(n=300)
display(coordenadas.head(5))
crear_mapa(df=coordenadas)

Unnamed: 0,id,lat,lon,delivery_day
0,0,4.697693,-74.10629,0
1,1,4.703444,-74.135069,1
2,2,4.711193,-74.13541,4
3,3,4.703426,-74.113132,3
4,4,4.751838,-74.095373,5


# Fit a K-means clustering model

In [151]:
# Fit K-means with 6 clusters on the latitude/longitude pairs.
kmeans = KMeans(n_clusters=6, n_init="auto", random_state=0)
kmeans.fit(coordenadas[['lat', 'lon']])

# Overwrite the random delivery day with the cluster label of each point.
coordenadas['delivery_day'] = kmeans.labels_

In [152]:
# Plot the same points again; 'delivery_day' now holds the K-means cluster labels.
crear_mapa(coordenadas)

# Predict new data

## Using K-means to predict

In [None]:
# Generate 10,000 new points and plot them with their random delivery days.
df_new = generar_coordenadas_bogota(n=10000)
crear_mapa(df_new)

# Relabel every point with the fitted K-means model and plot again.
df_new = df_new.assign(delivery_day=kmeans.predict(df_new[['lat', 'lon']]))
crear_mapa(df_new)

## Using KNN to predict

In [168]:
# Train a K-NN classifier on the K-means-labelled points.
# KNeighborsClassifier and train_test_split are imported in the notebook's
# top import cell, so every dependency is declared in one place.
n_neighbors = 15

# Hold out 20% of the points to measure how well K-NN reproduces the labels.
X_train, X_test, y_train, y_test = train_test_split(
    df_new[['lat', 'lon']],
    df_new["delivery_day"],
    test_size=0.2,
    random_state=0,
)

knn = KNeighborsClassifier(n_neighbors=n_neighbors)
knn.fit(X_train, y_train)

print('Accuracy of K-NN classifier on training set: {:.2f}'
      .format(knn.score(X_train, y_train)))
print('Accuracy of K-NN classifier on test set: {:.2f}'
      .format(knn.score(X_test, y_test)))

Accuracy of K-NN classifier on training set: 0.99
Accuracy of K-NN classifier on test set: 0.99


In [169]:
# Generate 100 fresh points, keeping only the id and the coordinates.
df_for_KNN = generar_coordenadas_bogota(n=100)[["id", "lat", "lon"]]

# Assign each point a delivery day with the trained K-NN model, then map it.
df_for_KNN = df_for_KNN.assign(
    delivery_day=knn.predict(df_for_KNN[["lat", "lon"]])
)

crear_mapa(df_for_KNN)


# Save model (export)

In [172]:
# Save the fitted K-NN model to disk.
filename = 'models_trained/finalized_model_KNN.sav'

# Create the target folder if it is missing so a fresh checkout can run this cell.
Path(filename).parent.mkdir(parents=True, exist_ok=True)

# The context manager flushes and closes the file even if pickling fails.
# NOTE: pickle files can execute code on load; only load this file from a trusted source.
with open(filename, 'wb') as model_file:
    pickle.dump(knn, model_file)

# Predict 1 million rows

In [165]:
# Benchmark K-means inference on one million synthetic points.
testing = generar_coordenadas_bogota(1000000)[["lat", "lon"]]

# Only the prediction is timed; data generation above is excluded.
# perf_counter() is monotonic and high-resolution, so the measured interval
# is not affected by system clock adjustments.
start_time = time.perf_counter()
testing["delivery_day"] = kmeans.predict(testing[['lat', 'lon']])
print("--- %s seconds ---" % (time.perf_counter() - start_time))

--- 0.011699914932250977 seconds ---


In [166]:
# Benchmark K-NN inference on one million synthetic points.
testing = generar_coordenadas_bogota(1000000)[["lat", "lon"]]

# Only the prediction is timed; data generation above is excluded.
# perf_counter() is monotonic and high-resolution, so the measured interval
# is not affected by system clock adjustments.
start_time = time.perf_counter()
testing["delivery_day"] = knn.predict(testing[['lat', 'lon']])
print("--- %s seconds ---" % (time.perf_counter() - start_time))

--- 10.650933027267456 seconds ---
