# Archivo:GeolifePrj_01_df2gjson
## Convierte los DataFrames a GeoJson

In [1]:
import geopandas
import numpy as np
import pandas as pd
#from shapely.geometry import Point
from geojson import  Point,Feature, FeatureCollection, dumps

import csv, json

import seaborn as sns

In [6]:
def plt2geojson(from_plt_file,to_geojson_file):
    features = []
    with open(from_plt_file, newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        for _ in range(6): next(reader)
        for latitude,longitude,empty,altitude,days,date,time in reader:
            latitude, longitude = map(float, (latitude, longitude))
            features.append(
                Feature(
                    geometry = Point((longitude, latitude)),
                    properties = {
                        'altitude': altitude,
                        'date': f"{date} {time}"
                    }
                )
            )

    collection = FeatureCollection(features)
    with open(to_geojson_file, "w") as f:
        f.write(f"{collection}")
        
def simplecsv2geojson(from_csv_file,to_geojson_file):
    features = []
    with open(from_csv_file, newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        next(reader)
        for date,latitude,longitude,label in reader:
            latitude, longitude = map(float, (latitude, longitude))
            features.append(
                Feature(
                    geometry = Point((longitude, latitude)),
                    properties = {
                        'date': f"{date}",
                        'label': f"{label}"
                    }
                )
            )

    collection = FeatureCollection(features)
    with open(to_geojson_file, "w") as f:
        f.write(f"{collection}")

In [3]:
def df2geojson(df,to_geojson_file):
    df=df.astype(str)
    lat = df['lat']
    lng = df['lon']
    #df = df.drop(columns=['lat', 'lon'])
    df = df.drop(columns=['lat', 'lon','label','user'])    
    
    feat_list = []
    failed = []
    for i in range(0, len(df.index)):
        props = dict(df.iloc[i])
        try:
            f = Feature(geometry=Point((float(lng.iloc[i]),float(lat.iloc[i]))),
                       properties = props)
            feat_list.append(f)
        except Exception as ex:
            print("Exception:",ex)
        
    collection = FeatureCollection(feat_list)
    with open(to_geojson_file, "w") as f:
        f.write(f"{collection}")

In [4]:
%%time
#creamos una version simplificada en csv
sin_avance = "["+"-"*100+"]"
usuarios_df={}
for usuario_num in range(0,182):
    usuarios_df[usuario_num] = pd.read_pickle(f'geolife_{usuario_num:03d}.zip')
    usuarios_df[usuario_num].columns=['date','latitude','longitude','altitude','label','user']
    #dejamos 4 digitos de precision, que equivale a una resolucion de 8m^2 en el mapa. Explicacion: https://planspace.org/20180719-geolocation_precision_by_digit/
    usuarios_df[usuario_num]['latitude']=usuarios_df[usuario_num]['latitude'].apply(lambda x:f"{x:.5f}")
    usuarios_df[usuario_num]['longitude']=usuarios_df[usuario_num]['longitude'].apply(lambda x:f"{x:.5f}")
    usuarios_df[usuario_num].drop(columns=['user','altitude'],inplace=True)
    usuarios_df[usuario_num]=usuarios_df[usuario_num].astype(str)
    #elimina los segundos y luego los repetidos, con lo que solo nos quedamos con una resolucion de 
    usuarios_df[usuario_num]['date']=usuarios_df[usuario_num]['date'].apply(lambda x:x[:-3]+':00')
    usuarios_df[usuario_num] = usuarios_df[usuario_num].drop_duplicates(subset='date', keep='first')
    usuarios_df[usuario_num].to_csv(f"geolife_{usuario_num:03d}_simpl.csv", index=False)
    ## Avance ;)
    print("["+"#"*int(100*(1+usuario_num)/182)+">"+"-"*int(100*(181-usuario_num)/181)+"]"+f" {usuario_num:03d}/182",end="\r")


Wall time: 2min 14s##################################################################################>] 181/182


In [7]:
%%time
#creamos una version simplificada en geojson
for usuario_num in range(0,182):
    simplecsv2geojson(f"geolife_{usuario_num:03d}_simpl.csv",f"geolife_{usuario_num:03d}_simpl.geojson")
    ## Avance ;)
    print("["+"#"*int(100*(1+usuario_num)/182)+">"+"-"*int(100*(181-usuario_num)/181)+"]"+f" {usuario_num:03d}/182",end="\r")


Wall time: 24.7 s####################################################################################>] 181/182


In [180]:
plt2geojson("data/007/Trajectory/20081028161937.plt","geo_007.geojson")

In [None]:

usuarios_df[usuario_num]['date']=usuarios_df[usuario_num]['date'].apply(lambda x:x[:-3]+':00')
usuarios_df[usuario_num] = usuarios_df[usuario_num].drop_duplicates(subset='date', keep='first')

In [358]:
dfx = geopandas.read_file("geolife_simpl_147.geojson")
geopandas.to_


Unnamed: 0,date,geometry
0,2011-02-24T18:45:00,POINT (116.32880 39.97520)
1,2011-02-24T18:46:00,POINT (116.32930 39.97530)
2,2011-02-24T18:47:00,POINT (116.32930 39.97530)
3,2011-02-24T18:48:00,POINT (116.32970 39.97490)
4,2011-02-24T18:49:00,POINT (116.32950 39.97530)
...,...,...
2921,2011-05-19T09:19:00,POINT (116.30290 39.97510)
2922,2011-05-19T09:20:00,POINT (116.30280 39.97580)
2923,2011-05-19T09:21:00,POINT (116.30280 39.97670)
2924,2011-05-19T09:22:00,POINT (116.30280 39.97730)


In [332]:
dfx = geopandas.read_file("geolife_simpl_006.geojson")
dfx

Unnamed: 0,date,geometry
0,2008-10-23T06:59:00,POINT (116.32010 39.98410)
1,2008-10-23T07:00:00,POINT (116.31970 39.98490)
2,2008-10-23T07:01:00,POINT (116.31960 39.98520)
3,2008-10-23T07:02:00,POINT (116.32010 39.98510)
4,2008-10-23T07:03:00,POINT (116.32050 39.98470)
...,...,...
2730,2008-12-11T08:18:00,POINT (116.34230 39.98200)
2731,2008-12-11T08:19:00,POINT (116.34140 39.98170)
2732,2008-12-11T08:20:00,POINT (116.34010 39.98210)
2733,2008-12-11T08:21:00,POINT (116.33960 39.98180)


In [419]:
usuario_num=106
usuarios_df[usuario_num] = pd.read_csv(f'geolife_simpl_{usuario_num:03d}.csv').astype(str)
usuarios_df[usuario_num].dtypes

date         object
latitude     object
longitude    object
dtype: object

In [383]:
#for i in range(0,200):
#    print(i,usuarios_df[usuario_num].iloc[i].tolist())
    
type(usuarios_df[usuario_num].iloc[1].tolist()[1])

str

In [420]:
usuarios_df[usuario_num]

Unnamed: 0,date,latitude,longitude
0,2007-10-08 01:56:00,40.0137,116.4735
1,2007-10-08 01:57:00,40.0116,116.4798
2,2007-10-08 01:58:00,40.0006,116.4908
3,2007-10-08 01:59:00,39.9972,116.4926
4,2007-10-08 02:00:00,40.0041,116.4993
...,...,...,...
839,2007-10-10 13:15:00,39.9755,116.3395
840,2007-10-10 13:16:00,39.9754,116.3378
841,2007-10-10 13:17:00,39.9753,116.3306
842,2007-10-10 13:18:00,39.9757,116.3309


In [417]:
from sklearn.cluster import KMeans
import numpy as np

x = usuarios_df[usuario_num][['latitude','longitude']].astype(float).to_numpy()

km = KMeans()
km.fit(x)

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
       n_clusters=8, n_init=10, n_jobs=None, precompute_distances='auto',
       random_state=None, tol=0.0001, verbose=0)

In [418]:
km.cluster_centers_

array([[ 39.91043291, 116.41469304],
       [ 39.9872506 , 116.33174458],
       [ 40.02404235, 116.51449294],
       [ 39.91366571, 116.21412286],
       [ 39.99766739, 116.46560507],
       [ 39.88887091, 116.48109182],
       [ 39.9972    , 116.23950435],
       [ 39.96227717, 116.4286937 ]])

In [403]:
 X = np.array([[1, 2,4], [1, 4,3], [1, 0,2], [10, 2,9]])

In [406]:
np.array([[1, 2,4], [1, 4,3], [1, 0,2], [10, 2,9]])


array([[ 1,  2,  4],
       [ 1,  4,  3],
       [ 1,  0,  2],
       [10,  2,  9]])

In [416]:
usuarios_df[usuario_num][['latitude','longitude']].astype(float).to_numpy()


array([[ 40.0137, 116.4735],
       [ 40.0116, 116.4798],
       [ 40.0006, 116.4908],
       ...,
       [ 39.9753, 116.3306],
       [ 39.9757, 116.3309],
       [ 39.9756, 116.3309]])

In [422]:
dfx = geopandas.read_file("geolife_simpl_106.geojson")
dfx

Unnamed: 0,date,geometry
0,2007-10-08T01:56:00,POINT (116.47350 40.01370)
1,2007-10-08T01:57:00,POINT (116.47980 40.01160)
2,2007-10-08T01:58:00,POINT (116.49080 40.00060)
3,2007-10-08T01:59:00,POINT (116.49260 39.99720)
4,2007-10-08T02:00:00,POINT (116.49930 40.00410)
...,...,...
839,2007-10-10T13:15:00,POINT (116.33950 39.97550)
840,2007-10-10T13:16:00,POINT (116.33780 39.97540)
841,2007-10-10T13:17:00,POINT (116.33060 39.97530)
842,2007-10-10T13:18:00,POINT (116.33090 39.97570)


In [433]:

dfxb = geopandas.read_file("map.geojson")
dfxb.w

Unnamed: 0,marker-color,marker-size,marker-symbol,stroke,stroke-width,stroke-opacity,fill,fill-opacity,geometry
0,#d77070,small,square,,,,,,POINT (84.26239 42.79540)
1,,,,#e7dada,2.0,1.0,#ffffff,0.0,"POLYGON ((84.35303 42.86590, 84.33380 42.75710..."
2,,,,,,,,,"LINESTRING (84.56039 42.77323, 84.50684 42.899..."


In [423]:
df = pd.read_pickle("geolife_106.zip")
df.drop(['time','a'])

Unnamed: 0,time,lat,lon,alt,label,user
0,2007-10-08 01:56:45,40.013683,116.473467,154.199475,4,106
1,2007-10-08 01:56:58,40.012967,116.476683,154.199475,4,106
2,2007-10-08 01:57:13,40.011567,116.479800,147.637795,4,106
3,2007-10-08 01:57:26,40.009733,116.482167,141.076115,4,106
4,2007-10-08 01:58:37,40.000600,116.490833,147.637795,4,106
...,...,...,...,...,...,...
452,2007-10-10 13:17:14,39.975367,116.330433,180.446194,4,106
453,2007-10-10 13:17:34,39.975483,116.331583,173.884514,4,106
454,2007-10-10 13:17:45,39.975700,116.331733,177.165354,4,106
455,2007-10-10 13:18:08,39.975733,116.330900,180.446194,4,106


In [421]:


from geopy.distance import distance
distance(p1, p2)
Out: Distance(229.883275249)
distance(p1, p2).km
Out: 229.88327524944066

NameError: name 'p1' is not defined

In [432]:
df = pd.read_pickle("geolife_106.zip")
df.drop(columns=['time','alt','label','user'],inplace=True)
df['marker-size']="small"
df['marker-color']="small"

In [424]:
gdf = geopandas.GeoDataFrame(
    df, geometry=geopandas.points_from_xy(df.lat, df.lon))

In [426]:

gdf.to_file("geolife_106.geojson", driver="GeoJSON")