# **GeolifePrj_01_df2gjson.ipynb**
## Convierte los DataFrames creados en **GeolifePrj_00_plt2df** (archivos geolife_xxx.zip) a archivos GeoJson simplificados:
## **geolife_xxx_simpl.geojson** (puntos gps=(lat,lon) y su date="fecha/hora" respectiva)
## **geolife_xxx_lines.geojson** (una linea por trayectoria)
## **geolife_xxx_linesimpl.geojson** (lineas despues de simplificar la data)


In [8]:
import geopandas
import numpy as np
import pandas as pd
#from shapely.geometry import Point
from geojson import  Point,Feature, FeatureCollection, dumps

import csv, json

import seaborn as sns

from shapely.geometry import LineString
from shapely.geometry import Point

from dateutil.parser import parse as parsedt

### def **plt2geojson(...):** Convierte archivo original plt a geojson
### def **simplecsv2geojson(...)** Convierte un archivo **geolife_xxx_simpl.csv** generado por la funcion **crea_simpl_csv():** a un archivo geojson

In [9]:
def plt2geojson(from_plt_file,to_geojson_file):
    """Convert an original Geolife ``.plt`` file into a GeoJSON file of points.

    The first six lines of the input are treated as header and skipped.
    Every following non-empty row is read as
    ``latitude, longitude, <unused>, altitude, days, date, time`` and becomes
    one Point feature at ``(longitude, latitude)`` with two properties:
    ``altitude`` (kept as the string read from the file) and ``date``
    (``"<date> <time>"``).

    Args:
        from_plt_file: Path of the ``.plt`` file to read.
        to_geojson_file: Path of the GeoJSON file to write (overwritten).

    Raises:
        ValueError: If a non-empty data row does not have exactly seven
            fields, or its latitude/longitude are not numeric.
    """
    header_lines = 6
    features = []
    with open(from_plt_file, newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        for line_number, row in enumerate(reader):
            # Skip the header, and tolerate blank lines (e.g. a trailing
            # newline) instead of failing on tuple unpacking. A file shorter
            # than the header simply yields an empty collection.
            if line_number < header_lines or not row:
                continue
            latitude, longitude, _unused, altitude, _days, date, time = row
            features.append(
                Feature(
                    # GeoJSON positions are (x, y) == (longitude, latitude).
                    geometry=Point((float(longitude), float(latitude))),
                    properties={
                        'altitude': altitude,
                        'date': f"{date} {time}",
                    },
                )
            )

    collection = FeatureCollection(features)
    with open(to_geojson_file, "w") as f:
        # Serialise explicitly rather than relying on the object's __str__;
        # sort_keys=True keeps the key order that str(collection) produced.
        f.write(dumps(collection, sort_keys=True))
        
def simplecsv2geojson(from_csv_file,to_geojson_file):
    """Convert a simplified ``geolife_xxx_simpl.csv`` file into GeoJSON points.

    The first line of the CSV is skipped as a header. Every following
    non-empty row is read as ``date, latitude, longitude, label`` and becomes
    one Point feature at ``(longitude, latitude)`` whose properties are the
    ``date`` and ``label`` strings exactly as read.

    Args:
        from_csv_file: Path of the CSV file to read.
        to_geojson_file: Path of the GeoJSON file to write (overwritten).

    Raises:
        ValueError: If a non-empty data row does not have exactly four
            fields, or its latitude/longitude are not numeric.
    """
    features = []
    with open(from_csv_file, newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        # Default of None: an empty file produces an empty collection
        # instead of leaking StopIteration to the caller.
        next(reader, None)
        for row in reader:
            if not row:
                # Blank line (e.g. trailing newline): nothing to convert.
                continue
            date, latitude, longitude, label = row
            features.append(
                Feature(
                    # GeoJSON positions are (x, y) == (longitude, latitude).
                    geometry=Point((float(longitude), float(latitude))),
                    properties={
                        'date': date,
                        'label': label,
                    },
                )
            )

    collection = FeatureCollection(features)
    with open(to_geojson_file, "w") as f:
        # Serialise explicitly rather than relying on the object's __str__;
        # sort_keys=True keeps the key order that str(collection) produced.
        f.write(dumps(collection, sort_keys=True))

### def **df2geojson(...)** genera desde **geolife_xxx.zip** un archivo **geolife_xxx.geojson** (TODA LA DATA)

In [10]:
def df2geojson(df,to_geojson_file):
    """Write every row of a Geolife DataFrame as a GeoJSON Point feature.

    All columns are first cast to ``str``. The ``lat`` and ``lon`` columns
    give the point position, the ``user`` column is dropped, and every
    remaining column becomes a (string-valued) feature property. Rows whose
    feature cannot be built (for example non-numeric coordinates) are
    reported with ``print`` and skipped. The caller's DataFrame is not
    modified.

    Args:
        df: DataFrame with at least ``lat``, ``lon`` and ``user`` columns.
        to_geojson_file: Path of the GeoJSON file to write (overwritten).
    """
    df = df.astype(str)
    lat = df['lat']
    lng = df['lon']
    # One plain dict per row, built in a single pass instead of creating a
    # Series with df.iloc[i] on every iteration.
    row_properties = df.drop(columns=['lat', 'lon', 'user']).to_dict('records')

    feat_list = []
    for x, y, props in zip(lng, lat, row_properties):
        try:
            # GeoJSON positions are (x, y) == (longitude, latitude).
            feature = Feature(geometry=Point((float(x), float(y))),
                              properties=props)
        except Exception as ex:
            # Best effort: report the bad row and carry on with the rest.
            print("Exception:",ex)
        else:
            feat_list.append(feature)

    collection = FeatureCollection(feat_list)
    with open(to_geojson_file, "w") as f:
        # Serialise explicitly rather than relying on the object's __str__;
        # sort_keys=True keeps the key order that str(collection) produced.
        f.write(dumps(collection, sort_keys=True))

## def **crea_simpl_csv(**xxx**):** genera una version simplificada del DataFrame almacenado en **geolife_xxx.zip** y lo guarda como **geolife_xxx_simpl.csv**
### Simplificaciones:
### - *Guarda solo 5 digitos decimales para latitude y longitude*
### - *Bota la columna 'user' y 'altitude'*
### - *Guarda como maximo UNA coordenada gps por minuto (si hay mas de un punto gps por minuto desecha los sobrantes)*

In [14]:
%%time
# Build a simplified CSV version of one user's data.
def crea_simpl_csv(xxx):
    """Write ``geolife_<xxx>_simpl.csv`` from the pickled ``geolife_<xxx>.zip``.

    ``xxx`` is the user number; it is zero-padded to three digits in both
    file names. The six columns of the pickled frame are renamed
    positionally to date, latitude, longitude, altitude, label and user.
    The output keeps date, latitude, longitude and label, with coordinates
    formatted to five decimals and at most one row per minute.
    """
    user_tag = f"{xxx:03d}"
    frame = pd.read_pickle(f'geolife_{user_tag}.zip')
    frame.columns = ['date', 'latitude', 'longitude', 'altitude', 'label', 'user']

    # Five decimal digits is roughly metre-scale resolution on the map.
    # Background: https://planspace.org/20180719-geolocation_precision_by_digit/
    five_decimals = "{:.5f}".format
    for coordinate in ('latitude', 'longitude'):
        frame[coordinate] = frame[coordinate].apply(five_decimals)

    frame = frame.drop(columns=['user', 'altitude']).astype(str)

    # Replace the seconds with ':00' and drop repeated timestamps, so only
    # the first GPS point of every minute survives.
    frame['date'] = frame['date'].apply(lambda stamp: stamp[:-3] + ':00')
    one_per_minute = frame.drop_duplicates(subset='date', keep='first')
    one_per_minute.to_csv(f"geolife_{user_tag}_simpl.csv", index=False)


Wall time: 0 ns


#### Creamos los CSV simplificados

In [None]:
# Create the simplified CSV for every user (files are numbered 000..181).
total_users = 182
bar_width = 100
for xxx in range(total_users):
    crea_simpl_csv(xxx)
    # Progress bar, e.g. [########>------------]. The empty part is derived
    # from the filled part so the bar keeps a constant width; "\r" rewrites
    # the same console line on every iteration.
    filled = int(bar_width * (xxx + 1) / total_users)
    print("[" + "#" * filled + ">" + "-" * (bar_width - filled) + "]" + f" {xxx:03d}/182", end="\r")

#### Generamos **geolife_xxx_simpl.geojson** a partir de **geolife_xxx_simpl.csv**

In [7]:
%%time
# Create the simplified GeoJSON for every user from its simplified CSV.
total_users = 182
bar_width = 100
for usuario_num in range(total_users):
    simplecsv2geojson(f"geolife_{usuario_num:03d}_simpl.csv",f"geolife_{usuario_num:03d}_simpl.geojson")
    # Progress bar. The empty part is derived from the filled part so the
    # bar keeps a constant width; "\r" rewrites the same console line.
    filled = int(bar_width * (usuario_num + 1) / total_users)
    print("[" + "#" * filled + ">" + "-" * (bar_width - filled) + "]" + f" {usuario_num:03d}/182", end="\r")

Wall time: 24.7 s####################################################################################>] 181/182


## Ahora generamos
## **geolife_xxx_lines.geojson** (una linea por trayectoria)
## **geolife_xxx_linesimpl.geojson** (lineas despues de simplificar la data)

In [29]:
# Pick one user, derive the input/output file names and load its points.
xxx = 52
file_prefix = f"geolife_{xxx:03d}_"
archivo_salida_gj = f"{file_prefix}lines.geojson"
archivo_gj = f"{file_prefix}simpl.geojson"
print(archivo_gj, " -> ", archivo_salida_gj)
gdf = geopandas.read_file(archivo_gj)
gdf

geolife_052_simpl.geojson  ->  geolife_052_lines.geojson


Unnamed: 0,date,label,geometry
0,2008-01-16T07:36:00,0,POINT (116.25773 39.91637)
1,2008-01-16T07:37:00,0,POINT (116.25868 39.92080)
2,2008-01-16T07:39:00,0,POINT (116.26585 39.92275)
3,2008-01-16T07:40:00,0,POINT (116.26950 39.92432)
4,2008-01-16T07:42:00,0,POINT (116.26862 39.94303)
...,...,...,...
21992,2008-10-30T07:33:00,0,POINT (116.32586 39.97504)
21993,2008-10-30T07:34:00,0,POINT (116.32754 39.97511)
21994,2008-10-30T07:35:00,0,POINT (116.32909 39.97522)
21995,2008-10-30T07:36:00,0,POINT (116.33010 39.97530)


In [28]:
# Split the points by calendar day (the first 10 characters of `date`).
day_of_point = gdf.date.str[:10]
points_by_day = {}
for dia in list(day_of_point.unique()):
    # .loc needs a boolean mask; indexing with the raw day strings would be
    # treated as a label lookup. Keep each day's rows instead of discarding them.
    points_by_day[dia] = gdf.loc[day_of_point == dia]

str

In [None]:
# Show the point geometries at positions 10..99 as a plain Python list.
gdf.geometry.iloc[10:100].to_list()

In [37]:
# Distinct day prefixes (first 10 characters of the `date` strings) in gdf.
list_dates=list(gdf.date.str[:10].unique())

In [43]:
# Display every point recorded on the day stored at index 50 of list_dates.
gdf.loc[gdf.date.str[:10]==list_dates[50]]

Unnamed: 0,date,label,geometry
9080,2008-07-01T08:38:00,0,POINT (116.24674 39.90606)
9081,2008-07-01T08:39:00,0,POINT (116.24727 39.90595)
9082,2008-07-01T08:40:00,0,POINT (116.24853 39.90627)
9083,2008-07-01T08:41:00,0,POINT (116.24890 39.90592)
9084,2008-07-01T08:43:00,0,POINT (116.24890 39.90592)
...,...,...,...
9216,2008-07-01T11:25:00,0,POINT (116.35274 39.98524)
9217,2008-07-01T11:26:00,0,POINT (116.35276 39.98523)
9218,2008-07-01T11:27:00,0,POINT (116.35276 39.98522)
9219,2008-07-01T11:28:00,0,POINT (116.35285 39.98541)


In [433]:

# Load a sample GeoJSON file to inspect the styling columns it carries.
dfxb = geopandas.read_file("map.geojson")
# Display the whole frame; the previous `dfxb.w` named a column that the
# displayed frame does not have.
dfxb

Unnamed: 0,marker-color,marker-size,marker-symbol,stroke,stroke-width,stroke-opacity,fill,fill-opacity,geometry
0,#d77070,small,square,,,,,,POINT (84.26239 42.79540)
1,,,,#e7dada,2.0,1.0,#ffffff,0.0,"POLYGON ((84.35303 42.86590, 84.33380 42.75710..."
2,,,,,,,,,"LINESTRING (84.56039 42.77323, 84.50684 42.899..."


In [432]:
# Load user 106 and drop every column except the coordinates.
df = pd.read_pickle("geolife_106.zip").drop(columns=['time','alt','label','user'])
# Marker styling columns, as seen in the sample map.geojson.
df['marker-size']="small"
# marker-color must be a colour, not a size: "small" was a copy-paste of the
# line above. This reuses the hex colour from the sample map.geojson marker.
df['marker-color']="#d77070"

In [424]:
# Build point geometries for user 106. points_from_xy takes x first, then y,
# so longitude must come before latitude (the same (lon, lat) order used when
# building Point features elsewhere in this notebook).
gdf = geopandas.GeoDataFrame(
    df, geometry=geopandas.points_from_xy(df.lon, df.lat))

In [426]:

# Write the current GeoDataFrame to disk using the GeoJSON driver.
gdf.to_file("geolife_106.geojson", driver="GeoJSON")

In [176]:
# Load the simplified points file for user 005 and display it.
# Note: this rebinds `gdf`, replacing whatever frame it held before.
gdf = geopandas.read_file("geolife_005_simpl.geojson")
gdf

Unnamed: 0,date,label,geometry
0,2008-10-24T04:12:00,0,POINT (116.32134 40.00415)
1,2008-10-24T04:13:00,0,POINT (116.32148 40.00351)
2,2008-10-24T04:14:00,0,POINT (116.32151 40.00173)
3,2008-10-24T04:15:00,0,POINT (116.32173 39.99988)
4,2008-10-24T04:16:00,0,POINT (116.32378 39.99957)
...,...,...,...
8757,2009-03-19T05:42:00,0,POINT (116.32462 40.00111)
8758,2009-03-19T05:43:00,0,POINT (116.32560 40.00109)
8759,2009-03-19T05:44:00,0,POINT (116.32645 40.00114)
8760,2009-03-19T05:45:00,0,POINT (116.32707 40.00102)


In [177]:
# Show the point geometries at positions 10..99 as a plain Python list.
gdf.geometry.iloc[10:100].to_list()

[<shapely.geometry.point.Point at 0x18c5db74608>,
 <shapely.geometry.point.Point at 0x18c59233148>,
 <shapely.geometry.point.Point at 0x18c592330c8>,
 <shapely.geometry.point.Point at 0x18c59233348>,
 <shapely.geometry.point.Point at 0x18c592333c8>,
 <shapely.geometry.point.Point at 0x18c59233588>,
 <shapely.geometry.point.Point at 0x18c59233608>,
 <shapely.geometry.point.Point at 0x18c592337c8>,
 <shapely.geometry.point.Point at 0x18c59233848>,
 <shapely.geometry.point.Point at 0x18c59233a08>,
 <shapely.geometry.point.Point at 0x18c59233ac8>,
 <shapely.geometry.point.Point at 0x18c59233e48>,
 <shapely.geometry.point.Point at 0x18c59233f88>,
 <shapely.geometry.point.Point at 0x18c5e59de48>,
 <shapely.geometry.point.Point at 0x18c5923ef48>,
 <shapely.geometry.point.Point at 0x18c592181c8>,
 <shapely.geometry.point.Point at 0x18c59218248>,
 <shapely.geometry.point.Point at 0x18c59218408>,
 <shapely.geometry.point.Point at 0x18c59218488>,
 <shapely.geometry.point.Point at 0x18c59218648>,
