In [1]:
import pandas as pd
from pathlib import Path
from fiona.crs import from_epsg
from fiona import open as fopen
from shapely.geometry import Point, LineString
import ast

In [3]:
foo = "(3.2, 1.3)"
py_tup = ast.literal_eval(foo)
py_tup

(3.2, 1.3)

In [2]:
porto_csv_path = Path('./data/sources/porto/train.csv')
df = pd.read_csv(porto_csv_path)

In [3]:
df = df.head(100000)


In [4]:
min_two_points_mask = df["POLYLINE"].str.count(",") > 1
point_filter_df = df[min_two_points_mask]

In [5]:
df.size - point_filter_df.size

13995

In [6]:
explore_one_point_dfs = df[~min_two_points_mask]

explore_one_point_dfs.head(100)

Unnamed: 0,TRIP_ID,CALL_TYPE,ORIGIN_CALL,ORIGIN_STAND,TAXI_ID,TIMESTAMP,DAY_TYPE,MISSING_DATA,POLYLINE
54,1372642886620000403,B,,57.0,20000403,1372642886,A,False,"[[-8.610912,41.145786]]"
114,1372644692620000173,C,,,20000173,1372644692,A,False,"[[-8.598186,41.145633]]"
142,1372644011620000463,C,,,20000463,1372644011,A,False,"[[-8.617662,41.14638]]"
190,1372650895620000403,C,,,20000403,1372650895,A,False,"[[-8.615817,41.147298]]"
240,1372653466620000403,B,,30.0,20000403,1372653466,A,False,"[[-8.62704,41.15187]]"
...,...,...,...,...,...,...,...,...,...
6119,1372696152620000981,C,,,20000981,1372696152,A,False,[]
6281,1372763893620000089,B,,31.0,20000089,1372763893,A,False,"[[-8.663787,41.177196]]"
6285,1372761554620000557,B,,9.0,20000557,1372761554,A,False,"[[-8.60526,41.144265]]"
6316,1372765426620000398,C,,,20000398,1372765426,A,False,"[[-8.63361,41.158791]]"


In [7]:
schema = {
    'geometry': 'LineString',
    'properties': {
        'trip_id': 'int',
        'call_type': 'str:80',
        # 'origin_call': 'float:24.5',
        # 'origin_stand': 'float:24.5',
        'taxi_id': 'int',
        'timestamp': 'int',
        'day_type': 'str:80',
        'missing_data': 'int',
    }
}

porto_output_path = Path('./data/generated/train').resolve()

def append_to_shp(row, shp):
    polyline = ast.literal_eval(row['POLYLINE'])
    line_string = LineString(polyline)
    shp.write({
        'geometry': {
            'type': 'LineString',
            'coordinates': line_string.coords,
        },
        'properties': {
            'trip_id': row['TRIP_ID'],
            'call_type': row['CALL_TYPE'],
            # 'origin_call': row['ORIGINAL_CALL'],
            # 'origin_stand': row['ORIGIN_STAND'],
            'taxi_id': row['TAXI_ID'],
            'timestamp': row['TIMESTAMP'],
            'day_type': row['DAY_TYPE'],
            'missing_data': row['MISSING_DATA'],
        }
    })  
    

In [8]:
with fopen(porto_output_path, 'w', driver='ESRI Shapefile', crs=from_epsg(4326), schema=schema) as shp:
    point_filter_df.apply(append_to_shp, shp=shp, axis=1)