# Community Prediction based on Taxi Dataset

## Load dataset

In [66]:
import csv
import json
from datetime import datetime
from typing import Iterator

def load_csv_content(path: str = 'train.csv') -> Iterator[dict]:
    '''
    Returns a generator for all lines in the csv file with correct field types.

    Columns are converted by position:
    0, 1, 6 (trip_id, call_type, day_type) stay strings;
    2, 3, 4 (origin_call, origin_stand, taxi_id) become int, or stay "" when the cell is empty;
    5 (timestamp) becomes a naive datetime built from the epoch seconds via datetime.fromtimestamp;
    7 (missing_data) becomes bool;
    8 (polyline) is parsed from its JSON text.

    Blank lines are skipped and an empty file yields nothing.

    :param path: Path of the csv file to read. Defaults to 'train.csv'.
    :return: An iterator of dicts mapping the lower-cased header names to the converted values.
    '''

    def to_optional_int(value: str):
        # empty cells stay "" so existing callers can keep comparing against ""
        return int(value) if value != "" else ""

    # column index -> converter; columns without an entry are kept as strings
    converters = {
        2: to_optional_int,
        3: to_optional_int,
        4: to_optional_int,
        5: lambda value: datetime.fromtimestamp(int(value)),
        7: lambda value: value.lower() == 'true',
        8: json.loads,
    }

    # newline='' lets the csv module handle newlines itself (required for quoted fields containing them)
    with open(path, newline='') as csv_file:
        reader = csv.reader(csv_file)

        header_row = next(reader, None)
        if header_row is None:
            # empty file: a bare next(reader) would raise StopIteration, which a generator turns into RuntimeError
            return
        headers = [h.lower() for h in header_row]

        for line in reader:
            if not line:
                # csv.reader returns [] for blank lines; there is nothing to convert
                continue

            # convert line fields to correct type (only columns that have a header and a value)
            for i in range(min(len(headers), len(line))):
                convert = converters.get(i)
                if convert is not None:
                    line[i] = convert(line[i])

            yield dict(zip(headers, line))


## Display some dataset routes

In [67]:
from typing import List
import folium

def displayNodes(nodes: List[List[float]]):
    '''
    Renders every coordinate pair as a red marker on a folium map and displays the map.

    Each marker's popup is the position of the pair inside ``nodes``.

    :param nodes: A list of coordinates, eg. [[1,2],[1,3]]. Each pair is plotted
                  at location [pair[1], pair[0]] (presumably the input is [lon, lat] - confirm).
    '''
    # NOTE(review): 'stamenterrain' may not be available as a built-in tile set in newer folium releases - confirm.
    city_map = folium.Map(
        location=[41.15, -8.6],
        tiles='stamenterrain',
        zoom_start=12,
        control_scale=True,
    )

    for position, coords in enumerate(nodes):
        marker = folium.Marker(
            # the two components are swapped before being handed to folium
            location=[coords[1], coords[0]],
            popup=position,
            icon=folium.Icon(color='red', icon='circle'),
        )
        marker.add_to(city_map)

    display(city_map)

In [68]:
# Create the row generator; load_csv_content is a generator function, so the file is not opened until the first next() call.
content = load_csv_content()

In [69]:
# Take the next row from the generator and plot its polyline; re-running this cell advances to the following row.
displayNodes(next(content)['polyline'])