## Data Import
1. Use pandas to import the pickle file containing the car-sharing trip data.
2. Take a look at the structure of the dataset by displaying it with `display()`.

In [None]:
import pandas as pd
import geopandas as gpd
import shapely.wkb
from shapely.geometry import Point
from matplotlib import pyplot as plt
%matplotlib inline
# Read the pickled trip table and wrap it in a GeoDataFrame whose active
# geometry column is 'start_geom'.
# NOTE(review): unpickling can execute arbitrary code — only load files from a trusted source.
trips = gpd.GeoDataFrame(pd.read_pickle('data/trips.p'), geometry='start_geom')

# Preview the first rows to inspect the available columns.
display(trips.head())

Plot the start positions of the trips.

In [None]:
# Scatter plot of every trip's start point (x/y coordinates of 'start_geom').
fig, ax = plt.subplots(figsize=(10, 10))
ax.scatter(trips.start_geom.x.values, trips.start_geom.y.values, s=0.4)
# NOTE(review): x/y are presumably lon/lat in degrees (the heatmap cell passes
# y/x to folium as lat/lon) — confirm against the data's CRS.
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
ax.set_title('Trip start positions')
# plt.show() rather than fig.show(): with the inline backend fig.show() cannot
# open a window and only emits a "non-GUI backend" warning.
plt.show()

1. Over what period was the data collected?
2. How many different cars are included in the dataset?
3. Which vehicle made the most trips, and how many trips did it make?

In [None]:
# Your code here
#<<solution>>
#1 Collection period: from the earliest trip start to the latest trip end.
period_start = trips.start_time.min()
period_end = trips.stop_time.max()
print('Start: ' + str(period_start))
print('End: ' + str(period_end))
# The covered period is latest stop minus earliest start.
print('Duration: ' + str(period_end - period_start))
#2 Number of distinct values in the vehicle id column 'qnr'.
print('Number of Vehicles: ' + str(trips.qnr.unique().size))
#3 value_counts() sorts descending, so the first entry is the busiest vehicle.
trips_per_vehicle = trips.qnr.value_counts()
print('Vehicle with most Trips: ' + str(trips_per_vehicle.index[0]))
print('Number of Trips: ' + str(trips_per_vehicle.iloc[0]))
#<</solution>>

Create a Heatmap of all trip start locations with folium.    
Try different parameters for the heatmap creation.

In [None]:
import folium
from folium.plugins import HeatMap
import numpy as np

# Create a basemap drawing function
def create_Basemap(location=None, zoom_start=12, tiles='cartodbpositron'):
    """Return a fresh folium map to draw layers on.

    Parameters
    ----------
    location : sequence of two floats, optional
        Map centre as [latitude, longitude]. Defaults to the Garching centre.
    zoom_start : int, optional
        Initial zoom level of the map.
    tiles : str, optional
        Name of the tile set passed to folium.

    Returns
    -------
    folium.Map
        A new map object with no additional layers.
    """
    if location is None:
        location = [48.265035, 11.668141]  # Garching center
    return folium.Map(location=location, zoom_start=zoom_start, tiles=tiles)

# Create a heatmap drawing function
def draw_Heatmap(indata, bmap, min_opacity=0.2, radius=10, blur=25, max_zoom=1):
    """Add a heatmap layer for a set of points to an existing folium map.

    Parameters
    ----------
    indata : object exposing ``.x`` and ``.y`` (e.g. a GeoSeries of points)
        ``x`` is used as longitude and ``y`` as latitude.
    bmap : folium.Map
        Map the heatmap layer is added to (modified in place).
    min_opacity, radius, blur, max_zoom : optional
        Forwarded unchanged to ``folium.plugins.HeatMap``. Override them to
        experiment with the look of the heatmap.

    Returns
    -------
    The same ``bmap`` object, so the call can end a cell and render the map.
    """
    # folium expects (lat, lon) pairs, i.e. (y, x) of each point.
    datalist = list(zip(indata.y.values, indata.x.values))
    HeatMap(datalist,
            min_opacity=min_opacity,
            radius=radius, blur=blur,
            max_zoom=max_zoom).add_to(bmap)
    return bmap
    
# Draw all trip start points on a fresh basemap; the trailing expression
# renders the map as the cell output.
basemap = draw_Heatmap(trips.start_geom, create_Basemap())
basemap

For further analysis, plot the distribution of trip start times against trip durations. What do you notice?

In [None]:
# Trip duration in hours. total_seconds() covers the whole timedelta, whereas
# .seconds drops the days component and would wrap trips of 24 h or more.
trips['duration'] = (trips.stop_time - trips.start_time).apply(lambda delta: delta.total_seconds() / 3600.)
# Start time as a fractional hour of day (e.g. 13:30 -> 13.5).
trips['start_hour'] = trips.start_time.apply(lambda t: (t.hour * 60.0 + t.minute) / 60)
# Average speed = distance per hour of trip duration.
# NOTE(review): a zero duration yields inf/NaN here; the unit of 'dist' is not
# visible in this notebook — confirm before interpreting the values.
trips['avg_speed'] = trips.dist / trips.duration

fig, ax = plt.subplots(figsize=(15, 10))
ax.scatter(trips.start_hour, trips.duration, s=0.3)
ax.set_ylim(0, 4)   # only show trips of up to 4 hours
ax.set_xlim(0, 24)
ax.set_xlabel('Hour of day')
ax.set_ylabel('Duration in h')
# plt.show() rather than fig.show(): with the inline backend fig.show() only
# emits a "non-GUI backend" warning.
plt.show()

Now we use SciPy's `gaussian_kde` function to compute a kernel density estimate (KDE) of the start times.

In [None]:
from scipy import stats
# Evaluate the density on 100 evenly spaced points spanning the observed start hours.
min_v = trips.start_hour.min()
max_v = trips.start_hour.max()
grid = np.linspace(min_v, max_v, 100)

# A scalar bw_method is used directly as the KDE bandwidth factor; smaller
# values give a less smoothed curve.
kernel = stats.gaussian_kde(trips.start_hour, bw_method=0.1)
fig, ax = plt.subplots()
ax.plot(grid, kernel.evaluate(grid), 'r-')
ax.set_xlabel('Start hour of day')
ax.set_ylabel('Density')
# plt.show() rather than fig.show(): with the inline backend fig.show() only
# emits a "non-GUI backend" warning.
plt.show()

Play around with heatmaps. Use different values and filters. What can you find out about the dataset?

In [None]:
# Make 'stop_geom' the active geometry so the trip end points expose .x/.y.
# Note that this rebinds the notebook-wide `trips` frame.
trips = trips.set_geometry('stop_geom')

# Trips that started strictly between hour 6 and hour 8 of the day.
condition = trips.start_hour.gt(6) & trips.start_hour.lt(8)

# Heatmap of where those trips ended; the trailing expression renders the map.
basemap = draw_Heatmap(trips[condition].stop_geom, create_Basemap())
basemap