# Data Visualisation

In [None]:
# Operating system interface
import os
# Plotting library
import matplotlib
import matplotlib.pyplot as plt
# Efficient tabular data format (DataFrames)
import pandas as pd
# Fancy plot for DataFrames
from pandas.plotting import scatter_matrix
# Numerical library
import numpy as np
# More plot styles
import seaborn as sns
# Clickable d3.js-Plots
import plotly as py
import plotly.offline as po

# Plots should be rendered inside the notebook
%matplotlib inline

## Analyse Emma's trace

In [None]:
# Download Emma's trace (a remote JSON file) and parse it into a DataFrame.
emma_trace_url = 'https://s3-eu-west-1.amazonaws.com/ifvworkshopdata/emma1000.json'
df = pd.read_json(emma_trace_url)
# Preview the first rows to see which columns are available.
df.head()

Plot velocity histogram with Matplotlib

In [None]:
# Histogram of the velocity column `v`, drawn with plain Matplotlib.
# The explicit figure/axes interface is used so the plot can be labelled.
hist_fig, hist_ax = plt.subplots()
hist_ax.hist(df['v'])
# Title and axis labels let the figure stand on its own when skimming the notebook.
# NOTE(review): the unit of `v` is not visible here - add it to the x label once confirmed.
hist_ax.set(title='Velocity distribution', xlabel='v', ylabel='count')
# plt.show() renders the figure without echoing the (counts, bins, patches) tuple.
plt.show()

Plot in seaborn - nicer!

In [None]:
# Velocity distribution with seaborn: histogram bars plus a kernel density curve.
# `sns.distplot` is deprecated; `histplot` with `kde=True` and `stat='density'`
# is the documented replacement for a density-normalised histogram with a KDE overlay.
dist_fig, dist_ax = plt.subplots()
sns.histplot(df['v'], kde=True, stat='density', ax=dist_ax)
# Write the figure to a PNG in the current working directory.
dist_fig.savefig('emmavelocity.png')

In [None]:
# Interactive chart of the velocity (`v`) and the relative altitude (`z` - 550),
# written to a standalone HTML file with plotly's offline mode.
data = [
    dict(
        type = 'scatter',
        y = df['v'],
        name = 'velocity',
        # `fill` is a trace-level attribute. It used to be spelled `filly = 'zero'`
        # inside `line`, which plotly does not recognise, so no fill was drawn.
        fill = 'tozeroy',
        line = dict(shape = 'spline'),
    ),
    dict(
        # plotly has no Cartesian 'area' trace type; a filled area chart is a
        # 'scatter' trace with `fill` set.
        type = 'scatter',
        fill = 'tozeroy',
        # Altitude shifted by a constant offset of 550.
        # NOTE(review): presumably the base altitude of the track in the units
        # of `z` - confirm and move it to a named constant.
        y = df['z'] - 550,
        name = 'Rel. altitude',
        line = dict(shape = 'spline', smoothing = .05),
    ),
]
layout = dict(
    title = 'Emmas velocity',
    # Fixed y range shared by both traces.
    yaxis = dict(range = [0, 10]),
)
fig = dict(data = data, layout = layout)
# validate=False skips plotly's figure validation, so misspelled attributes
# are not reported - double-check attribute names against the plotly reference.
po.plot(fig, validate=False, filename='emmavelocity.html')

## Trying a Deutsche Bahn (DB) open data set

In [None]:
# Station table from the Deutsche Bahn download server
# (the file name suggests the September 2017 edition).
# The CSV is semicolon-separated and uses a comma as the decimal mark.
stations_url = 'http://download-data.deutschebahn.com/static/datasets/haltestellen/D_Bahnhof_2017_09.csv'
df = pd.read_csv(stations_url, sep=';', decimal=',')
# Preview the first rows to see which columns are available.
df.head()

Histogram of Latitudes and Longitudes

In [None]:
# One histogram panel per coordinate column: BREITE (latitude) and LAENGE (longitude).
df.hist(column=["BREITE", "LAENGE"])

Scatterplot of Stations

In [None]:
# Colour code for the stations: TYPE is 100 where VERKEHR equals 'FV' and 0 elsewhere,
# so the two groups end up at opposite ends of the RdYlGn colour map.
df['TYPE'] = df['VERKEHR'].eq('FV').mul(100)
# Longitude on x and latitude on y gives a map-like view of the stations.
df.plot.scatter(
    x="LAENGE",
    y="BREITE",
    c="TYPE",
    cmap="RdYlGn",
    alpha=0.5,
    figsize=(6, 10),
    legend=True,
)

Plot station density

In [None]:
# Hexagonal binning over longitude/latitude: darker cells contain more stations.
df.plot.hexbin(x="LAENGE", y="BREITE", gridsize=20, cmap="Blues", legend=True)

Try other plot kinds yourself by changing the `kind` argument, e.g.:

- `'line'` : line plot (default)
- `'bar'` : vertical bar plot
- `'barh'` : horizontal bar plot
- `'hist'` : histogram
- `'box'` : boxplot
- `'kde'` : Kernel Density Estimation plot
- `'density'` : same as `'kde'`
- `'area'` : area plot
- `'pie'` : pie plot
- `'scatter'` : scatter plot
- `'hexbin'` : hexbin plot

Geospatial plot in plotly

In [None]:
# Interactive map of the long-distance ('FV') stations, written to a standalone
# HTML file with plotly's offline mode.

# Filter into a new frame instead of overwriting `df`, so the full station
# table stays intact when earlier cells are re-run.
fv_stations = df[df['VERKEHR'] == 'FV']

data = [
    dict(
        type = 'scattergeo',
        # Markers are positioned by lon/lat. The former `locationmode = 'Germany'`
        # was dropped: `locationmode` only applies to the `locations` attribute
        # and 'Germany' is not one of its allowed values.
        lon = fv_stations['LAENGE'],
        lat = fv_stations['BREITE'],
        # Station name shown on hover.
        text = fv_stations['NAME'],
        mode = 'markers',
        marker = dict(
            size = 8,
            opacity = 0.8,
            reversescale = True,
            autocolorscale = False,
            symbol = 'square',
        ),
    )
]

layout = dict(
    title = 'Long distance stations<br>(Hover for station names)',
    # The former `colorbar = True` was dropped: `colorbar` is not a layout
    # attribute, and the markers carry no colour scale to describe.
    geo = dict(
        showland = True,
        landcolor = "rgb(250, 250, 250)",
        subunitcolor = "rgb(217, 217, 217)",
        # Country borders are off by default for the world scope; without this
        # flag the countrycolor/countrywidth settings have no visible effect.
        showcountries = True,
        countrycolor = "rgb(217, 217, 217)",
        countrywidth = 0.5,
        subunitwidth = 0.5,
        # Restrict the visible window to 5-15 degrees east and 45-60 degrees north.
        lonaxis = dict(
            showgrid = True,
            gridwidth = 0.5,
            range = [ 5, 15.0 ],
            dtick = 5
        ),
        lataxis = dict(
            showgrid = True,
            gridwidth = 0.5,
            range = [ 45.0, 60.0 ],
            dtick = 5
        )
    ),
)

fig = dict( data=data, layout=layout )
# validate=False skips plotly's figure validation, so misspelled attributes
# are not reported - double-check attribute names against the plotly reference.
po.plot(fig, validate=False, filename='d3-stations.html')