# Analiza podatkov sistema BicikeLJ

In [56]:
# Matplotlib inline
%matplotlib inline
# Imports
from urllib.request import urlopen
import json
import numpy as np
import pandas as pd
import datetime
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
from pylab import *
# Figure size and style
plt.rcParams['figure.figsize'] = (15, 10)
#plt.style.use('ggplot')

Branje podatkov po protokolih, opisanih v: https://developer.jcdecaux.com/#/opendata/vls?page=getstarted

Zanimiva je diplomska naloga: http://geo.ff.uni-lj.si/pisnadela/pdfs/zaksem_201409_ziga_jamnik.pdf

In [57]:
# Parameters
# Bicycle, station dynamic infos
station_data_url = "https://api.jcdecaux.com/vls/v1/stations?contract=Ljubljana&apiKey=0a494317d60d3d556d0755600b078ea6b26af90f"
# Station info dataframe
station_info_fn = "bicikelj_station_info.csv"
station_data_fn = "bicikelj_station_data_1703.csv"

# Informacije o postajah

In [58]:
response = urlopen(station_data_url)
if response.code == 200:
    data = response.read().decode('utf-8')
else:
    print("Wrong response from ", station_data_url)

In [59]:
station_info_json = json.loads(data)

In [60]:
station_info = pd.DataFrame(station_info_json).sort_values(["number"]).reset_index(drop=True)

In [61]:
station_info = station_info[station_info['status'] == "OPEN"]

In [62]:
station_info.columns

Index(['address', 'available_bike_stands', 'available_bikes', 'banking',
       'bike_stands', 'bonus', 'contract_name', 'last_update', 'name',
       'number', 'position', 'status'],
      dtype='object')

In [63]:
station_info = station_info[['address', 'banking', 'bike_stands', 'bonus', 'name',
                             'number', 'position', 'status']].copy()

In [64]:
station_info = pd.concat([station_info.drop(['position'], axis=1), station_info["position"].apply(pd.Series)], axis=1)
station_info.head()

Unnamed: 0,address,banking,bike_stands,bonus,name,number,status,lat,lng
0,Prešernov trg 6,False,20,False,PREŠERNOV TRG-PETKOVŠKOVO NABREŽJE,1,OPEN,46.051367,14.506542
1,Pogačarjev trg 3,False,18,False,POGAČARJEV TRG-TRŽNICA,2,OPEN,46.051093,14.507186
2,Kongresni trg 3,False,20,False,KONGRESNI TRG-ŠUBIČEVA ULICA,3,OPEN,46.050388,14.504623
3,Cankarjeva cesta 1,False,26,False,CANKARJEVA UL.-NAMA,4,OPEN,46.052431,14.503257
4,Breg 20,False,20,False,BREG,5,OPEN,46.046498,14.505148


In [65]:
# Save to CSV
station_info.to_csv(station_info_fn, index=True)

In [66]:
station_info_s = station_info[["bike_stands", "name", "number"]]
station_info_s = station_info_s.set_index("number")

In [67]:
station_info_s

Unnamed: 0_level_0,bike_stands,name
number,Unnamed: 1_level_1,Unnamed: 2_level_1
1,20,PREŠERNOV TRG-PETKOVŠKOVO NABREŽJE
2,18,POGAČARJEV TRG-TRŽNICA
3,20,KONGRESNI TRG-ŠUBIČEVA ULICA
4,26,CANKARJEVA UL.-NAMA
5,20,BREG
6,18,GRUDNOVO NABREŽJE-KARLOVŠKA C.
7,18,MIKLOŠIČEV PARK
8,22,BAVARSKI DVOR
9,26,TRG OF-KOLODVORSKA UL.
10,18,MASARYKOVA DDC


# Branje podatkov o postajah

In [68]:
station_data_full = pd.read_csv(station_data_fn, index_col="last_update_time", parse_dates=True)

In [69]:
station_data_full.describe()

Unnamed: 0,available_bike_stands,available_bikes,bike_stands,number
count,22270.0,22270.0,22270.0,22270.0
mean,13.507544,6.280826,19.856309,24.744769
std,5.482766,5.125002,2.874461,14.650044
min,0.0,0.0,8.0,1.0
25%,10.0,2.0,18.0,12.0
50%,14.0,5.0,20.0,24.0
75%,18.0,10.0,20.0,37.0
max,26.0,26.0,26.0,51.0


In [70]:
station_data_full.index.min()

Timestamp('2017-03-29 14:19:25')

In [71]:
station_data_full.index.max()

Timestamp('2017-03-31 23:54:09')

In [72]:
station_data_full.head()

Unnamed: 0_level_0,available_bike_stands,available_bikes,bike_stands,number
last_update_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-03-29 14:19:25,17,3,20,42
2017-03-29 14:19:28,8,12,20,36
2017-03-29 14:19:40,16,4,20,46
2017-03-29 14:20:16,13,7,20,51
2017-03-29 14:20:30,16,0,16,24


In [73]:
station_data_stands = station_data_full.pivot(columns='number', values='available_bike_stands')
station_data_bikes = station_data_full.pivot(columns='number', values='available_bikes')

In [74]:
station_data_stands_hour = station_data_stands.groupby(station_data_stands.index.hour).aggregate("mean")
station_data_bikes_hour = station_data_bikes.groupby(station_data_bikes.index.hour).aggregate("mean")
station_data_bikes_hour

number,1,2,3,4,5,6,7,8,9,10,...,42,43,44,45,46,47,48,49,50,51
0,0.153846,0.0,1.235294,0.2,0.461538,0.923077,0.0,0.076923,6.333333,1.133333,...,15.416667,13.230769,16.666667,7.307692,10.846154,8.0,9.5,9.0,11.5,12.0
1,0.0,0.0,0.352941,0.0,0.25,1.166667,0.230769,0.0625,4.692308,0.529412,...,15.5,12.454545,17.583333,7.5,11.230769,8.0,9.5,9.0,11.5,12.0
2,0.0,0.0,0.416667,0.285714,0.0,1.083333,0.0,0.0,4.0,0.933333,...,15.285714,13.0,18.923077,7.5,11.0,8.0,9.230769,9.0,11.5,12.090909
3,0.0,0.0,0.5,0.0,0.0,1.0,0.0,0.076923,3.846154,1.0,...,15.636364,13.0,19.0,7.5,10.5,8.0,9.181818,9.0,11.5,12.0
4,0.0,0.0,0.466667,0.0,0.0,0.933333,0.0,0.214286,3.538462,0.857143,...,15.384615,13.0,18.5,7.769231,10.538462,7.571429,9.0,9.0,11.5,12.0
5,0.153846,0.25,1.083333,0.666667,0.0,1.090909,0.0,0.0,3.857143,1.142857,...,15.5,13.0,18.5,8.0,10.5,7.5,9.0,9.0,11.615385,11.833333
6,2.692308,1.571429,2.0,3.785714,0.0,1.166667,0.6,0.307692,1.529412,0.266667,...,15.5,13.0,17.642857,7.785714,9.692308,6.307692,9.0,9.333333,11.0,11.166667
7,5.105263,3.941176,5.722222,14.173913,2.294118,1.666667,3.421053,1.705882,3.0,0.058824,...,14.8,11.538462,15.933333,5.933333,6.533333,5.230769,8.642857,9.416667,9.285714,10.583333
8,6.0,4.625,9.666667,8.190476,7.647059,2.533333,9.809524,2.166667,1.526316,0.933333,...,14.727273,10.428571,12.333333,2.785714,3.866667,4.928571,5.928571,8.666667,8.384615,10.266667
9,10.294118,10.375,7.5625,14.571429,12.684211,4.071429,10.888889,2.3125,1.0,0.583333,...,10.25,7.75,6.2,3.071429,4.933333,4.153846,3.333333,7.923077,7.692308,9.909091


# Porazdelitev razpoložljivih koles glede na uro

In [75]:
bicikelj_pdf = "bicikelj_available.pdf"
pdf = PdfPages(bicikelj_pdf)
for col in station_data_bikes_hour.columns:
    data = station_data_bikes_hour[col].dropna()
    plt.title(str(col) + " - " + station_info_s["name"][col])
    plt.ylim(0, station_info_s["bike_stands"][col])
    plt.xlim(0,23)
    plt.xticks(range(0,25))
    plt.yticks(range(0,station_info_s["bike_stands"][col]+1))
    plt.grid(True)
    # plt.plot(data)
    plt.plot(data, linestyle="steps")
    pdf.savefig()
    close()
pdf.close()

In [76]:
station_data_bikes_hour.plot(linestyle="steps")

<matplotlib.axes._subplots.AxesSubplot at 0x25d99647898>

# Število koles po urah

Enako kot zgoraj, samo z grupiranjem

In [77]:
station_data_full = pd.read_csv(station_data_fn, index_col="last_update_time", parse_dates=True)
station_data_full.head()

Unnamed: 0_level_0,available_bike_stands,available_bikes,bike_stands,number
last_update_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-03-29 14:19:25,17,3,20,42
2017-03-29 14:19:28,8,12,20,36
2017-03-29 14:19:40,16,4,20,46
2017-03-29 14:20:16,13,7,20,51
2017-03-29 14:20:30,16,0,16,24


In [78]:
station_group_bikes_hour = station_data_full.groupby(["number", station_data_full.index.hour])["available_bikes"].mean()

In [79]:
# station_group_bikes_hour = station_group_bikes.unstack(level=1)

In [81]:
bicikelj_pdf = "bicikelj_available_2.pdf"
pdf = PdfPages(bicikelj_pdf)
for key in station_group_bikes_hour.index.levels[0]:
    data = station_group_bikes_hour[key]
    plt.title(str(key) + " - " + station_info_s["name"][key])
    plt.ylim(0, station_info_s["bike_stands"][key])
    plt.xlim(0,23)
    plt.xticks(range(0,25))
    plt.yticks(range(0,station_info_s["bike_stands"][key]+1))
    plt.grid(True)
    plt.plot(data, linestyle="steps")
    pdf.savefig()
    close()
pdf.close()

# Izposoje koles

In [82]:
station_data_full = pd.read_csv(station_data_fn, index_col="last_update_time", parse_dates=True)
station_data_full.head()

Unnamed: 0_level_0,available_bike_stands,available_bikes,bike_stands,number
last_update_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-03-29 14:19:25,17,3,20,42
2017-03-29 14:19:28,8,12,20,36
2017-03-29 14:19:40,16,4,20,46
2017-03-29 14:20:16,13,7,20,51
2017-03-29 14:20:30,16,0,16,24


Urejeno po postajah

In [83]:
station_group_bikes = station_data_full.groupby("number")

In [84]:
type(station_group_bikes.groups)

dict

In [85]:
data = station_group_bikes.get_group(1)
data["change"] = data["available_bikes"].diff()
data.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app


Unnamed: 0_level_0,available_bike_stands,available_bikes,bike_stands,number,change
last_update_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-03-29 14:28:53,16,4,20,1,
2017-03-29 14:33:15,18,2,20,1,-2.0
2017-03-29 14:36:00,20,0,20,1,-2.0
2017-03-29 14:48:17,19,1,20,1,1.0
2017-03-29 14:51:58,18,2,20,1,1.0


In [86]:
bicikelj_pdf = "bicikelj_available_3.pdf"
pdf = PdfPages(bicikelj_pdf)
# List groups
for key, item in station_group_bikes:
    data = station_group_bikes.get_group(key).copy()
    data["change"] = data["available_bikes"].diff()
    plt.title(str(key) + " - " + station_info_s["name"][key])
    plt.plot(data[["available_bikes", "change"]], linestyle="steps")
    plt.grid(True)
    pdf.savefig()
    close()
pdf.close()