In [1]:
import pymongo
import pandas as pd
from sqlalchemy import create_engine
import json

### Connection

In [2]:
credentials = {}
with open('credentials.json') as f:
    cred = json.load(f)
    credentials = cred['postgresql']
engine = create_engine('postgresql+psycopg2://{}:{}@{}:{}/opendata'.format(credentials['username'], credentials['password'], credentials['host'], credentials['port']))

In [3]:
client = pymongo.MongoClient('mongodb+srv://angers:sregna@clusterangers-7bjup.gcp.mongodb.net/test?retryWrites=true')

In [4]:
client.list_database_names()

['Traffic', 'admin', 'local']

In [5]:
db = client['Traffic']
db.list_collection_names()

['busTraffic', 'parkingTraffic']

### Parking

In [6]:
db['parkingTraffic'].estimated_document_count()

1873

In [7]:
records = []
for x in db['parkingTraffic'].find():
    for record in x['records']:
        f = record['fields']
        f['record_timestamp'] = record['record_timestamp']
        records.append(f)

pt = pd.DataFrame(records)
print(pt.shape)
pt.head()

(18740, 3)


Unnamed: 0,disponible,nom,record_timestamp
0,97,Republique,2019-04-12T15:17:00+00:00
1,19,Bressigny,2019-04-12T15:17:00+00:00
2,47,Mitterrand Rennes,2019-04-12T15:17:00+00:00
3,311,Saint Laud 2,2019-04-12T15:17:00+00:00
4,20,Ralliement,2019-04-12T15:17:00+00:00


In [8]:
pt.columns = ['Disponible','Nom','Date']
pt['Date'] = pd.to_datetime(pt['Date'])
pt.head()

Unnamed: 0,Disponible,Nom,Date
0,97,Republique,2019-04-12 15:17:00
1,19,Bressigny,2019-04-12 15:17:00
2,47,Mitterrand Rennes,2019-04-12 15:17:00
3,311,Saint Laud 2,2019-04-12 15:17:00
4,20,Ralliement,2019-04-12 15:17:00


In [9]:
pt.dtypes

Disponible             int64
Nom                   object
Date          datetime64[ns]
dtype: object

To the postgres database

In [13]:
pt.to_sql('parking', engine, index=False, if_exists='replace')

#### Coordinates

In [10]:
parking_csv = pd.read_csv('data/pv_equip_parking.csv', sep=';')
print(parking_csv.shape)
parking_csv.head()

(18, 22)


Unnamed: 0,NOM,EXPLOITANT,ADR_NUM,ADR_LIBVOI,NOM_COMMUNE,ID_EQUIP,ID_PARKING,Geo Shape,Geo Point,MOYENS_ACCES,...,NB_PLACES,NB_VELO_SECUR,NB_PLACES_PMR,ACCESSIBILITE,HORAIRES_OUVERTURE,HORAIRES_FERMETURE,FERMETURE_EXCEPTION,HORAIRES_EXCEPTION,THEME,SOUS_THEME
0,Parking Berges de Maine,ALTER,26,ALLÉE FRANÇOIS MITTERRAND,ANGERS,2524,Berges De Maine,"{""type"": ""Point"", ""coordinates"": [-0.549642004...","47.4791055266, -0.549642004596",AVENUE DES DROITS DE L'HOMME,...,305.0,0.0,7.0,24-24,07:00,00:00,,,TRANSPORT,PARKING
1,Parking Larrey,ALTER,26,ALLÉE FRANÇOIS MITTERRAND,ANGERS,7550,Larrey,"{""type"": ""Point"", ""coordinates"": [-0.554951239...","47.4806370511, -0.554951239998",RUE LARREY,...,28.0,0.0,2.0,24-24,,,,,TRANSPORT,PARKING
2,Parking Molière,ALTER,37,RUE THIERS,ANGERS,2089,Moliere,"{""type"": ""Point"", ""coordinates"": [-0.554276891...","47.4746632156, -0.554276891798",RUE THIERS,...,416.0,38.0,9.0,24-24,07:00,20:00,,,TRANSPORT,PARKING
3,Parking Poissonnerie,ALTER,7,RUE PLANTAGENÊT,ANGERS,2574,Poissonnerie,"{""type"": ""Point"", ""coordinates"": [-0.555937486...","47.4732815436, -0.555937486434",,...,,,,,,,,,TRANSPORT,PARKING
4,Parking Leclerc,ALTER,14,BOULEVARD BESSONNEAU,ANGERS,2275,Leclerc,"{""type"": ""Point"", ""coordinates"": [-0.546056082...","47.47144192, -0.54605608257",PLACE LECLERC / PALAIS DE JUSTICE,...,235.0,0.0,4.0,24-24,,,SAM 00:00-17:00,,TRANSPORT,PARKING


In [11]:
parking_csv.columns

Index(['NOM', 'EXPLOITANT', 'ADR_NUM', 'ADR_LIBVOI', 'NOM_COMMUNE', 'ID_EQUIP',
       'ID_PARKING', 'Geo Shape', 'Geo Point', 'MOYENS_ACCES', 'GPL',
       'HAUTEUR_MAX', 'NB_PLACES', 'NB_VELO_SECUR', 'NB_PLACES_PMR',
       'ACCESSIBILITE', 'HORAIRES_OUVERTURE', 'HORAIRES_FERMETURE',
       'FERMETURE_EXCEPTION', 'HORAIRES_EXCEPTION', 'THEME', 'SOUS_THEME'],
      dtype='object')

In [12]:
parking = parking_csv[['ID_PARKING','Geo Point','NB_PLACES']]
parking.columns = ['parking','geo_point','places']
parking

Unnamed: 0,parking,geo_point,places
0,Berges De Maine,"47.4791055266, -0.549642004596",305.0
1,Larrey,"47.4806370511, -0.554951239998",28.0
2,Moliere,"47.4746632156, -0.554276891798",416.0
3,Poissonnerie,"47.4732815436, -0.555937486434",
4,Leclerc,"47.47144192, -0.54605608257",235.0
5,Mitterrand Rennes,"47.476608392, -0.550574131243",133.0
6,Mitterrand Maine,"47.4772420234, -0.552025911935",151.0
7,Saint Laud,"47.4644889197, -0.558943020692",450.0
8,Marengo,"47.4648842682, -0.554823179984",305.0
9,Haras Public,"47.4648996364, -0.553765416644",112.0


In [13]:
latlon = parking['geo_point'].str.split(',', expand=True)
parking['latitude'] = pd.to_numeric(latlon[0])
parking['longitude'] = pd.to_numeric(latlon[1])
parking

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,parking,geo_point,places,latitude,longitude
0,Berges De Maine,"47.4791055266, -0.549642004596",305.0,47.479106,-0.549642
1,Larrey,"47.4806370511, -0.554951239998",28.0,47.480637,-0.554951
2,Moliere,"47.4746632156, -0.554276891798",416.0,47.474663,-0.554277
3,Poissonnerie,"47.4732815436, -0.555937486434",,47.473282,-0.555937
4,Leclerc,"47.47144192, -0.54605608257",235.0,47.471442,-0.546056
5,Mitterrand Rennes,"47.476608392, -0.550574131243",133.0,47.476608,-0.550574
6,Mitterrand Maine,"47.4772420234, -0.552025911935",151.0,47.477242,-0.552026
7,Saint Laud,"47.4644889197, -0.558943020692",450.0,47.464489,-0.558943
8,Marengo,"47.4648842682, -0.554823179984",305.0,47.464884,-0.554823
9,Haras Public,"47.4648996364, -0.553765416644",112.0,47.4649,-0.553765


In [15]:
parking.dtypes

parking       object
geo_point     object
places       float64
latitude     float64
longitude    float64
dtype: object

To the postgres database

In [18]:
parking.to_sql('parking_list', engine, index=False, if_exists='replace')

#### Parking full

In [14]:
parking_full = pt.merge(parking, left_on='Nom', right_on='parking')
parking_full.drop('parking', axis=1, inplace=True)
parking_full.columns = ['disponible','nom','date','geo_point','max_places','latitude','longitude']
print(parking_full.shape)
parking_full.head()

(18740, 7)


Unnamed: 0,disponible,nom,date,geo_point,max_places,latitude,longitude
0,97,Republique,2019-04-12 15:17:00,"47.4725623676, -0.554958380401",383.0,47.472562,-0.554958
1,103,Republique,2019-04-12 15:24:00,"47.4725623676, -0.554958380401",383.0,47.472562,-0.554958
2,109,Republique,2019-04-12 15:28:00,"47.4725623676, -0.554958380401",383.0,47.472562,-0.554958
3,105,Republique,2019-04-12 15:33:00,"47.4725623676, -0.554958380401",383.0,47.472562,-0.554958
4,120,Republique,2019-04-12 15:48:00,"47.4725623676, -0.554958380401",383.0,47.472562,-0.554958


To the postgres database

In [17]:
parking_full.to_sql('parking_full', engine, index=False, if_exists='replace')

### Bus

In [6]:
db['busTraffic'].estimated_document_count()

2795

In [7]:
records = []
for x in db['busTraffic'].find():
    for record in x['records']:
        f = record['fields']
        f['record_timestamp'] = record['record_timestamp']
        records.append(f)

bt = pd.DataFrame(records)
print(bt.shape)
bt.head()

(20138, 22)


Unnamed: 0,cap,coordonnees,dest,ecart,etat,harret,idarret,iddesserte,idligne,idparcours,...,mnemoligne,nomarret,nomligne,novh,numarret,record_timestamp,sv,type,x,y
0,243,"[47.485893, -0.51255723]",TRELAZE QUANTINIERE,0,TDEP,2019-04-12T15:20:00+00:00,1214154,268611585,268435458,268611584.0,...,2,BANCHAIS,ST SYLVAIN BANCHAIS <>TRELAZE,201,20610,2019-04-12T15:01:17+00:00,202,GX 427 Hyb,385339,2280145
1,337,"[47.465045, -0.55945135]",ST MARTIN DU F,0,HLPS,2019-04-12T15:30:00+00:00,1215437,270807297,268435492,270807296.0,...,36,GARES SEMARD SUB,ST MARTIN St JEAN <> GARE,3274,21525,2019-04-12T15:13:55+00:00,3611,MAN STARTE,381722,2277958
2,95,"[47.471121, -0.53121436]",ZI EST,143,LIGN,2019-04-12T15:15:18+00:00,1214308,268840755,268435462,268840704.0,...,6,FRATELLINI,BOUCHEMAINE <> Z I EST,423,23398,2019-04-12T15:15:17+00:00,608,OMNICITY,383874,2278555
3,232,"[47.468818, -0.55832905]",SAINTE GEMMES,713,LIGN,2019-04-12T15:16:14+00:00,1215795,269200411,268435467,269200384.0,...,11,ACADEMIE,LAC MAINE <> STE GEMMES CL ANJOU,443,913399,2019-04-12T15:15:17+00:00,1103,OMNICITY,381822,2278374
4,233,"[47.465946, -0.59877727]",LAC DE MAINE - L'HOIRIE,338,LIGN,2019-04-12T15:15:06+00:00,1214646,269168923,268435467,269168896.0,...,11,LES DENIERS,LAC MAINE <> STE GEMMES CL ANJOU,449,6046,2019-04-12T15:15:17+00:00,1125,OMNICITY,378763,2278168


In [8]:
bt.columns

Index(['cap', 'coordonnees', 'dest', 'ecart', 'etat', 'harret', 'idarret',
       'iddesserte', 'idligne', 'idparcours', 'idvh', 'mnemoarret',
       'mnemoligne', 'nomarret', 'nomligne', 'novh', 'numarret',
       'record_timestamp', 'sv', 'type', 'x', 'y'],
      dtype='object')

In [9]:
bus = bt.drop(['cap','etat','novh','numarret','sv','type','x','y'], axis=1).copy()
bus.columns = [
    'coordonnees',
    'destination',
    'ecart_horaire',
    'heure_estimee_arret',
    'id_arret',
    'id_desserte',
    'id_ligne',
    'id_parcours',
    'id_vehicule',
    'mnemo_arret',
    'mnemo_ligne',
    'nom_arret',
    'nom_ligne',
    'date'
]
bus['heure_estimee_arret'] = pd.to_datetime(bus['heure_estimee_arret'])
bus['date'] = pd.to_datetime(bus['date'])
bus.head()

Unnamed: 0,coordonnees,destination,ecart_horaire,heure_estimee_arret,id_arret,id_desserte,id_ligne,id_parcours,id_vehicule,mnemo_arret,mnemo_ligne,nom_arret,nom_ligne,date
0,"[47.485893, -0.51255723]",TRELAZE QUANTINIERE,0,2019-04-12 15:20:00+00:00,1214154,268611585,268435458,268611584.0,268435657,BANCSTMA,2,BANCHAIS,ST SYLVAIN BANCHAIS <>TRELAZE,2019-04-12 15:01:17+00:00
1,"[47.465045, -0.55945135]",ST MARTIN DU F,0,2019-04-12 15:30:00+00:00,1215437,270807297,268435492,270807296.0,268438730,GARESEMA,36,GARES SEMARD SUB,ST MARTIN St JEAN <> GARE,2019-04-12 15:13:55+00:00
2,"[47.471121, -0.53121436]",ZI EST,143,2019-04-12 15:15:18+00:00,1214308,268840755,268435462,268840704.0,268435879,CIEST.-E,6,FRATELLINI,BOUCHEMAINE <> Z I EST,2019-04-12 15:15:17+00:00
3,"[47.468818, -0.55832905]",SAINTE GEMMES,713,2019-04-12 15:16:14+00:00,1215795,269200411,268435467,269200384.0,268435899,ACADEMIE,11,ACADEMIE,LAC MAINE <> STE GEMMES CL ANJOU,2019-04-12 15:15:17+00:00
4,"[47.465946, -0.59877727]",LAC DE MAINE - L'HOIRIE,338,2019-04-12 15:15:06+00:00,1214646,269168923,268435467,269168896.0,268435905,LEDENIER,11,LES DENIERS,LAC MAINE <> STE GEMMES CL ANJOU,2019-04-12 15:15:17+00:00


In [10]:
bus['latitude'] = bt['coordonnees'].str[0]
bus['longitude'] = bt['coordonnees'].str[1]
bus.head()

Unnamed: 0,coordonnees,destination,ecart_horaire,heure_estimee_arret,id_arret,id_desserte,id_ligne,id_parcours,id_vehicule,mnemo_arret,mnemo_ligne,nom_arret,nom_ligne,date,latitude,longitude
0,"[47.485893, -0.51255723]",TRELAZE QUANTINIERE,0,2019-04-12 15:20:00+00:00,1214154,268611585,268435458,268611584.0,268435657,BANCSTMA,2,BANCHAIS,ST SYLVAIN BANCHAIS <>TRELAZE,2019-04-12 15:01:17+00:00,47.485893,-0.512557
1,"[47.465045, -0.55945135]",ST MARTIN DU F,0,2019-04-12 15:30:00+00:00,1215437,270807297,268435492,270807296.0,268438730,GARESEMA,36,GARES SEMARD SUB,ST MARTIN St JEAN <> GARE,2019-04-12 15:13:55+00:00,47.465045,-0.559451
2,"[47.471121, -0.53121436]",ZI EST,143,2019-04-12 15:15:18+00:00,1214308,268840755,268435462,268840704.0,268435879,CIEST.-E,6,FRATELLINI,BOUCHEMAINE <> Z I EST,2019-04-12 15:15:17+00:00,47.471121,-0.531214
3,"[47.468818, -0.55832905]",SAINTE GEMMES,713,2019-04-12 15:16:14+00:00,1215795,269200411,268435467,269200384.0,268435899,ACADEMIE,11,ACADEMIE,LAC MAINE <> STE GEMMES CL ANJOU,2019-04-12 15:15:17+00:00,47.468818,-0.558329
4,"[47.465946, -0.59877727]",LAC DE MAINE - L'HOIRIE,338,2019-04-12 15:15:06+00:00,1214646,269168923,268435467,269168896.0,268435905,LEDENIER,11,LES DENIERS,LAC MAINE <> STE GEMMES CL ANJOU,2019-04-12 15:15:17+00:00,47.465946,-0.598777


In [11]:
bus.dtypes

coordonnees                         object
destination                         object
ecart_horaire                        int64
heure_estimee_arret    datetime64[ns, UTC]
id_arret                             int64
id_desserte                          int64
id_ligne                             int64
id_parcours                        float64
id_vehicule                          int64
mnemo_arret                         object
mnemo_ligne                         object
nom_arret                           object
nom_ligne                           object
date                   datetime64[ns, UTC]
latitude                           float64
longitude                          float64
dtype: object

To the postgres database

In [12]:
bus.to_sql('bus', engine, index=False, if_exists='replace')

### Méteo

In [25]:
meteo = pd.read_sql_table('meteo_PDL', engine)
print(meteo.shape)
meteo.head()

(360, 24)


Unnamed: 0,Code département,Nom de la région,Nom du département,producteur,dateinsert,echeance,noversion,dateprevue,daterun,typeprev,...,Etat inondation,Etat neige,Etat canicule,Etat grand froid,Etat avalanches,Etat vague submersion,Geo Point 2D,Date mise à jour,latitude,longitude
0,49,PAYS DE LA LOIRE,MAINE-ET-LOIRE,DP,2019-04-11T08:00:00+02:00,24,1,2019-04-12 06:00:00+00:00,2019-04-11T08:00:00+02:00,1,...,Vert,Vert,Vert,Vert,Vert,Vert,"47.3912080347, -0.563907968988",2019-04-11 08:00:13+00:00,47.391208,-0.563908
1,72,PAYS DE LA LOIRE,SARTHE,DP,2019-04-11T12:00:00+02:00,20,2,2019-04-12 06:00:00+00:00,2019-04-11T12:00:00+02:00,1,...,Vert,Vert,Vert,Vert,Vert,Vert,"47.9949003337, 0.222652946898",2019-04-11 10:00:13+00:00,47.9949,0.222653
2,85,PAYS DE LA LOIRE,VENDEE,DP,2019-04-11T12:00:00+02:00,20,2,2019-04-12 06:00:00+00:00,2019-04-11T12:00:00+02:00,1,...,Vert,Vert,Vert,Vert,Vert,Vert,"46.6749413892, -1.2984615182",2019-04-11 14:00:13+00:00,46.674941,-1.298462
3,44,PAYS DE LA LOIRE,LOIRE-ATLANTIQUE,DP,2019-04-11T18:00:00+02:00,24,1,2019-04-12 16:00:00+00:00,2019-04-11T18:00:00+02:00,1,...,Vert,Vert,Vert,Vert,Vert,Vert,"47.3613759704, -1.68381313654",2019-04-12 01:00:13+00:00,47.361376,-1.683813
4,44,PAYS DE LA LOIRE,LOIRE-ATLANTIQUE,DP,2019-04-12T18:00:00+02:00,24,1,2019-04-13 16:00:00+00:00,2019-04-12T18:00:00+02:00,1,...,Vert,Vert,Vert,Vert,Vert,Vert,"47.3613759704, -1.68381313654",2019-04-12 20:01:51+00:00,47.361376,-1.683813


In [26]:
meteo.columns

Index(['Code département', 'Nom de la région', 'Nom du département',
       'producteur', 'dateinsert', 'echeance', 'noversion', 'dateprevue',
       'daterun', 'typeprev', 'Geo Shape Str', 'Etat vent',
       'Etat pluie inondation', 'Etat orage', 'Etat inondation', 'Etat neige',
       'Etat canicule', 'Etat grand froid', 'Etat avalanches',
       'Etat vague submersion', 'Geo Point 2D', 'Date mise à jour', 'latitude',
       'longitude'],
      dtype='object')

In [47]:
meteo['Etat vent'] = meteo['Etat vent'].astype('category')
meteo['Etat pluie inondation'] = meteo['Etat pluie inondation'].astype('category')
meteo['Etat orage'] = meteo['Etat orage'].astype('category')
meteo['Etat inondation'] = meteo['Etat inondation'].astype('category')
meteo['Etat neige'] = meteo['Etat neige'].astype('category')
meteo['Etat canicule'] = meteo['Etat canicule'].astype('category')
meteo['Etat grand froid'] = meteo['Etat grand froid'].astype('category')
meteo['Etat avalanches'] = meteo['Etat avalanches'].astype('category')
meteo['Etat vague submersion'] = meteo['Etat vague submersion'].astype('category')
meteo['dateinsert'] = pd.to_datetime(meteo['dateinsert'])
meteo['daterun'] = pd.to_datetime(meteo['daterun'])
meteo['dateprevue'] = (meteo['dateprevue']).apply(lambda d: pd.to_datetime(str(d)))
meteo['Date mise à jour'] = (meteo['Date mise à jour']).apply(lambda d: pd.to_datetime(str(d)))
meteo.dtypes

Code département                  int64
Nom de la région                 object
Nom du département               object
producteur                       object
dateinsert               datetime64[ns]
echeance                          int64
noversion                         int64
dateprevue               datetime64[ns]
daterun                  datetime64[ns]
typeprev                          int64
Geo Shape Str                    object
Etat vent                      category
Etat pluie inondation          category
Etat orage                     category
Etat inondation                category
Etat neige                     category
Etat canicule                  category
Etat grand froid               category
Etat avalanches                category
Etat vague submersion          category
Geo Point 2D                     object
Date mise à jour         datetime64[ns]
latitude                        float64
longitude                       float64
dtype: object

In [48]:
cat_columns = meteo.select_dtypes(['category']).columns
meteo[cat_columns] = meteo[cat_columns].apply(lambda x: x.cat.codes)
meteo.head()

Unnamed: 0,Code département,Nom de la région,Nom du département,producteur,dateinsert,echeance,noversion,dateprevue,daterun,typeprev,...,Etat inondation,Etat neige,Etat canicule,Etat grand froid,Etat avalanches,Etat vague submersion,Geo Point 2D,Date mise à jour,latitude,longitude
0,49,PAYS DE LA LOIRE,MAINE-ET-LOIRE,DP,2019-04-11 06:00:00,24,1,2019-04-12 06:00:00,2019-04-11 06:00:00,1,...,0,0,0,0,0,0,"47.3912080347, -0.563907968988",2019-04-11 08:00:13,47.391208,-0.563908
1,72,PAYS DE LA LOIRE,SARTHE,DP,2019-04-11 10:00:00,20,2,2019-04-12 06:00:00,2019-04-11 10:00:00,1,...,0,0,0,0,0,0,"47.9949003337, 0.222652946898",2019-04-11 10:00:13,47.9949,0.222653
2,85,PAYS DE LA LOIRE,VENDEE,DP,2019-04-11 10:00:00,20,2,2019-04-12 06:00:00,2019-04-11 10:00:00,1,...,0,0,0,0,0,0,"46.6749413892, -1.2984615182",2019-04-11 14:00:13,46.674941,-1.298462
3,44,PAYS DE LA LOIRE,LOIRE-ATLANTIQUE,DP,2019-04-11 16:00:00,24,1,2019-04-12 16:00:00,2019-04-11 16:00:00,1,...,0,0,0,0,0,0,"47.3613759704, -1.68381313654",2019-04-12 01:00:13,47.361376,-1.683813
4,44,PAYS DE LA LOIRE,LOIRE-ATLANTIQUE,DP,2019-04-12 16:00:00,24,1,2019-04-13 16:00:00,2019-04-12 16:00:00,1,...,0,0,0,0,0,0,"47.3613759704, -1.68381313654",2019-04-12 20:01:51,47.361376,-1.683813


To the postgres database

In [49]:
meteo.to_sql('meteo', engine, index=False, if_exists='replace')

### Autocar

In [51]:
autocar = pd.read_csv('data/autocar-regionaux.csv', sep=';')
print(autocar.shape)
autocar.head()

(1168, 5)


Unnamed: 0,Année,Mois,Nom de la ligne,Nombre de voyages sur la ligne autocar régionale,date
0,2007,01-01-,Nantes St Jean de Monts,581,01/01/2007
1,2009,02-01-,Nantes St Jean de Monts,463,01/02/2009
2,2009,05-01-,Nantes St Jean de Monts,1031,01/05/2009
3,2009,09-01-,Nantes St Jean de Monts,1549,01/09/2009
4,2009,11-01-,Nantes St Jean de Monts,1102,01/11/2009


In [52]:
autocar.dtypes

Année                                                int64
Mois                                                object
Nom de la ligne                                     object
Nombre de voyages sur la ligne autocar régionale     int64
date                                                object
dtype: object

In [53]:
autocar = autocar.drop(['Année','Mois'], axis=1)
autocar.columns = ['ligne','voyages','date']
autocar['date'] = pd.to_datetime(autocar['date'])
autocar.head()

Unnamed: 0,ligne,voyages,date
0,Nantes St Jean de Monts,581,2007-01-01
1,Nantes St Jean de Monts,463,2009-01-02
2,Nantes St Jean de Monts,1031,2009-01-05
3,Nantes St Jean de Monts,1549,2009-01-09
4,Nantes St Jean de Monts,1102,2009-01-11


To the postgres database

In [57]:
autocar.to_sql('autocar', engine, index=False, if_exists='replace')

### Destineo

In [58]:
destineo = pd.read_csv('data/destineo.csv', sep=',')
print(destineo.shape)
destineo.head()

(136, 9)


Unnamed: 0,Année,Mois code,Mois,Nombre de visites (Web),Nombre de sessions (application mobile),Nombre de pages vues (Web),Nombre d'écrans vus (application mobile),Nombre de recherches d'itinéraires effectuées,Date
0,2008,4,avril,16684.0,0.0,112741.0,0.0,0.0,2008-04-01
1,2008,8,août,17204.0,0.0,118541.0,0.0,0.0,2008-08-01
2,2008,10,octobre,21466.0,0.0,161947.0,0.0,0.0,2008-10-01
3,2017,1,janv.,61530.0,39841.0,173764.0,185254.0,950376.0,2017-01-01
4,2007,1,janv.,8717.0,0.0,57331.0,0.0,0.0,2007-01-01


In [59]:
destineo.dtypes

Année                                              int64
Mois code                                          int64
Mois                                              object
Nombre de visites (Web)                          float64
Nombre de sessions (application mobile)          float64
Nombre de pages vues (Web)                       float64
Nombre d'écrans vus (application mobile)         float64
Nombre de recherches d'itinéraires effectuées    float64
Date                                              object
dtype: object

In [60]:
destineo = destineo.drop(['Année','Mois code','Mois'], axis=1)
destineo.columns = ['visites_web','sessions_mobile','pages_vues_web','ecrans_vus_mobile','recherches_itineraires','date']
destineo['date'] = pd.to_datetime(destineo['date'])
destineo.head()

Unnamed: 0,visites_web,sessions_mobile,pages_vues_web,ecrans_vus_mobile,recherches_itineraires,date
0,16684.0,0.0,112741.0,0.0,0.0,2008-04-01
1,17204.0,0.0,118541.0,0.0,0.0,2008-08-01
2,21466.0,0.0,161947.0,0.0,0.0,2008-10-01
3,61530.0,39841.0,173764.0,185254.0,950376.0,2017-01-01
4,8717.0,0.0,57331.0,0.0,0.0,2007-01-01


In [62]:
destineo.describe()

Unnamed: 0,visites_web,sessions_mobile,pages_vues_web,ecrans_vus_mobile,recherches_itineraires
count,136.0,136.0,136.0,136.0,136.0
mean,42586.691176,9265.264706,182054.352941,48189.301471,350477.1
std,28699.440222,16521.867681,83708.22376,84899.867082,809484.0
min,7012.0,0.0,47020.0,0.0,0.0
25%,22959.0,0.0,123614.5,0.0,0.0
50%,35398.0,0.0,173475.0,0.0,0.0
75%,52206.5,7674.75,215954.25,48081.25,0.0
max,135156.0,62622.0,429607.0,364560.0,3336748.0


To the postgres database

In [63]:
destineo.to_sql('destineo', engine, index=False, if_exists='replace')

### Voies cyclables

In [96]:
cyclables = pd.read_csv('data/voies-cyclables.csv', sep=';')
print(cyclables.shape)
cyclables.head()

(1883, 10)


Unnamed: 0,NOM,VELO_TYPE,CIRCULATIO,CONTR_SENS,TRON_FONCT,LIBVOI,DATE_MAJ,SOURCE,Geo Point,Geo Shape
0,TRONCON CYCLABLE,BANDE_CYCLABLE,UNIDIRECTIONNEL,NON,VOIE DE DESSERTE,AVENUE JEAN XXIII,2017-08-10,ANGERS LOIRE METROPOLE - DIRECTION TRANSPORTS ...,"47.4510765578, -0.569394061809","{""type"": ""LineString"", ""coordinates"": [[-0.570..."
1,TRONCON CYCLABLE,BANDE_CYCLABLE,UNIDIRECTIONNEL,NON,VOIE DE DESSERTE LOCALE,BOULEVARD COPERNIC,2017-08-10,ANGERS LOIRE METROPOLE - DIRECTION TRANSPORTS ...,"47.4845333314, -0.522301770075","{""type"": ""LineString"", ""coordinates"": [[-0.522..."
2,TRONCON CYCLABLE,BANDE_CYCLABLE,UNIDIRECTIONNEL,NON,VOIE DE DESSERTE,BOULEVARD PABLO PICASSO,2017-08-10,ANGERS LOIRE METROPOLE - DIRECTION TRANSPORTS ...,"47.4529426556, -0.522170610031","{""type"": ""LineString"", ""coordinates"": [[-0.523..."
3,TRONCON CYCLABLE,TRAJECTOIRE_CYCLABLE,UNIDIRECTIONNEL,OUI,VOIE DE DESSERTE LOCALE,RUE DESMAZIERES,2017-08-10,ANGERS LOIRE METROPOLE - DIRECTION TRANSPORTS ...,"47.4608789336, -0.536339148475","{""type"": ""LineString"", ""coordinates"": [[-0.535..."
4,TRONCON CYCLABLE,BANDE_CYCLABLE,UNIDIRECTIONNEL,OUI,VOIE DE DESSERTE LOCALE,RUE FAIDHERBE,2017-08-10,ANGERS LOIRE METROPOLE - DIRECTION TRANSPORTS ...,"47.4668024591, -0.564011805685","{""type"": ""LineString"", ""coordinates"": [[-0.562..."


### Tables croisées

##### Travaux

In [79]:
travaux_csv = pd.read_csv('data/info-travaux.csv', sep=';')
print(travaux_csv.shape)
travaux_csv.head()

(56, 12)


Unnamed: 0,id,title,description,startAt,endAt,traffic,idParking,contact,email,isTramway,address,coordonnes
0,55422,Avenue Patton et rues du secteur,Neutralisation de voie. Chaussée rétrécie. Int...,2019-01-07T09:00:00+01:00,2019-12-31T19:00:00+01:00,slow,,241215400,info-travaux@ville.angers.fr,1,,"47.4730438869, -0.581367896117"
1,55582,Rue Choudieu,Neutralisation de voie. Chaussée rétrécie. Int...,2019-01-07T09:00:00+01:00,2019-12-31T19:00:00+01:00,slow,,241215400,info-travaux@ville.angers.fr,1,,"47.4760349729, -0.551161766052"
2,55762,Rue Henri-Peluau,Circulation interdite. Interdiction de station...,2019-01-21T09:00:00+01:00,2019-12-20T19:00:00+01:00,deviated,,241215400,info-travaux@ville.angers.fr,0,,"47.4908874387, -0.568303421452"
3,20,Quai Gambetta et place Molière,Fermeture de la bretelle d'accès au centre-vil...,2019-03-04T09:00:00+01:00,2019-05-17T20:00:00+02:00,deviated,,633286466,info-travaux@ville.angers.fr,1,,"47.473968225, -0.556182861328"
4,56695,Route de Briollay,Circulation alternée. Interdiction de stationn...,2019-04-01T10:00:00+02:00,2019-04-26T20:00:00+02:00,slow,,241215400,info-travaux@ville.angers.fr,0,,"47.4879695189, -0.53186753184"


In [97]:
travaux = travaux_csv[['title']].copy()
latlon = travaux_csv['coordonnes'].str.split(',', expand=True)
travaux['latitude'] = pd.to_numeric(latlon[0])
travaux['longitude'] = pd.to_numeric(latlon[1])
travaux['code_type'] = 1
travaux['type'] = 'travaux'
travaux.head()

Unnamed: 0,title,latitude,longitude,code_type,type
0,Avenue Patton et rues du secteur,47.473044,-0.581368,1,travaux
1,Rue Choudieu,47.476035,-0.551162,1,travaux
2,Rue Henri-Peluau,47.490887,-0.568303,1,travaux
3,Quai Gambetta et place Molière,47.473968,-0.556183,1,travaux
4,Route de Briollay,47.48797,-0.531868,1,travaux


##### Borne incendie

In [83]:
incendie_csv = pd.read_csv('data/borne-incendie.csv', sep=';')
print(incendie_csv.shape)
incendie_csv.head()

(3396, 13)


Unnamed: 0,Geo Point,Geo Shape,COMMUNE,IDVOIE,DEBIT,ADRESSE,CODINSEE,NUM_POMPIER,DIAMETRE,PRESSION,TYPE,EQUIP_DISPO,NOMVOIE
0,"47.4365427529, -0.495399462","{""type"": ""Point"", ""coordinates"": [-0.495399462...",LES PONTS-DE-CE,B122,163.0,Chemin Pellouard rond point Clos du Plessis - ...,246,8207,100,3.0,PI,,Le Plessis Charruault
1,"47.473716905, -0.586110753","{""type"": ""Point"", ""coordinates"": [-0.586110753...",ANGERS,0780,0.0,Av du General Patton angle rue de Belle Beille,7,723,100,0.0,PI,Non,Rue de Belle-Beille
2,"47.4574961605, -0.4754732236","{""type"": ""Point"", ""coordinates"": [-0.475473223...",TRELAZE,B110,0.0,Rue André Gide,353,11180,100,0.0,PI,,La Grande Guérinière
3,"47.43006685, -0.6073650385","{""type"": ""Point"", ""coordinates"": [-0.607365038...",BOUCHEMAINE,0066,94.0,Rue de Haute Roche No 40,35,3542,100,5.0,PI,,Rue de Haute Roche
4,"47.4520699205, -0.5321742619","{""type"": ""Point"", ""coordinates"": [-0.532174261...",ANGERS,5669,129.0,Rue des Mortiers angle square Mortiers,7,452,100,0.0,PI,,Square des Mortiers


In [98]:
incendie = incendie_csv[['COMMUNE']].copy()
incendie.columns = ['title']
latlon = incendie_csv['Geo Point'].str.split(',', expand=True)
incendie['latitude'] = pd.to_numeric(latlon[0])
incendie['longitude'] = pd.to_numeric(latlon[1])
incendie['code_type'] = 0
incendie['type'] = 'incendie'
incendie.head()

Unnamed: 0,title,latitude,longitude,code_type,type
0,LES PONTS-DE-CE,47.436543,-0.495399,0,incendie
1,ANGERS,47.473717,-0.586111,0,incendie
2,TRELAZE,47.457496,-0.475473,0,incendie
3,BOUCHEMAINE,47.430067,-0.607365,0,incendie
4,ANGERS,47.45207,-0.532174,0,incendie


##### Sanitaires publics

In [86]:
sanitaires_csv = pd.read_csv('data/sanitaires-publics.csv', sep=';')
print(sanitaires_csv.shape)
sanitaires_csv.head()

(34, 9)


Unnamed: 0,NOM,SOURCE,GID,CODE_INSEE,SOUR_GEOM,MODELE,DATE_MAJ,Geo Shape,Geo Point
0,Sanitaires Publics,Angers Loire Métropole - Direction Environneme...,26,49007,ORTHOPHOTO,SAGELEC,2016-09-12T02:00:00+02:00,"{""type"": ""Point"", ""coordinates"": [-0.524421603...","47.4867585726, -0.524421603504"
1,Sanitaires Publics,Angers Loire Métropole - Direction Environneme...,30,49007,ORTHOPHOTO,SAGELEC,2016-09-12T02:00:00+02:00,"{""type"": ""Point"", ""coordinates"": [-0.562668224...","47.4740384086, -0.5626682242"
2,Sanitaires Publics,Angers Loire Métropole - Direction Environneme...,35,49007,ORTHOPHOTO,SAGELEC,2016-09-12T02:00:00+02:00,"{""type"": ""Point"", ""coordinates"": [-0.536982919...","47.4594562153, -0.536982919494"
3,Sanitaires Publics,Angers Loire Métropole - Direction Environneme...,37,49007,ORTHOPHOTO,SAGELEC,2016-09-12T02:00:00+02:00,"{""type"": ""Point"", ""coordinates"": [-0.559673729...","47.4649367703, -0.55967372991"
4,Sanitaires Publics,Angers Loire Métropole - Direction Environneme...,44,49007,ORTHOPHOTO,SAGELEC,2016-09-12T02:00:00+02:00,"{""type"": ""Point"", ""coordinates"": [-0.534212328...","47.4789937041, -0.534212328038"


In [87]:
sanitaires = sanitaires_csv[['NOM']].copy()
sanitaires.columns = ['title']
latlon = sanitaires_csv['Geo Point'].str.split(',', expand=True)
sanitaires['latitude'] = pd.to_numeric(latlon[0])
sanitaires['longitude'] = pd.to_numeric(latlon[1])
sanitaires['code_type'] = 2
sanitaires['type'] = 'sanitaires'
sanitaires.head()

Unnamed: 0,title,latitude,longitude,code_type,type
0,Sanitaires Publics,47.486759,-0.524422,2,sanitaires
1,Sanitaires Publics,47.474038,-0.562668,2,sanitaires
2,Sanitaires Publics,47.459456,-0.536983,2,sanitaires
3,Sanitaires Publics,47.464937,-0.559674,2,sanitaires
4,Sanitaires Publics,47.478994,-0.534212,2,sanitaires


##### Déchèteries

In [88]:
decheteries_csv = pd.read_csv('data/decheterie.csv', sep=';')
print(decheteries_csv.shape)
decheteries_csv.head()

(8, 11)


Unnamed: 0,ID,LBL_TYPEOM,OBS,NOTE,HORAIRES_0401_1031,HORAIRES_1102_0331,NUMVOIE,NOMVOIE,CODEPOST,VILLE,coordonnes_geo
0,DECHETERIE_01,Déchèterie,Déchèterie du Haut Coudray (Montreuil-Juigné),,lundi au vendredi de 8h30 à 12h et de 13h30 à ...,lundi au vendredi de 9h à 12h et de 14h à 17h ...,22.0,Rue Paul Heroult - Z.I. du Haut Coudray,49460,Montreuil-Juigné,"47.533558, -0.626056"
1,DECHETERIE_07,Déchèterie,Déchèterie du Lac Bleu (Avrillé),,lundi au vendredi de 8h30 à 12h et de 13h30 à ...,lundi au vendredi de 9h à 12h et de 14h à 17h ...,,Rue de la Ternière,49240,Avrillé,"47.501574, -0.597444"
2,DECHETERIE_03,Déchèterie,Déchèterie de la Claie Brunette (Juigné-sur-Lo...,Accès réservé aux habitants munis d’un badge d...,lundi au vendredi de 8h30 à 12h et de 13h30 à ...,lundi au vendredi de 9h à 12h et de 14h à 17h ...,,Route des Coteaux,49610,Juigné-sur-Loire,"47.403524, -0.509974"
3,DECHETERIE_06,Déchèterie,Déchèterie des Brunelleries (Bouchemaine),,lundi au vendredi de 8h30 à 12h et de 13h30 à ...,lundi au vendredi de 9h à 12h et de 14h à 17h ...,,Ecopole Anjou-Loire,49080,Bouchemaine,"47.437577, -0.61541"
4,DECHETERIE_02,Déchèterie,Déchèterie de la Baumette (Angers),,lundi au vendredi de 8h30 à 12h et de 13h30 à ...,lundi au vendredi de 9h à 12h et de 14h à 17h ...,,Allée Seuil en Maine - La Baumette,49000,Angers,"47.458953, -0.574368"


In [90]:
decheteries = decheteries_csv[['OBS']].copy()
decheteries.columns = ['title']
latlon = decheteries_csv['coordonnes_geo'].str.split(',', expand=True)
decheteries['latitude'] = pd.to_numeric(latlon[0])
decheteries['longitude'] = pd.to_numeric(latlon[1])
decheteries['code_type'] = 3
decheteries['type'] = 'decheteries'
decheteries.head()

Unnamed: 0,title,latitude,longitude,code_type,type
0,Déchèterie du Haut Coudray (Montreuil-Juigné),47.533558,-0.626056,3,decheteries
1,Déchèterie du Lac Bleu (Avrillé),47.501574,-0.597444,3,decheteries
2,Déchèterie de la Claie Brunette (Juigné-sur-Lo...,47.403524,-0.509974,3,decheteries
3,Déchèterie des Brunelleries (Bouchemaine),47.437577,-0.61541,3,decheteries
4,Déchèterie de la Baumette (Angers),47.458953,-0.574368,3,decheteries


#### Merge

In [99]:
static = pd.concat([travaux,incendie,sanitaires,decheteries], ignore_index=True)
print(static.shape)
static.head()

(3494, 5)


Unnamed: 0,title,latitude,longitude,code_type,type
0,Avenue Patton et rues du secteur,47.473044,-0.581368,1,travaux
1,Rue Choudieu,47.476035,-0.551162,1,travaux
2,Rue Henri-Peluau,47.490887,-0.568303,1,travaux
3,Quai Gambetta et place Molière,47.473968,-0.556183,1,travaux
4,Route de Briollay,47.48797,-0.531868,1,travaux


In [100]:
static['type'].unique()

array(['travaux', 'incendie', 'sanitaires', 'decheteries'], dtype=object)

To the postgres database

In [101]:
static.to_sql('static', engine, index=False, if_exists='replace')