In [1]:
import os
import glob
import shutil

import lzma
import json

import numpy as np
import pandas as pd

import geopandas as gpd
from geopandas.tools import overlay
import geopy

from keplergl import KeplerGl

# Data

In [2]:
input_path = 'parisstreet'

list_xz = glob.glob('./' + input_path +'/preprocessing' + '/*.xz')
len(list_xz)

1910

# Recupère Localisation

In [3]:
preprocessing_location = pd.read_csv(os.path.join(input_path,'df_adress.csv'))
preprocessing_location = preprocessing_location.drop_duplicates()
preprocessing_location

Unnamed: 0,zip_code,latitude,longitude,laltitude,file,geometry
0,75004,48.854836,2.350256,0.0,./#parisstreet\2016-05-09_05-07-58_UTC.json.xz,POINT (2.350256 48.854836)
1,75003,48.866667,2.364003,0.0,./#parisstreet\2018-03-15_22-49-50_UTC.json.xz,POINT (2.364003 48.866667)
2,75007,48.866727,2.322939,0.0,./#parisstreet\2018-07-01_13-59-49_UTC.json.xz,POINT (2.322939 48.866727)
3,75001,48.859301,2.340771,0.0,./#parisstreet\2018-07-21_19-31-22_UTC.json.xz,POINT (2.340771 48.859301)
4,75001,48.859301,2.340771,0.0,./#parisstreet\2018-07-21_19-32-13_UTC.json.xz,POINT (2.340771 48.859301)
...,...,...,...,...,...,...
1578,75020,48.859789,2.390837,0.0,./#parisstreet\2020-07-30_21-24-24_UTC.json.xz,POINT (2.390837 48.859789)
1579,,48.869057,2.324187,0.0,./#parisstreet\2020-07-30_21-46-02_UTC.json.xz,POINT (2.324187 48.869057)
1580,75007,48.858819,2.294597,0.0,./#parisstreet\2020-07-30_22-50-54_UTC.json.xz,POINT (2.294597 48.858819)
1581,75013,48.887099,2.342330,0.0,./#parisstreet\2020-07-31_06-29-45_UTC.json.xz,POINT (2.34233 48.887099)


In [4]:
preprocessing_location['match'] = preprocessing_location['file'].apply(lambda x : os.path.split(x)[-1])

geo_adress = gpd.GeoDataFrame(preprocessing_location, geometry = gpd.points_from_xy(preprocessing_location.longitude, preprocessing_location.latitude), crs="EPSG:4326")
geo_adress

Unnamed: 0,zip_code,latitude,longitude,laltitude,file,geometry,match
0,75004,48.854836,2.350256,0.0,./#parisstreet\2016-05-09_05-07-58_UTC.json.xz,POINT (2.35026 48.85484),2016-05-09_05-07-58_UTC.json.xz
1,75003,48.866667,2.364003,0.0,./#parisstreet\2018-03-15_22-49-50_UTC.json.xz,POINT (2.36400 48.86667),2018-03-15_22-49-50_UTC.json.xz
2,75007,48.866727,2.322939,0.0,./#parisstreet\2018-07-01_13-59-49_UTC.json.xz,POINT (2.32294 48.86673),2018-07-01_13-59-49_UTC.json.xz
3,75001,48.859301,2.340771,0.0,./#parisstreet\2018-07-21_19-31-22_UTC.json.xz,POINT (2.34077 48.85930),2018-07-21_19-31-22_UTC.json.xz
4,75001,48.859301,2.340771,0.0,./#parisstreet\2018-07-21_19-32-13_UTC.json.xz,POINT (2.34077 48.85930),2018-07-21_19-32-13_UTC.json.xz
...,...,...,...,...,...,...,...
1578,75020,48.859789,2.390837,0.0,./#parisstreet\2020-07-30_21-24-24_UTC.json.xz,POINT (2.39084 48.85979),2020-07-30_21-24-24_UTC.json.xz
1579,,48.869057,2.324187,0.0,./#parisstreet\2020-07-30_21-46-02_UTC.json.xz,POINT (2.32419 48.86906),2020-07-30_21-46-02_UTC.json.xz
1580,75007,48.858819,2.294597,0.0,./#parisstreet\2020-07-30_22-50-54_UTC.json.xz,POINT (2.29460 48.85882),2020-07-30_22-50-54_UTC.json.xz
1581,75013,48.887099,2.342330,0.0,./#parisstreet\2020-07-31_06-29-45_UTC.json.xz,POINT (2.34233 48.88710),2020-07-31_06-29-45_UTC.json.xz


# Récupère Language processing

In [5]:
df_language = pd.read_csv(os.path.join(input_path,'sentiment.csv'))
df_language = df_language.drop(['Unnamed: 0'], axis = 1)
df_language['match'] = df_language['index'].apply(lambda x : os.path.split(x)[-1])

In [6]:
df_language

Unnamed: 0,index,sentiment,match
0,./parisstreet/preprocessing\2016-05-09_05-07-5...,0.0,2016-05-09_05-07-58_UTC.json.xz
1,./parisstreet/preprocessing\2016-10-08_22-03-5...,3.0,2016-10-08_22-03-56_UTC.json.xz
2,./parisstreet/preprocessing\2017-08-07_20-25-0...,0.0,2017-08-07_20-25-02_UTC.json.xz
3,./parisstreet/preprocessing\2017-10-29_20-18-1...,0.0,2017-10-29_20-18-19_UTC.json.xz
4,./parisstreet/preprocessing\2018-03-15_22-49-5...,0.0,2018-03-15_22-49-50_UTC.json.xz
...,...,...,...
1874,./parisstreet/preprocessing\2020-07-30_21-24-2...,0.0,2020-07-30_21-24-24_UTC.json.xz
1875,./parisstreet/preprocessing\2020-07-30_21-46-0...,0.0,2020-07-30_21-46-02_UTC.json.xz
1876,./parisstreet/preprocessing\2020-07-30_22-50-5...,9.0,2020-07-30_22-50-54_UTC.json.xz
1877,./parisstreet/preprocessing\2020-07-31_06-29-4...,0.0,2020-07-31_06-29-45_UTC.json.xz


# UNIFICATION

In [7]:
unifodeur = geo_adress.merge(df_language, on = 'match', how = 'inner').drop(["index", "match"], axis = 1)
unifodeur.head()

Unnamed: 0,zip_code,latitude,longitude,laltitude,file,geometry,sentiment
0,75004,48.854836,2.350256,0.0,./#parisstreet\2016-05-09_05-07-58_UTC.json.xz,POINT (2.35026 48.85484),0.0
1,75003,48.866667,2.364003,0.0,./#parisstreet\2018-03-15_22-49-50_UTC.json.xz,POINT (2.36400 48.86667),0.0
2,75007,48.866727,2.322939,0.0,./#parisstreet\2018-07-01_13-59-49_UTC.json.xz,POINT (2.32294 48.86673),0.0
3,75001,48.859301,2.340771,0.0,./#parisstreet\2018-07-21_19-31-22_UTC.json.xz,POINT (2.34077 48.85930),0.0
4,75001,48.859301,2.340771,0.0,./#parisstreet\2018-07-21_19-32-13_UTC.json.xz,POINT (2.34077 48.85930),0.0


In [36]:
unifodeur.to_csv('ParisFeeling.csv')

#  Pr arrondisement

In [38]:
geospatial_data  = 'C:\\Users\\Basile\\Pictures\\DATA\\france-geojson-master\\departements\\75-paris\\arrondissements.geojson'
f = gpd.read_file(geospatial_data, crs="EPSG:4326")
f['n_sq_ar'] = f['n_sq_ar'].apply(lambda x : str(x)[:3] + str(x)[-2:])
f = f.set_index('n_sq_ar')
f = f.sort_index(axis=0)

f = f.drop(['n_sq_co', 'objectid', 'l_ar', 'c_arinsee','c_ar'], axis =1 )
f.head()

Unnamed: 0_level_0,longueur,surface,perimetre,l_aroff,geometry
n_sq_ar,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
75001,6054.680862,1824613.0,6054.936862,Louvre,"POLYGON ((2.32801 48.86992, 2.32997 48.86851, ..."
75002,4553.938764,991153.7,4554.10436,Bourse,"POLYGON ((2.35152 48.86443, 2.35095 48.86341, ..."
75003,4519.071982,1170883.0,4519.263648,Temple,"POLYGON ((2.36383 48.86750, 2.36389 48.86747, ..."
75004,5420.636779,1600586.0,5420.908434,Hôtel-de-Ville,"POLYGON ((2.36851 48.85573, 2.36900 48.85374, ..."
75005,6238.823408,2539375.0,6239.195396,Panthéon,"POLYGON ((2.36443 48.84614, 2.36484 48.84584, ..."


In [43]:
sentiment_arr = gpd.sjoin(f,unifodeur, how="inner", op='intersects')
group = sentiment_arr.groupby(['n_sq_ar']).mean()
group = group.drop(['index_right'], axis = 1)
geo_group = gpd.GeoDataFrame(group, geometry = gpd.points_from_xy(group.longitude, group.latitude), crs="EPSG:4326")
sentiment_dep_geo = gpd.sjoin(f,geo_group, how="inner", op='intersects')
sentiment_dep_geo = sentiment_dep_geo.drop(['index_right', 'longueur_left', 'surface_left','perimetre_left'], axis = 1)

In [44]:
sentiment_dep_geo.head()

Unnamed: 0_level_0,l_aroff,geometry,longueur_right,surface_right,perimetre_right,latitude,longitude,laltitude,sentiment
n_sq_ar,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
75001,Louvre,"POLYGON ((2.32801 48.86992, 2.32997 48.86851, ...",6054.680862,1824613.0,6054.936862,48.860575,2.339585,0.0,1.353846
75002,Bourse,"POLYGON ((2.35152 48.86443, 2.35095 48.86341, ...",4553.938764,991153.7,4554.10436,48.86731,2.340701,0.0,1.53125
75003,Temple,"POLYGON ((2.36383 48.86750, 2.36389 48.86747, ...",4519.071982,1170883.0,4519.263648,48.858642,2.362102,0.0,0.578947
75004,Hôtel-de-Ville,"POLYGON ((2.36851 48.85573, 2.36900 48.85374, ...",5420.636779,1600586.0,5420.908434,48.85673,2.351766,0.0,0.6
75005,Panthéon,"POLYGON ((2.36443 48.84614, 2.36484 48.84584, ...",6238.823408,2539375.0,6239.195396,48.849361,2.349583,0.0,0.969697


In [45]:
sentiment_dep_geo.head()
sentiment_dep_geo = sentiment_dep_geo.reset_index()
sentiment_dep_geo

Unnamed: 0,n_sq_ar,l_aroff,geometry,longueur_right,surface_right,perimetre_right,latitude,longitude,laltitude,sentiment
0,75001,Louvre,"POLYGON ((2.32801 48.86992, 2.32997 48.86851, ...",6054.680862,1824613.0,6054.936862,48.860575,2.339585,0.0,1.353846
1,75002,Bourse,"POLYGON ((2.35152 48.86443, 2.35095 48.86341, ...",4553.938764,991153.7,4554.10436,48.86731,2.340701,0.0,1.53125
2,75003,Temple,"POLYGON ((2.36383 48.86750, 2.36389 48.86747, ...",4519.071982,1170883.0,4519.263648,48.858642,2.362102,0.0,0.578947
3,75004,Hôtel-de-Ville,"POLYGON ((2.36851 48.85573, 2.36900 48.85374, ...",5420.636779,1600586.0,5420.908434,48.85673,2.351766,0.0,0.6
4,75005,Panthéon,"POLYGON ((2.36443 48.84614, 2.36484 48.84584, ...",6238.823408,2539375.0,6239.195396,48.849361,2.349583,0.0,0.969697
5,75006,Luxembourg,"POLYGON ((2.34459 48.85405, 2.34428 48.85375, ...",6483.32456,2153096.0,6483.686786,48.851653,2.337176,0.0,1.057971
6,75007,Palais-Bourbon,"POLYGON ((2.32090 48.86306, 2.32094 48.86305, ...",8099.033151,4090057.0,8099.424883,48.858901,2.300237,0.0,1.596386
7,75008,Élysée,"POLYGON ((2.32584 48.86956, 2.32569 48.86954, ...",7880.260661,3880036.0,7880.533268,48.869158,2.313472,0.0,1.93
8,75009,Opéra,"POLYGON ((2.33978 48.88203, 2.33982 48.88202, ...",6471.389144,2178303.0,6471.58829,48.874764,2.334804,0.0,0.916667
9,75010,Entrepôt,"POLYGON ((2.36469 48.88437, 2.36485 48.88436, ...",6739.168359,2891739.0,6739.375055,48.873459,2.359521,0.0,0.766667


In [47]:
feeling_paris = KeplerGl(height=500)
feeling_paris.add_data(sentiment_dep_geo, name = 'feeling_paris')
feeling_paris.add_data(unifodeur, name = 'point_feeling_paris')
feeling_paris.save_to_html(file_name='feeling_paris_1.html')

User Guide: https://github.com/keplergl/kepler.gl/blob/master/docs/keplergl-jupyter/user-guide.md
Map saved to feeling_paris_1.html!


In [24]:
import matplotlib.pyplot as plt

x = sentiment['zip_code'].value_counts().index
y = sentiment['zip_code'].value_counts().values

plt.bar(x,y)

NameError: name 'sentiment' is not defined

## Visualisation

In [8]:
arrondisement = 'C:\\Users\\Basile\\Pictures\\DATA\\france-geojson-master\\departements\\75-paris\\arrondissements-75-paris.geojson'
arrondisement = gpd.read_file(arrondisement)
arrondisement

Unnamed: 0,code,nom,geometry
0,75001,Paris,"POLYGON ((2.39007 48.82570, 2.38153 48.82242, ..."


In [9]:
insta_map = KeplerGl(height=500)
insta_map.add_data(insight, name = 'insta_map')
insta_map.add_data(arrondisement, name = 'arrondisement')
insta_map.save_to_html(file_name='insta_map.html')

User Guide: https://github.com/keplergl/kepler.gl/blob/master/docs/keplergl-jupyter/user-guide.md
Map saved to insta_map.html!


In [10]:
insight

Unnamed: 0.1,Unnamed: 0,index,sentiment,zip_code,latitude,longitude,laltitude,file,code,nom,geometry
0,0.0,./parisstreet/preprocessing\2016-05-09_05-07-5...,neutral,75004,48.854388,2.356921,0.0,./#parisstreet\2016-05-09_05-07-58_UTC.json.xz,75,Paris,POINT (2.35692 48.85439)
1,1.0,./parisstreet/preprocessing\2016-10-08_22-03-5...,positif,75014,48.830108,2.323026,0.0,./#parisstreet\2016-10-08_22-03-56_UTC.json.xz,75,Paris,POINT (2.32303 48.83011)
2,2.0,./parisstreet/preprocessing\2017-08-07_20-25-0...,neutral,75018,48.892046,2.348679,0.0,./#parisstreet\2017-08-07_20-25-02_UTC.json.xz,75,Paris,POINT (2.34868 48.89205)
3,3.0,./parisstreet/preprocessing\2017-10-29_20-18-1...,neutral,75018,48.892046,2.348679,0.0,./#parisstreet\2017-10-29_20-18-19_UTC.json.xz,75,Paris,POINT (2.34868 48.89205)
4,4.0,./parisstreet/preprocessing\2018-03-15_22-49-5...,neutral,75003,48.862489,2.359316,0.0,./#parisstreet\2018-03-15_22-49-50_UTC.json.xz,75,Paris,POINT (2.35932 48.86249)
...,...,...,...,...,...,...,...,...,...,...,...
1138,,,,75007,48.854665,2.312078,0.0,./#parisstreet\2020-05-10_08-50-24_UTC.json.xz,75,Paris,POINT (2.31208 48.85467)
1139,,,,75010,48.876090,2.362272,0.0,./#parisstreet\2020-05-10_09-00-12_UTC.json.xz,75,Paris,POINT (2.36227 48.87609)
1140,,,,75001,48.845000,2.375200,0.0,./#parisstreet\2020-05-10_09-34-11_UTC.json.xz,75,Paris,POINT (2.37520 48.84500)
1141,,,,75008,48.873645,2.311600,0.0,./#parisstreet\2020-05-10_11-06-25_UTC.json.xz,75,Paris,POINT (2.31160 48.87365)
