Created 15/10/2018

Last update 17/01/2019

# 1. Import library

In [1]:
import pymongo
from __future__ import division
import numpy as np
import pandas as pd
# from scipy import stats
from bokeh.models import HoverTool, NumeralTickFormatter, ColumnDataSource
from bokeh.plotting import figure
from bokeh.io import show, output_notebook
from bokeh.layouts import row
output_notebook() # To display plot into Jupyter notebook
import Fcn_BasicStatisticalAnalysisOfPopulation

# 2. Data loading

In [2]:
# User inputs: database and collection that hold the city documents
DatabaseName = 'GeoApiGouv'
CollectionName = 'PopulationCity'

# Open the connection to the local MongoDB server and select the database
myclient = pymongo.MongoClient("mongodb://localhost:27017/")
mydb = myclient[DatabaseName]

# Read every document of the collection into an in-memory list
QueryMongo = [document for document in mydb[CollectionName].find({})]

# 3. Statistical analysis

## 3.1. Arrange data loaded

In [3]:
# Flatten the loaded documents into parallel lists (one entry per city, same order)
NumberOfCity      = len(QueryMongo)
AllCityNames      = [city['nom'] for city in QueryMongo]
AllPopulationList = [city['population'] for city in QueryMongo]
AllAreaList       = [city['surface'] for city in QueryMongo]
# Population / area scaled by 100; the area section of this notebook treats
# 'surface' as hectares, so the result reads as inhabitants per km2
AllDensityList    = np.array(AllPopulationList) / np.array(AllAreaList) * 100

In [45]:
# Gather the per-city lists in one table and display the five densest cities
df = pd.DataFrame({
    "City name": AllCityNames,
    "Population": AllPopulationList,
    "Area": AllAreaList,
    "Density": AllDensityList,
})
df.sort_values(by='Density', ascending=False).head(5)

Unnamed: 0,Area,City name,Density,Population
227,5038,Rennes,4195.57364,211373
111,1043,Fougères,1933.844679,20170
21,56,Bécherel,1289.285714,722
90,782,Dinard,1258.184143,9839
202,3826,Saint-Malo,1174.046001,44919


## 3.2. Population basic analysis

In [4]:
def _format_thousands(value):
    """Return `value` rendered with a space as thousands separator (1019923 -> '1 019 923')."""
    return "{:,}".format(value).replace(',', ' ')


TotalPopulation   = sum(AllPopulationList)
AveragePopulation = int(np.mean(AllPopulationList))   # mean population per city
MedianPopulation  = int(np.median(AllPopulationList)) # median population per city
MinPopulation     = np.min(AllPopulationList)         # smallest population
MaxPopulation     = np.max(AllPopulationList)         # largest population

# The city names are already in memory, in the same order as the populations:
# look them up by position (first occurrence of the extreme value) instead of
# sending two more queries, filtered with numpy scalars, to MongoDB.
LeastPopulatedCity = AllCityNames[int(np.argmin(AllPopulationList))].encode('utf-8')
MostPopulatedCity  = AllCityNames[int(np.argmax(AllPopulationList))].encode('utf-8')

print('Nombre de ville: ' + _format_thousands(NumberOfCity))
print('Population totale: ' + _format_thousands(TotalPopulation))
print('Population moyenne par ville: ' + _format_thousands(AveragePopulation))
print('Population mediane par ville: ' + _format_thousands(MedianPopulation))
print('Population la plus faible: ' + _format_thousands(MinPopulation) + ' (' + LeastPopulatedCity + ')')
print('Population la plus élevée: ' + _format_thousands(MaxPopulation) + ' (' + MostPopulatedCity + ')')

Nombre de ville: 345
Population totale: 1 019 923
Population moyenne par ville: 2 956
Population mediane par ville: 1 314
Population la plus faible: 107 (Bléruais)
Population la plus élevée: 211 373 (Rennes)


### 3.2.1. Population histogram

In [5]:
# Prepare the histogram data: bin limits and bins come from the project helpers
# (their names suggest a logarithmic scale), then the cities are counted per bin
start, stop = (Fcn_BasicStatisticalAnalysisOfPopulation.DefineMinMaxLog(MinPopulation, 'Min'),
               Fcn_BasicStatisticalAnalysisOfPopulation.DefineMinMaxLog(MaxPopulation, 'Max'))
bins = Fcn_BasicStatisticalAnalysisOfPopulation.CreateLogBin(start, stop)
hist, bin_edges = np.histogram(AllPopulationList, bins)

# Trim the first and last bins when their count is 0 - comment out this line to show the full log scale
hist, bin_edges = Fcn_BasicStatisticalAnalysisOfPopulation.RemoveLastFirstValuesWhenZero(hist, bin_edges)

In [6]:
# Plot histogram: bars sit on consecutive integer positions (equal widths) and
# the ticks are relabelled with the real bin edges
tick_positions = list(range(len(bin_edges)))

p = figure(plot_height=300, title="City population",
           x_axis_label='City population bins', y_axis_label='Count')
p.quad(top=list(hist), bottom=0,
       left=tick_positions[:-1], right=tick_positions[1:],
       fill_color="#0276FD", line_color="#5F5B5B", alpha=1)

p.xaxis.ticker = tick_positions
p.xaxis.major_label_orientation = 3.14/4
p.xaxis.major_label_overrides = dict(
    (position, "{:,}".format(bin_edges[position]).replace(',', ' '))
    for position in tick_positions)

# Title styling
p.title.text_font_size = '12pt'
p.title.text_color = "#0276FD"
p.title.text_font = "verdana"
# Same fonts on both axes
for axis in (p.xaxis, p.yaxis):
    axis.axis_label_text_font = "verdana"
    axis.axis_label_text_font_style = "normal"
    axis.major_label_text_font = "verdana"

show(p)

La plus grande part des villes (30%) a une population comprise entre 1000 et 2000 habitants. Viennent ensuite les villes ayant une population comprise entre 2000 et 3000 habitants (11%) ; enfin, 7% des villes ont une population de 300 à 400 habitants.

In [7]:
# Share of the cities falling in each population bin, rounded to a whole percent
CityPopBinPercentage = [int(round(count / NumberOfCity * 100)) for count in hist]
CityPopBinPercentage[9]

30

### 3.2.2. Population cumulative distribution

In [8]:
# Plot cumulative distribution: running share of cities up to each bin edge,
# starting from 0 at the first edge
tick_positions = list(range(len(bin_edges)))
cumulative_share = [0] + [running_total / NumberOfCity for running_total in np.cumsum(hist)]

p = figure(plot_height=300, title="City population distribution",
           x_axis_label='City population bins', y_axis_label='Percentile [%ile]')
p.step(tick_positions, cumulative_share,
       line_color="#0276FD", line_width=2)

p.xaxis.ticker = tick_positions
p.xaxis.major_label_orientation = 3.14/4
p.xaxis.major_label_overrides = dict(
    (position, "{:,}".format(bin_edges[position]).replace(',', ' '))
    for position in tick_positions)

# Title styling
p.title.text_font_size = '12pt'
p.title.text_color = "#0276FD"
p.title.text_font = "verdana"
# Same fonts on both axes
for axis in (p.xaxis, p.yaxis):
    axis.axis_label_text_font = "verdana"
    axis.axis_label_text_font_style = "normal"
    axis.major_label_text_font = "verdana"

show(p)

37% des villes ont 1000 habitants ou moins. 68% des villes ont 2000 habitants ou moins. Moins de 10% des villes ont 5000 habitants ou plus. Les villes de 10000 habitants ou plus représentent moins de 1% des villes.

In [9]:
# Cumulative share of cities up to each population bin, rounded to a whole percent
CityPopBinCumDistributionPercentage = [int(round(running_total / NumberOfCity * 100))
                                       for running_total in np.cumsum(hist)]
CityPopBinCumDistributionPercentage[20]

99

## 3.3. Area basic analysis

In [10]:
# Statistics about area. Values are divided by 100 before printing to turn
# the stored surface (hectares) into km2.
TotalArea = sum(AllAreaList)/100          # km2, originally surface in hectare
AverageArea = int(np.mean(AllAreaList))   # mean area
MedianArea = int(np.median(AllAreaList))  # median area
MinArea = np.min(AllAreaList)             # smallest area
MaxArea = np.max(AllAreaList)             # largest area

# Unit suffix, encoded once so it can be concatenated with the byte-string labels
SquareKmSuffix = ' km' + (u"\u00b2").encode('utf-8')

# The city names are already in memory, in the same order as the areas: look
# them up by position (first occurrence of the extreme value) instead of
# sending two more queries, filtered with numpy scalars, to MongoDB.
SmallestCity = AllCityNames[int(np.argmin(AllAreaList))].encode('utf-8')
LargestCity  = AllCityNames[int(np.argmax(AllAreaList))].encode('utf-8')

print('Surface totale: ' + str(TotalArea) + SquareKmSuffix)
print('Surface moyenne: ' + str(AverageArea/100) + SquareKmSuffix)
print('Surface mediane: ' + str(MedianArea/100) + SquareKmSuffix)
print('Surface la plus faible: ' + str(MinArea/100) + SquareKmSuffix + ' (' + SmallestCity + ')')
print('Surface la plus élevée: ' + str(MaxArea/100) + SquareKmSuffix + ' (' + LargestCity + ')')

Surface totale: 6840.54 km²
Surface moyenne: 19.82 km²
Surface mediane: 15.81 km²
Surface la plus faible: 0.56 km² (Bécherel)
Surface la plus élevée: 110.66 km² (Paimpont)


### 3.3.1 Area histogram

In [11]:
# Prepare the histogram data: bin limits and bins come from the project helpers
# (their names suggest a logarithmic scale), then the cities are counted per bin
start, stop = (Fcn_BasicStatisticalAnalysisOfPopulation.DefineMinMaxLog(MinArea, 'Min'),
               Fcn_BasicStatisticalAnalysisOfPopulation.DefineMinMaxLog(MaxArea, 'Max'))
bins = Fcn_BasicStatisticalAnalysisOfPopulation.CreateLogBin(start, stop)
hist, bin_edges = np.histogram(AllAreaList, bins)

# Trim the first and last bins when their count is 0 - comment out this line to show the full log scale
hist, bin_edges = Fcn_BasicStatisticalAnalysisOfPopulation.RemoveLastFirstValuesWhenZero(hist, bin_edges)

In [12]:
# Plot histogram: bars sit on consecutive integer positions (equal widths) and
# the ticks are relabelled with the bin edges divided by 100 (km2)
tick_positions = list(range(len(bin_edges)))

# Edges below 1 keep their decimals, larger ones are truncated to integers
tick_labels = {}
for position in tick_positions:
    edge_value = bin_edges[position]/100
    if edge_value < 1:
        shown_value = edge_value
    else:
        shown_value = int(edge_value)
    tick_labels[position] = "{:,}".format(shown_value).replace(',', ' ')

p = figure(plot_height=300, title="City area",
           x_axis_label='City area bins'+ ' [km' + (u"\u00b2]").encode('utf-8'), y_axis_label='Count [#]')
p.quad(top=list(hist), bottom=0,
       left=tick_positions[:-1], right=tick_positions[1:],
       fill_color="#0276FD", line_color="#5F5B5B", alpha=1)

p.xaxis.ticker = tick_positions
p.xaxis.major_label_orientation = 3.14/4
p.xaxis.major_label_overrides = tick_labels

# Title styling
p.title.text_font_size = '12pt'
p.title.text_color = "#0276FD"
p.title.text_font = "verdana"
# Same fonts on both axes
for axis in (p.xaxis, p.yaxis):
    axis.axis_label_text_font = "verdana"
    axis.axis_label_text_font_style = "normal"
    axis.major_label_text_font = "verdana"
show(p)

36% des villes ont un territoire compris entre 10 et 20 km$^2$ et 19% des villes ont une surface comprise entre 20 et 30 km$^2$. Une seule ville a un territoire inférieur à 1 km$^2$.

In [13]:
# Share of the cities falling in each area bin, rounded to a whole percent
CityAreaBinPercentage = [int(round(count / NumberOfCity * 100)) for count in hist]
CityAreaBinPercentage[15]

19

### 3.3.2. Area cumulative distribution

In [14]:
# Plot cumulative distribution: running share of cities up to each bin edge,
# starting from 0 at the first edge
tick_positions = list(range(len(bin_edges)))
cumulative_share = [0] + [running_total / NumberOfCity for running_total in np.cumsum(hist)]

# Tick labels are the bin edges divided by 100; edges below 1 keep their
# decimals, larger ones are truncated to integers
tick_labels = {}
for position in tick_positions:
    edge_value = bin_edges[position]/100
    if edge_value < 1:
        shown_value = edge_value
    else:
        shown_value = int(edge_value)
    tick_labels[position] = "{:,}".format(shown_value).replace(',', ' ')

p = figure(plot_height=300, title="City area distribution",
           x_axis_label='City area bins', y_axis_label='Percentile [%ile]')
p.step(tick_positions, cumulative_share,
       line_color="#0276FD", line_width=2)

p.xaxis.ticker = tick_positions
p.xaxis.major_label_orientation = 3.14/4
p.xaxis.major_label_overrides = tick_labels

# Title styling
p.title.text_font_size = '12pt'
p.title.text_color = "#0276FD"
p.title.text_font = "verdana"
# Same fonts on both axes
for axis in (p.xaxis, p.yaxis):
    axis.axis_label_text_font = "verdana"
    axis.axis_label_text_font_style = "normal"
    axis.major_label_text_font = "verdana"

show(p)

27% des villes ont un territoire inférieur ou égal à 10 km$^2$. 63% des villes ont un territoire inférieur ou égal à 20 km$^2$. Un peu moins de 10% des villes ont un territoire supérieur à 30 km$^2$. Environ 2% des villes ont un territoire inférieur à 1 km$^2$ ou supérieur à 70 km$^2$.

In [15]:
# Cumulative share of cities up to each area bin, rounded to a whole percent
CityAreaBinDistributionPercentage = [int(round(running_total / NumberOfCity * 100))
                                     for running_total in np.cumsum(hist)]
CityAreaBinDistributionPercentage[14]

63

## 3.4. Population density basic analysis

Not done yet: it should provide the same analysis as for population and area.

In [16]:
# Statistics about population density (inhabitants per km2)
TotalDensity = int(TotalPopulation/TotalArea)     # overall density: total population / total area
AverageDensity = int(np.mean(AllDensityList))     # mean of the per-city densities
MedianDensity = int(np.median(AllDensityList))    # median of the per-city densities
MinDensity = int(np.min(AllDensityList))          # lowest density
MaxDensity = int(np.max(AllDensityList))          # highest density

# Unit suffix, encoded once so it can be concatenated with the byte-string labels
DensityUnitSuffix = ' pop/km' + (u"\u00b2").encode('utf-8')

# Density is computed in this notebook, not stored in MongoDB, so the matching
# city names are looked up by position in the in-memory lists (the previous,
# commented-out attempt queried the 'surface' field with a density value).
LeastDenseCity = AllCityNames[int(np.argmin(AllDensityList))].encode('utf-8')
MostDenseCity  = AllCityNames[int(np.argmax(AllDensityList))].encode('utf-8')

print('Densité totale: ' + str(TotalDensity) + DensityUnitSuffix)
print('Densité moyenne: ' + str(AverageDensity) + DensityUnitSuffix)
print('Densité mediane: ' + str(MedianDensity) + DensityUnitSuffix)
print('Densité la plus faible: ' + str(MinDensity) + DensityUnitSuffix + ' (' + LeastDenseCity + ')')
print('Densité la plus élevée: ' + str(MaxDensity) + DensityUnitSuffix + ' (' + MostDenseCity + ')')

Densité totale: 149 pop/km²
Densité moyenne: 155 pop/km²
Densité mediane: 74 pop/km²
Densité la plus faible: 14 pop/km²
Densité la plus élevée: 4195 pop/km²


## 3.5. Population density analysis

**Purpose:** highlight that cities around big city have a higher population and population density

### 3.5.1. Prepare the data

In [17]:
# Index of the most populated city; every distance below is measured from it
MostPopCityIndex = np.argmax(AllPopulationList)
# Distance from that city to every city (itself included). Each point is passed
# to the helper as (coordinates[1], coordinates[0]) - presumably latitude then
# longitude; TODO confirm against DistanceCoordToKm.
ListDistance = [
    Fcn_BasicStatisticalAnalysisOfPopulation.DistanceCoordToKm(
        QueryMongo[MostPopCityIndex]['centre']['coordinates'][1],
        QueryMongo[MostPopCityIndex]['centre']['coordinates'][0],
        city['centre']['coordinates'][1],
        city['centre']['coordinates'][0])
    for city in QueryMongo
]

### 3.5.2. Scatter plot - Population vs distance to most populated city

In [18]:
# Scatter plot of population against distance for every city except the most
# populated one. The reference city is recomputed here (it is the one
# ListDistance was measured from) and excluded by index, so that:
#  - x and y are built with the SAME filter and always stay aligned;
#  - the cell does not depend on MostPopCityIndex, which a later cell reassigns.
main_city_position = int(np.argmax(AllPopulationList))
other_cities = [i for i in range(NumberOfCity) if i != main_city_position]

p = figure(plot_height=300, title="City population vs distance to most populated city",
           x_axis_label='Distance to the most populated city [km]', y_axis_label='Population')
p.circle([ListDistance[i] for i in other_cities],
         [AllPopulationList[i] for i in other_cities],
         color="#0276FD")

# Title styling
p.title.text_font_size = '12pt'
p.title.text_color = "#0276FD"
p.title.text_font = "verdana"
# Axis fonts
p.yaxis.axis_label_text_font = "verdana"
p.yaxis.axis_label_text_font_style = "normal"
p.xaxis.axis_label_text_font = "verdana"
p.xaxis.axis_label_text_font_style = "normal"
p.xaxis.major_label_text_font = "verdana"
p.yaxis.major_label_text_font = "verdana"

show(p)

Difficile de mettre en lumière une tendance. Répéter la même méthode avec la densité de population.

### 3.5.3. Scatter plot - Density population vs distance to most populated city

In [19]:
# Create the density list
# ListDensity = np.array(AllPopulationList)/np.array(AllAreaList)*100

In [20]:
# Scatter plot of population density against distance for every city except
# the most populated one. The reference city is excluded by index rather than
# by comparing population values, so a city that happens to share the maximum
# population is not dropped, and the cell does not depend on MostPopCityIndex,
# which a later cell reassigns.
main_city_position = int(np.argmax(AllPopulationList))
other_cities = [i for i in range(NumberOfCity) if i != main_city_position]

p = figure(plot_height=300, title="City density population vs distance to most populated city",
           x_axis_label='Distance to the most populated city [km]', y_axis_label='Population density')
p.circle([ListDistance[i] for i in other_cities],
         [AllDensityList[i] for i in other_cities],
         color="#0276FD")

# Title styling
p.title.text_font_size = '12pt'
p.title.text_color = "#0276FD"
p.title.text_font = "verdana"
# Axis fonts
p.yaxis.axis_label_text_font = "verdana"
p.yaxis.axis_label_text_font_style = "normal"
p.xaxis.axis_label_text_font = "verdana"
p.xaxis.axis_label_text_font_style = "normal"
p.xaxis.major_label_text_font = "verdana"
p.yaxis.major_label_text_font = "verdana"

show(p)

Ce graphe montre que plus une ville est proche de la ville la plus peuplée (moins de 15 km), plus sa densité de population est importante. Entre 15 km et 50 km de distance, la densité de population est plutôt faible et constante. Au-delà de 50 km, la densité de population augmente : du fait de l'éloignement de la ville principale, d'autres villes importantes se développent.

**Hypothèse :** Autour des principales villes, la densité de population est plus importante, puis diminue à mesure que l'on s'éloigne de ce centre urbain, puis ré-augmente en s'approchant d'un autre centre urbain.

### 3.5.4. Investiguer l'hypothese émise

**Etape:** 
- Prendre un exemple avec 2-3 villes pour vérifier si l'hypothese est plausible
- Identifier les principales villes
- Vérifier que la densité diminue avec l'éloignement d'un centre urbain (prendre 2 villes et observer la tendance, qui devrait être de forme parabolique)
- Etablir une relation entre l'éloignement, la densité et la population du centre urbain le plus proche

In [21]:
# Comment définir une grande ville? La ville la plus peuplée + d'autres (quels criteres? distance entre grande ville? population par rapport a une ville plus peuplé?)
# Au moins 2000 habitants -> https://www.larousse.fr/dictionnaires/francais/ville/82000
# https://baripedia.org/wiki/Ville_et_Urbanisation
# https://www.persee.fr/doc/hism_0982-1783_1987_num_2_2_1312

In [22]:
# Choose Rennes and Saint-Malo, calculate the distance between those 2 cities [Dist_R-SM].
# For all cities calculate the distance to these 2 cities.
# Then select only the cities that are between these 2 cities
# - Cities within intersection of circles from both cities with radius [Dist_R-SM]
# - Cities within [Dist_R-SM]+15km -> Margin part could be changed or used %

In [23]:
# Create dataframe to make it easier to keep all info in one table
df = pd.DataFrame({'Name':AllCityNames,
                   'Population':AllPopulationList,
                   'Area':AllAreaList,
                   'Density':AllDensityList})
df = df[['Name','Population','Area','Density']]

# Get the index of the two most populated cities (most populated first)
TwoLargestCityIndex = list(df.nlargest(2,'Population').index.values)

# Add one column per reference city holding the distance of every city to it.
# The loop variable is deliberately NOT named MostPopCityIndex: reusing that
# name overwrote the index computed earlier in the notebook and silently changed
# the result of the earlier scatter-plot cells when they were re-run.
for ReferenceCityIndex in TwoLargestCityIndex:
    ReferenceCoordinates = QueryMongo[ReferenceCityIndex]['centre']['coordinates']
    DistanceToReference = [
        Fcn_BasicStatisticalAnalysisOfPopulation.DistanceCoordToKm(
            ReferenceCoordinates[1], ReferenceCoordinates[0],
            city['centre']['coordinates'][1], city['centre']['coordinates'][0])
        for city in QueryMongo
    ]
    # Still published as List1_<index>: a later cell reads the list back from globals()
    globals()['List1_{0}'.format(ReferenceCityIndex)] = DistanceToReference
    # Column named after the reference city, e.g. DistTo<Name>
    df['DistTo{0}'.format(df['Name'][ReferenceCityIndex])] = pd.Series(DistanceToReference)

# Get distance between the two most populated cities: read it from the column of
# the first city at the row of the second one (no hard-coded city name)
FirstCityColumn = 'DistTo{0}'.format(df['Name'][TwoLargestCityIndex[0]])
MasterDistance = df.loc[TwoLargestCityIndex[1], FirstCityColumn]

df.head()

Unnamed: 0,Name,Population,Area,Density,DistToRennes,DistToSaint-Malo
0,Acigné,6282,3020,208.013245,12.612759,67.290686
1,Amanlis,1616,2556,63.223787,18.900247,82.226049
2,Andouillé-Neuville,837,1266,66.113744,21.88349,49.618939
3,Antrain,1366,942,145.010616,41.789843,44.005262
4,Arbrissel,290,469,61.833689,35.483827,96.394868


**Méthode 1 :** Sélectionner uniquement les villes se trouvant dans l'intersection des deux cercles de rayon égal à la distance entre les deux villes les plus peuplées et ayant pour centre ces deux villes.

In [24]:
# Keep the cities lying within MasterDistance of BOTH reference cities
# (the two reference cities themselves are removed first)
df1 = df.drop(TwoLargestCityIndex)
inside_both_circles = (df1['DistToRennes'] <= MasterDistance) & (df1['DistToSaint-Malo'] <= MasterDistance)
df1 = df1[inside_both_circles]
df1.head()

Unnamed: 0,Name,Population,Area,Density,DistToRennes,DistToSaint-Malo
2,Andouillé-Neuville,837,1266,66.113744,21.88349,49.618939
3,Antrain,1366,942,145.010616,41.789843,44.005262
6,Aubigné,487,222,219.369369,21.29293,47.785542
8,Baguer-Morvan,1621,2443,66.352845,43.693092,23.974464
9,Baguer-Pican,1578,1610,98.012422,48.152533,25.965216


In [25]:
# Number of rows left in df1 after the filter
print('Number of cities after filter: ' + str(df1.shape[0]))

Number of cities after filter: 170


In [26]:
# Side-by-side scatter plots of density against the distance to each of the two
# reference cities. Rows with a zero distance are filtered out once with a
# boolean mask (the previous comprehension rebuilt list(df1[...]) on every
# iteration, which was quadratic in the number of cities).
near_first = df1[df1['DistToRennes'] > 0]
near_second = df1[df1['DistToSaint-Malo'] > 0]

p1 = figure(plot_height=300, title="City density population vs distance to most populated city", plot_width = 495,
           x_axis_label='Distance to the most populated city [km]', y_axis_label='Population density')
p1.circle(near_first['DistToRennes'].tolist(), near_first['Density'].tolist(),
          color="#0276FD")
p2 = figure(plot_height=300, title="City density population vs distance to second most populated city", plot_width = 495,
           x_axis_label='Distance to the second most populated city [km]', y_axis_label='Population density')
p2.circle(near_second['DistToSaint-Malo'].tolist(), near_second['Density'].tolist(),
          color="#0276FD")
show(row(p1,p2))

**Commentaire :** Entre les deux villes, la densité de population est moindre lorsque l'on s'en éloigne. Ce phénomène s'observe plus aisément aux abords de la ville la plus peuplée. Certaines villes à plus de 25 km d'une des deux principales villes ont une densité plus importante que la tendance générale ; cela peut être dû à un territoire relativement petit. Cette méthode sélectionne 170 villes.

**Méthode 2 :** Sélectionner les villes dont la somme des distances aux deux villes principales est strictement inférieure à la distance entre ces deux villes augmentée de 10%.

In [27]:
# Keep the cities whose summed distance to the two reference cities stays
# below the distance between those two cities plus 10%
df2 = df.drop(TwoLargestCityIndex).assign(
    SumDist=lambda frame: frame['DistToRennes'] + frame['DistToSaint-Malo'])
close_to_axis = df2['SumDist'] < MasterDistance+MasterDistance*0.1
df2 = df2[close_to_axis]
df2.head()

Unnamed: 0,Name,Population,Area,Density,DistToRennes,DistToSaint-Malo,SumDist
6,Aubigné,487,222,219.369369,21.29293,47.785542,69.078472
8,Baguer-Morvan,1621,2443,66.352845,43.693092,23.974464,67.667556
16,La Baussaine,640,976,65.57377,27.578951,38.580752,66.159703
19,Hédé-Bazouges,2099,1472,142.595109,22.391508,42.352398,64.743906
21,Bécherel,722,56,1289.285714,28.328497,39.842571,68.171067


In [28]:
# Number of rows left in df2 after the filter
print('Number of cities after filter: ' + str(df2.shape[0]))

Number of cities after filter: 84


In [29]:
# Side-by-side scatter plots of density against the distance to each of the two
# reference cities. Rows with a zero distance are filtered out once with a
# boolean mask (the previous comprehension rebuilt list(df2[...]) on every
# iteration, which was quadratic in the number of cities).
near_first = df2[df2['DistToRennes'] > 0]
near_second = df2[df2['DistToSaint-Malo'] > 0]

p1 = figure(plot_height=300, title="City density population vs distance to most populated city", plot_width = 495,
           x_axis_label='Distance to the most populated city [km]', y_axis_label='Population density')
p1.circle(near_first['DistToRennes'].tolist(), near_first['Density'].tolist(),
          color="#0276FD")
p2 = figure(plot_height=300, title="City density population vs distance to second most populated city", plot_width = 495,
           x_axis_label='Distance to the second most populated city [km]', y_axis_label='Population density')
p2.circle(near_second['DistToSaint-Malo'].tolist(), near_second['Density'].tolist(),
          color="#0276FD")
show(row(p1,p2))

**Commentaire :** Cette deuxième méthode permet de mettre encore plus en évidence l'hypothèse émise. Cette méthode sélectionne 84 villes.

**Conclusion :** L'hypothèse émise paraît plausible ; reste maintenant à déterminer si le phénomène est similaire pour les autres villes et comment le quantifier (relation mathématique, équation...).

In [30]:
# Sanity check: length of the distance list stored for the second most populated city
second_city_list_name = 'List1_{0}'.format(TwoLargestCityIndex[1])
len(globals()[second_city_list_name])

345

In [31]:
#Create a table to extract quickly info about the previous distribution
#Map to display population with different color scale (use log scale or other scale)