# Italian schools

## Setup

In [334]:
import pandas as pd
import numpy as np

## Data Preparation

In [335]:
# Import data
school_1 = pd.read_csv("../data/ScuoleParitarie_2021_22.csv")
school_2 = pd.read_csv("../data/ScuoleParitarieTrentino_2021_22.csv")
school_3 = pd.read_csv("../data/ScuoleStatali_2021_22.csv")
school_4 = pd.read_csv("../data/ScuoleStataliTrentino_2021_22.csv")

### Private Schools Data

Cleaning the private schools data:
1. Remove Anno scolastico
2. Rename all columns
3. Replace "Non Disponibile" with NaN
4. Set column "type" as "Paritaria"
5. Remove "NON STATALE" from grade column and add an additional column with "STATALE" or "PARITARIA"
6. Convert string values to lower case

In [336]:
# Private Schools
private = pd.concat([school_1, school_2]).drop(['ANNOSCOLASTICO'], axis=1)
private.rename(columns={
    "AREAGEOGRAFICA": "area",
    "REGIONE": "region",
    "PROVINCIA": "province",
    "CODICESCUOLA": "id",
    "DENOMINAZIONESCUOLA": "name",
    "INDIRIZZOSCUOLA": "address",
    "CAPSCUOLA": "cap",
    "CODICECOMUNESCUOLA": "mun_id",
    "DESCRIZIONECOMUNE": "municipality",
    "DESCRIZIONETIPOLOGIAGRADOISTRUZIONESCUOLA": "grade",
    "INDIRIZZOEMAILSCUOLA": "email",
    "INDIRIZZOPECSCUOLA": "pec",
    "SITOWEBSCUOLA": "website"
}, inplace=True)

In [337]:
# Replace Non Disponibile with NaN 
private.replace("Non Disponibile", np.nan, inplace=True)

# Defining Private school type
private['type'] = "Paritaria"

In [338]:
# Remove NON STATALE from the grade school
private["grade"] = [x.replace("NON STATALE", "") for x in private["grade"]]

# Lower case
private[['area', 'region', 'province', 'name', 'address', 'municipality', 'grade']] = private[['area', 'region',
                                                                                               'province', 'name', 
                                                                                               'address', 'municipality', 'grade']].applymap(lambda s: s.title() if type(s) == str else s)
private[['email', 'pec', 'website']] = private[['email', 'pec', 'website']
                                               ].applymap(lambda s: s.lower() if type(s) == str else s)

# Set the school type for private schools
private['school_type'] = "Paritaria"

In [339]:
# Missing information
private.isnull().sum()

area               0
region             0
province           0
id                 0
name               0
address            0
cap                0
mun_id             0
municipality       0
grade              0
email           2837
pec             7609
website         8274
type               0
school_type        0
dtype: int64

### Public Schools Data
Cleaning the public schools data:
1. Remove Anno scolastico, Sede scolastica, Indicazione sede direttivo and indicazione sede omnicomprensivo
2. Rename all columns
3. Replace "Non Disponibile" with NaN
4. Set column "type" as "Statale"
5. Remove "NON STATALE" from grade column and add an additional column with "STATALE" or "PARITARIA"
6. Convert string values to lower case

In [340]:
# Public Schools
public = pd.concat([school_3, school_4]).drop(
    ['ANNOSCOLASTICO', 'INDICAZIONESEDEDIRETTIVO', 'SEDESCOLASTICA', 'INDICAZIONESEDEOMNICOMPRENSIVO'], axis=1)

public.rename(columns={
    "AREAGEOGRAFICA": "area",
    "REGIONE": "region",
    "PROVINCIA": "province",
    "CODICEISTITUTORIFERIMENTO": "inst_id",
    "DENOMINAZIONEISTITUTORIFERIMENTO": "inst_name",
    "CODICESCUOLA": "id",
    "DENOMINAZIONESCUOLA": "name",
    "INDIRIZZOSCUOLA": "address",
    "CAPSCUOLA": "cap",
    "CODICECOMUNESCUOLA": "mun_id",
    "DESCRIZIONECOMUNE": "municipality",
    "DESCRIZIONECARATTERISTICASCUOLA": "school_type",
    "DESCRIZIONETIPOLOGIAGRADOISTRUZIONESCUOLA": "grade",
    "INDIRIZZOEMAILSCUOLA": "email",
    "INDIRIZZOPECSCUOLA": "pec",
    "SITOWEBSCUOLA": "website", 
    "INDICAZIONESEDEDIRETTIVO": "site_id"}, inplace=True)

In [341]:
# Replace Non Disponibile with NaN 
public.replace("Non Disponibile", np.nan, inplace=True)

# Defining Public school type
public["type"] = "Statale"

In [342]:
# Lower case
public[['area', 'region', 'province', 'inst_name', 'school_type', 'name', 'address', 'municipality', 'grade']] = public[['area', 'region',
                                                                                                                         'province', 'inst_name', 
                                                                                                                         'school_type', 'name', 
                                                                                                                         'address', 'municipality', 'grade']].applymap(lambda s: s.title() if type(s) == str else s)
public[['email', 'pec', 'website']] = public[['email', 'pec', 'website']
                                             ].applymap(lambda s: s.lower() if type(s) == str else s)

public = public[['area', 'region', 'province', 'id', 'name', 'address',
                 'cap', 'mun_id',  'municipality', 'grade',
                 'email', 'pec', 'website',
                 'type', 'school_type',
                 'inst_id', 'inst_name']]

# Filling missing names with the corresponding institute name
public.name.fillna(public.inst_name, inplace=True)

In [343]:
# Missing information
public.isnull().sum()

area                0
region              0
province            0
id                  0
name                0
address          1357
cap               883
mun_id              0
municipality        0
grade               0
email            1257
pec             43811
website          8728
type                0
school_type         0
inst_id             0
inst_name           0
dtype: int64

### Schools data

In [344]:
# Concatenate both public and private schools data
schools = pd.concat([public, private], ignore_index=True)

In [345]:
# Renaming regions name
schools.replace("Friuli-Venezia G.", "Friuli Venezia Giulia", inplace=True)
schools.replace("Trentino-Alto Adige", "Trentino Alto Adige", inplace=True)
schools.replace("Valle D' Aosta", "Valle D'Aosta", inplace=True)

In [346]:
# Area
set(schools.area)

{'Centro', 'Isole', 'Nord Est', 'Nord Ovest', 'Sud'}

In [347]:
# Regions
set(schools.region)

{'Abruzzo',
 'Basilicata',
 'Calabria',
 'Campania',
 'Emilia Romagna',
 'Friuli Venezia Giulia',
 'Lazio',
 'Liguria',
 'Lombardia',
 'Marche',
 'Molise',
 'Piemonte',
 'Puglia',
 'Sardegna',
 'Sicilia',
 'Toscana',
 'Trentino Alto Adige',
 'Umbria',
 "Valle D'Aosta",
 'Veneto'}

In [348]:
# Grade
set(schools.grade)

{'Centro Territoriale',
 'Convitto Annesso',
 'Convitto Nazionale',
 'Educandato',
 'Ist Prof Alberghiero',
 'Ist Prof Cinematografia E Televisione',
 'Ist Prof Industria E Artigianato',
 'Ist Prof Industria E Artigianato Per Ciechi',
 'Ist Prof Industria E Artigianato Per Sordomuti',
 "Ist Prof Industria E Attivita' Marinare",
 'Ist Prof Per I Servizi Alberghieri E Ristorazione',
 'Ist Prof Per I Servizi Comm Tur E Della Pubb',
 'Ist Prof Per I Servizi Commerciali',
 'Ist Prof Per I Servizi Commerciali E Turistici',
 'Ist Prof Per I Servizi Pubblicitari',
 'Ist Prof Per I Servizi Sociali',
 'Ist Prof Per I Servizi Turistici',
 "Ist Prof Per L'Agricoltura",
 "Ist Prof Per L'Agricoltura E L'Ambiente",
 'Ist Tec Commerciale E Per Geometri',
 'Ist Tecnico Commerciale',
 'Ist Tecnico Economico E Tecnologico',
 'Istituto Comprensivo',
 "Istituto D'Arte",
 'Istituto Magistrale',
 'Istituto Superiore',
 'Istituto Tecnico Aeronautico',
 'Istituto Tecnico Agrario',
 'Istituto Tecnico Commercial

In [349]:
set(schools.type)

{'Paritaria', 'Statale'}

In [350]:
set(schools.school_type)

{'Ann. A Conservatorio',
 'Ann. A Educandato',
 'Ann. A Istituto Arte',
 'Annesso A Convitto',
 'Annesso A Istituto',
 'C/O Ist. Ospedaliero',
 'Convitto Annesso',
 'Convitto Nazionale',
 'Cpia',
 'Di Confine',
 'Di Montagna',
 'Educandato Femminile',
 'Isolano',
 'Normale',
 'Paritaria',
 'Per Ciechi',
 'Per Sordomuti',
 'Percorso Ii Livello',
 'Scuola Annessa',
 'Sloveno',
 'Spec. Per Carcerari',
 'Spec. Per Sordomuti',
 'Speciale Per Ciechi',
 'Sperimentale Per Dm'}

In [351]:
# Number of municipalities with at least a school
len(set(schools.municipality))

7063

### Population data

1. Load Italian Population Data 
2. Keep Cap, Name, Gender and Number of people for each Municipality
3. Load Municipalities information with Region and Area of Interest
4. Join schools information with the total population 

In [352]:
# Population
population = pd.read_csv("../data/ItalianPopulation2021_2.csv")
population = population[['ITTER107', 'Territorio', 'Sesso', 'Value']].iloc[3:,:].reset_index(drop=True)

municipalities = pd.read_csv("../data/Municipalities.csv", delimiter = ";", encoding = "ANSI", dtype={'ID': 'object'})

population = pd.merge(municipalities, population, right_on = ["ITTER107", "Territorio"], left_on= ["ID","Nome"]).drop(['Provincia','ITTER107','Territorio'], axis=1)
pop_tot = population[population['Sesso'] == 'totale'].groupby(['Nome'], as_index=False).sum()


In [353]:
pop_tot.rename(columns={'Value': 'population'}, inplace=True)

In [354]:
schools = pd.merge(pop_tot, schools, left_on='Nome', right_on="municipality").drop(["Nome"], axis=1)

## A look at data

### School Names

Aim: *Discover which are the most used names for schools and if there are any differences amongst italian geographical areas.*

In [355]:
# All names used for italian schools
names = schools.name.tolist()

# Set area as categorical variable with specific order (from south to north)
schools['area'] = pd.Categorical(schools['area'],
                                 categories = ["Isole", "Sud", "Centro", "Nord Est", "Nord Ovest"],
                                 ordered=True)

In [356]:
# Top 20 used names in Italian Schools with repetitions
import plotly.express as px
schools['collapsed_name'] = names

# Get top 20 names
most_frequent_names = schools.groupby(["collapsed_name"],
                                      as_index=False).size().sort_values(['size'],
                                                                         ascending=False)['collapsed_name'][:20]

# Get the number of schools per region with that name
frequent_names = schools[schools['collapsed_name'].isin(most_frequent_names)].groupby(["collapsed_name", "area"],
                                                                                      as_index=False).size()

# Get the order based on the total number of schools with the same name in Italy
order = list(frequent_names.groupby(['collapsed_name'], as_index=False).sum(
).sort_values(['size'], ascending=False)['collapsed_name'])

# Convert frequent names to categorical with a specific order
frequent_names['collapsed_name'] = pd.Categorical(frequent_names['collapsed_name'],
                                                  categories=order,
                                                  ordered=True)

# Sort the dataframe based on the ordered category
frequent_names.sort_values(
    ['collapsed_name', 'area'], inplace=True, ascending=[True, False])
# Plot results
colours = {
    "Nord Ovest": "#732EDC",
    "Nord Est": "#3F84E5",
    "Centro": "#06D6A0",
    "Sud": "#FFD166",
    "Isole": "#EF476F"
}
fig = px.bar(frequent_names, x='collapsed_name', y='size',
             color='area', 
             color_discrete_map=colours)
fig.update_layout(xaxis={'categoryorder': 'array',
                  'categoryarray': frequent_names.collapsed_name})
fig.show()


In [357]:
def replace(x):
    if "Milani" in x:
        return "Don Milani"
    elif "Dante" in x:
        return "Dante Alighieri"
    elif "Rodari" in x:
        return "Gianni Rodari"
    elif "Pirandello" in x:
        return "Luigi Pirandello"
    elif "Collodi" in x:
        return "Carlo Collodi"
    elif "De Amicis" in x:
        return "Edmondo De Amicis"
    elif ("Don Bosco" in x) or ("Giovanni Bosco" in x) or ("G. Bosco" in x) or("G.Bosco" in x) or ("G Bosco" in x) or ("Giov.Bosco" in x):
        return "San Giovanni Bosco"
    elif "Pascoli" in x:
        return "Giovanni Pascoli"
    elif "Marconi" in x:
        return "Guglielmo Marconi"
    elif "Maria Immacolata" in x:
        return "Maria Immacolata"
    elif "Montessori" in x:
        return "Maria Montessori"
    elif "Fermi" in x:
        return "Enrico Fermi"
    elif "Da Vinci" in x:
        return "Leonardo Da Vinci"
    elif "Sacro Cuore" in x:
        return "Sacro Cuore"
    elif ("I.Calvino" in x) or ("I. Calvino" in x) or ("Italo Calvino" in x):
        return "Italo Calvino"
    elif "Galile" in x:
        return "Galileo Galilei"
    elif "Giovanni Xxiii" in x:
        return "Papa Giovanni XXIII"
    elif "San Giuseppe" in x:
        return "San Giuseppe"
    elif "Arcobaleno" in x:
        return "Arcobaleno"
    elif "Casa Circondariale" in x:
        return "Casa Circondariale"
    else:
        return x

names = [replace(str(x)) for x in names]

In [358]:
# Top 20 used names in Italian Schools without repetitions
import plotly.express as px
schools['collapsed_name'] = names
most_frequent_names = schools.groupby(["collapsed_name"],
                                      as_index=False).size().sort_values(['size'],
                                                                         ascending=False)['collapsed_name'][:20]


frequent_names = schools[schools['collapsed_name'].isin(most_frequent_names)].groupby(["collapsed_name", "area"],
                        as_index=False).size()
order = list(frequent_names.groupby(['collapsed_name'], as_index=False).sum().sort_values(['size'], ascending=False)['collapsed_name'])
frequent_names['collapsed_name'] = pd.Categorical(frequent_names['collapsed_name'],
                                                  categories = order,
                                                  ordered = True)
frequent_names.sort_values(['collapsed_name', 'area'], inplace=True, ascending=[True, False])

fig = px.bar(frequent_names, x='collapsed_name', y='size', color='area', 
             color_discrete_map = colours)
fig.update_layout( xaxis={'categoryorder':'array', 'categoryarray':frequent_names.collapsed_name})
fig.show()

## School Type

### Number of schools per area

In [359]:
school_types_df = schools.groupby(["type", "area"], as_index=False).size()
fig = px.bar(school_types_df, x='area', y='size', color='type', color_discrete_map={'Paritaria': "#3F84E5", "Statale": "#EF476F"})
fig.update_layout( xaxis={'categoryorder':'array', 'categoryarray':frequent_names.collapsed_name,})
fig.show()

### Number of private and public schools among regions

In [360]:
school_types_df = schools.groupby(["type", "region"], as_index=False).size()
fig = px.bar(school_types_df, x='region', y='size', color='type', color_discrete_map={'Paritaria': "#3F84E5", "Statale": "#EF476F"})
fig.update_layout( xaxis={'categoryorder':'array', 'categoryarray':frequent_names.collapsed_name,})
fig.show()

## Geocoding of Trentino's Schools

In [361]:
trentino = schools[schools['region'].isin(['Trentino Alto Adige'])]

In [362]:
# changing the name of a missing address school
trentino.loc[24154,["name","collapsed_name"]]# = "Varollo Livo Chiara Lubich"
trentino.loc[52108,["name","collapsed_name"]] = "G.A. Slop Von Cadenberg"



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [363]:
trentino[(~trentino['address'].isna()) & (trentino['address'].str.contains("i"))]

Unnamed: 0,population,area,region,province,id,name,address,cap,mun_id,municipality,grade,email,pec,website,type,school_type,inst_id,inst_name,collapsed_name
862,8792,Nord Est,Trentino Alto Adige,Trento,TNEE84802V,Ala - Serravalle,"Via Luigi Negrelli, 10",38060,A116,Ala,Scuola Primaria,,,www.istitutocomprensivoala.it,Statale,Normale,TNIC84800Q,"""Ala""",Ala - Serravalle
863,8792,Nord Est,Trentino Alto Adige,Trento,TNEE84801T,"Ala ""Abramo Betta""","Via Abramo Betta, 9",38061,A116,Ala,Scuola Primaria,,,www.istitutocomprensivoala.it,Statale,Normale,TNIC84800Q,"""Ala""","Ala ""Abramo Betta"""
864,8792,Nord Est,Trentino Alto Adige,Trento,TNIC84800Q,"""Ala""","Via Don Anzelini, 2",38061,A116,Ala,Istituto Comprensivo,segr.ic.ala@scuole.provincia.tn.it,ic.ala@pec.provincia.tn.it,,Statale,Normale,TNIC84800Q,"""Ala""","""Ala"""
865,8792,Nord Est,Trentino Alto Adige,Trento,TNMM84801R,"Ala ""Cesare Battisti""","Via Don Anzelini, 2",38061,A116,Ala,Scuola Primo Grado,,,www.istitutocomprensivoala.it,Statale,Normale,TNIC84800Q,"""Ala""","Ala ""Cesare Battisti"""
1083,1500,Nord Est,Trentino Alto Adige,Trento,TNMM817026,Albiano,"Via Manzoni, 10",38041,A158,Albiano,Scuola Primo Grado,,,http://iccivezzano.infovals.eu,Statale,Normale,TNIC817004,"""Civezzano""",Albiano
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55513,3825,Nord Est,Trentino Alto Adige,Trento,TNEE846038,"Villa Lagarina ""Paride Lodron""","Via Stockstadt Am Rhein, 9",38060,L957,Villa Lagarina,Scuola Primaria,,,http://www.icvillalagarina.it/,Statale,Normale,TNIC846004,"""Villa Lagarina""","Villa Lagarina ""Paride Lodron"""
55514,3825,Nord Est,Trentino Alto Adige,Trento,TNIC846004,"""Villa Lagarina""","Via Stockstad Am Rheim, 3",38060,L957,Villa Lagarina,Istituto Comprensivo,segr.ic.villalagarina@scuole.provincia.tn.it,ic.villalagarina@pec.provincia.tn.it,,Statale,Normale,TNIC846004,"""Villa Lagarina""","""Villa Lagarina"""
56263,3020,Nord Est,Trentino Alto Adige,Trento,TNEE86003E,"Volano ""Tovazzi""","Via Stazione, 30",38060,M113,Volano,Scuola Primaria,,,ic-altavallagarina.it,Statale,Normale,TNIC86000A,"""Alta Vallagarina""","Volano ""Tovazzi"""
56264,3020,Nord Est,Trentino Alto Adige,Trento,TNIC86000A,"""Alta Vallagarina""","Via Stazione, 30",38060,M113,Volano,Istituto Comprensivo,segr.ic.altavallagarina@scuole.provincia.tn.it,ic.altavallagarina@pec.provincia.tn.it,,Statale,Normale,TNIC86000A,"""Alta Vallagarina""","""Alta Vallagarina"""


In [364]:
trentino[trentino.address.isnull()]

Unnamed: 0,population,area,region,province,id,name,address,cap,mun_id,municipality,grade,email,pec,website,type,school_type,inst_id,inst_name,collapsed_name
14066,7155,Nord Est,Trentino Alto Adige,Trento,TNTD10050A,"""Carlo Antonio Pilati"" - Cles - Serale",,38023,C794,Cles,Ist Tec Commerciale E Per Geometri,dirigente@istitutopilati.it,,www.istitutopilati.it,Statale,Percorso Ii Livello,TNTD100001,"""Carlo Antonio Pilati"" - Cles","""Carlo Antonio Pilati"" - Cles - Serale"
24154,954,Nord Est,Trentino Alto Adige,Trento,TNEE83102D,Livo - Varollo,,38020,E624,Livo,Scuola Primaria,,,www.iccles.it,Statale,Normale,TNIC83100A,"""Cles""",Livo - Varollo
39322,4522,Nord Est,Trentino Alto Adige,Trento,TNTD001509,La Rosa Bianca-Weisse Rose Serale,,38037,H018,Predazzo,Istituto Tecnico Commerciale,segr.iicavalese@scuole.provincia.tn.it,,scuolefiemme.tn.it,Statale,Percorso Ii Livello,TNIS00100P,"""La Rosa Bianca-Weisse Rose"" - Cavalese",La Rosa Bianca-Weisse Rose Serale
39754,1335,Nord Est,Trentino Alto Adige,Trento,TNEE83307A,Rabbi - San Bernardo,,38020,H146,Rabbi,Scuola Primaria,,,www.icbassavaldisole.it,Statale,Normale,TNIC833002,"""Bassa Val Di Sole """,Rabbi - San Bernardo
52108,118879,Nord Est,Trentino Alto Adige,Trento,TNEE85804G,G.A. Slop Von Cadenberg,,38070,L378,Trento,Scuola Primaria,,,www.istitutocomprensivotrento6.it,Statale,Normale,TNIC85800A,"""Trento 6""",G.A. Slop Von Cadenberg


In [365]:
trentino.loc[[14066,24154,39322,39754,52108],"address"] = ["Via 4 Novembre, 35", 
                                                           "Frazione Livo, 92",
                                                           "Via Gandhi, 1",
                                                           "Frazione San Bernardo, 143/A",
                                                           "Via di Coltura, 7"]
trentino.loc[[43827, 43843], "cap"] = [38068, 38068]
trentino.loc[45087,["name","address"]] = ["San G. Bosco S. Lorenzo in Banale", 
                                          "Via di Prato, San Lorenzo, 15/A"]



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [366]:
import geopandas as gpd
tn_addresses = list(trentino['name'] + ", " +
                    trentino['address'] + ", " + trentino['municipality'] +
                    ", " + trentino['province'])

In [367]:
geo_tn = gpd.tools.geocode(tn_addresses, provider="arcgis")

In [368]:
trentino['geometry'] = [x for x in geo_tn['geometry']]


The array interface is deprecated and will no longer work in Shapely 2.0. Convert the '.coords' to a numpy array instead.



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [369]:
trentino.drop(['region','inst_id','collapsed_name','inst_name'],axis=1, inplace=True)



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [380]:
trentino = gpd.GeoDataFrame(trentino, crs = 4326)


CRS mismatch between CRS of the passed geometries and 'crs'. Use 'GeoDataFrame.set_crs(crs, allow_override=True)' to overwrite CRS or 'GeoDataFrame.to_crs(crs)' to reproject geometries. CRS mismatch will raise an error in the future versions of GeoPandas.



In [390]:
trentino['italy_zone'] = [str(x) for x in trentino['area']]
trentino.drop(['area'], inplace = True, axis=1)

In [391]:
trentino.to_file('../data/trentino_schools.geojson',driver='GeoJSON')

In [392]:
#geo_tn = gpd.read_file("../data/trentino_schools.geojson")
trentino = gpd.read_file("../data/trentino_schools.geojson",
                         geometry = "geometry")

In [395]:
import mapclassify
import folium
trentino.explore(marker_type = "marker",
               marker_kwds={"radius": "5", 
                            "color": "cornflowerblue", 
                            'icon': folium.map.Icon(prefix = 'fa',
                                                    icon = 'graduation-cap')})

## Retrieve schools via OpenStreetMap

In [295]:
import pygeos
import pyrosm
import matplotlib.pyplot as plt
import geopandas as gpd

In [278]:
trento_download_pbf_url = "https://osmit-estratti.wmcloud.org/dati/poly/province/pbf/022_Trento_poly.osm.pbf"
# download the data
import requests
#request the file
r = requests.get(trento_download_pbf_url, allow_redirects=True)
#save the file
open('../data/trento.pbf', 'wb').write(r.content)

41407385

In [3]:

# Initialize the OSM object 
import pyrosm
osm = pyrosm.OSM("../data/trento.pbf")

NameError: name 'plot' is not defined

In [297]:
custom_filter = {'amenity': ['school', 'kindergarten']}
pois = osm.get_pois(custom_filter)


The array interface is deprecated and will no longer work in Shapely 2.0. Convert the '.coords' to a numpy array instead.



In [281]:
pois.explore(marker_type="marker",
             marker_kwds={"radius": "5",
                          "color": "cornflowerblue",
                          'icon': folium.map.Icon(prefix='fa',
                                                  icon='graduation-cap')})


In [298]:
osm_schools = gpd.GeoDataFrame(
    pois,
    crs='EPSG:4326',
    geometry=gpd.points_from_xy(pois.lon,
                                pois.lat))

In [299]:
import osmnx  as ox

In [396]:
def min_dist(point, gpd2):
    gpd2['Dist'] = gpd2.apply(lambda row: point.distance(row.geometry),axis=1)
    closest_index = gpd2.iloc[gpd2['Dist'].argmin()][['geometry','name']]
    return closest_index

In [397]:
trentino[['osm_geom','osm_name']] = [min_dist(trentino.iloc[x,-1],osm_schools[~osm_schools['lon'].isnull()])
                     for x in range(len(trentino))]



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/

In [403]:
trentino.drop(['id','mun_id','province','email','pec','website'], axis=1, inplace=True)

In [408]:
trentino[['osm_name','name', 'address', 'municipality','grade']].iloc[10:20]

Unnamed: 0,osm_name,name,address,municipality,grade
10,Scuole,Andalo,"Piazza San Vito, 2",Andalo,Scuola Primaria
11,Scuola dell'infanzia di Massone,Arco - Romarzollo,"Via Della Fossa, 6",Arco,Scuola Primaria
12,Scuola dell'infanzia di Massone,"Arco ""Nicolo' D'Arco""",Loc. Prabi,Arco,Scuola Primo Grado
13,Scuola dell'infanzia di Massone,Arco - Massone,"Via A.Maffei, 2",Arco,Scuola Primaria
14,Ex scuola elementare,Arco - Bolognano,"Viale Stazione, 2",Arco,Scuola Primaria
15,Scuola dell'infanzia di Massone,"Arco ""Giovanni Segantini""","Via Nas, 28",Arco,Scuola Primaria
16,Scuola dell'infanzia di Massone,"""Arco""",Loc. Prabi,Arco,Istituto Comprensivo
17,"Istituto tecnico economico e tecnologico ""G. F...","Gardascuola ""Padre Monti""",Via Xxiv Maggio 1,Arco,Scuola Sec. Primo Grado
18,"Istituto tecnico economico e tecnologico ""G. F...",Gardascuola,Via Xxiv Maggio 1,Arco,Scuola Sec. Secondo Grado
19,"Istituto tecnico economico e tecnologico ""G. F...",Scuola Primaria Gardascuola,Via Xxiv Maggio 1,Arco,Scuola Primaria


In [None]:
https://dati.trentino.it/dataset/18034fef-88e5-4d4d-906d-5fdc381303cc/resource/1a04a591-14d7-49c9-a011-d54bc427fa0f/download/exportesco10092014.xml

https://dati.trentino.it/dataset/istituzioni-scolastiche-trentino