# Imports

In [47]:
import requests
import bs4
import json
import datetime as dt
import sys
import pandas as pd
import geopy
import warnings
import math
import geopandas as gpd
warnings.filterwarnings('ignore')

# Lisbon Properties for Sale - SUPERCASA Webscrapping

In [5]:
url = 'https://supercasa.pt/comprar-casas/lisboa/pagina-1'
result = requests.get(url)
soup = bs4.BeautifulSoup(result.text, 'lxml')
num_of_properties = int(soup.find_all('h1', id='searchTitle')[0].get_text().split()[0].replace('.',''))
num_prop_per_page = 25
total_pages = int(num_of_properties / num_prop_per_page)
time_estimation_for_12505 = 20
time_estimation = round(num_of_properties * 20 / 12505)

print(f'Time estimation for this task: {time_estimation} minutes.')
estimation = (dt.datetime.now() + dt.timedelta(minutes=time_estimation))
start = f'Tasks started at {dt.datetime.now().hour}h:{dt.datetime.now().minute}min. Estimated finish time {estimation.hour}h:{estimation.minute}min.'
print(start)

title, price, num_rooms, total_area, latitude, longitude, region, extras, id = ([] for i in range(9))

for n in range(1, 75):
    url = f'https://supercasa.pt/comprar-casas/lisboa/pagina-{n}'
    result = requests.get(url)
    soup = bs4.BeautifulSoup(result.text, 'lxml')

    # Find all properties on the current page
    properties = soup.find_all('div', class_='property big-picture') 

    for prop in properties:
        # Title
        a = prop.find('h2', class_='property-list-title').find('a')
        title.append(a.get_text().strip() if a else ' ')

        # Price
        span = prop.find('div', class_='property-price').find('span')
        price.append(span.get_text(strip=True) if span else ' ')

        # Features
        feature = prop.find('div', class_='property-features')
        spans = feature.find_all('span') if feature else []
        rooms = spans[0].get_text() if len(spans) > 0 else "Unknown"
        area = spans[1].get_text() if len(spans) > 1 else "Unknown"
        num_rooms.append(rooms)
        total_area.append(area)

        # Links for latitude and longitude
        link = prop.find('a', class_='property-link')
        latitude.append(link.get('data-latitude') if link else 'Unknown')
        longitude.append(link.get('data-longitude') if link else 'Unknown')

        # Extras
        highlight = prop.find('div', class_='property-highlights')
        if highlight:
            extra_spans = highlight.find_all('span')
            extras.append(', '.join([span.get_text(strip=True) for span in extra_spans]))
        else:
            extras.append(' ')

        # Address region from JSON-LD script if necessary
        script = prop.find('script', type='application/ld+json')
        if script:
            data = json.loads(script.string)
            if data.get('@type') == 'Offer':
                available_at_or_from = data.get('availableAtOrFrom', {})
                address_info = available_at_or_from.get('address', {})
                address_region = address_info.get('addressRegion', 'Not provided')
                region.append(address_region)
        else:
            region.append('Not provided')

        sys.stdout.write(f"\rProgress: {int((n / total_pages) * 100)}%")
        sys.stdout.flush()

for i in range(len(title)):
    id.append(i)

sys.stdout.write(f"\rProgress: 100%")
sys.stdout.flush()
print('\nCompleted!')

headers = ['id', 'title', 'price', 'num_rooms', 'total_area', 'latitude', 'longitude', 'region', 'extras']
final_data = [id, title, price, num_rooms, total_area, latitude, longitude, region, extras]
Lisbon_Properties = pd.DataFrame(dict(zip(headers, final_data)))

print(f"\nYou now have data on {len(Lisbon_Properties['id'])} properties located in Lisbon!")

Time estimation for this task: 20 minutes.
Tasks started at 19h:31min. Estimated finish time 19h:51min.
Progress: 100%
Completed!

You now have data on 1376 properties located in Lisbon!


In [6]:
# Lisbon_Properties.to_csv('Lisbon_Properties.csv')

In [2]:
Lisbon_Properties = pd.read_csv('Lisbon_Properties.csv', index_col=0)
Lisbon_Properties.head()

Unnamed: 0,id,title,price,num_rooms,total_area,latitude,longitude,region,extras
0,0,"Apartamento T1 em Benfica, Lisboa",269.900 €,1 quarto,Área bruta 97 m²,3875171,-92009,Benfica,
1,1,"Apartamento T2 em Alvalade, Lisboa",430.000 €,2 quartos,Área bruta 90 m²,387457392,-91425898,Alvalade,
2,2,"Apartamento T3 na Rua António Nobre, São Domin...",399.900 €,3 quartos,Área bruta 120 m²,3874657,-917989,São Domingos de Benfica,
3,3,"Apartamento T4+1 na Rua de São Paulo, Misericó...",1.650.000 €,4 quartos,Área útil 233 m²,3870794,-914468,Misericórdia,"Com elevador, Com garagem"
4,4,"Apartamento T1 em Praça de Luís de Camões, Mis...",690.000 €,1 quarto,Área bruta 93 m²,3871078,-914385,Misericórdia,Com garagem


In [3]:
Lisbon_Properties = Lisbon_Properties[~Lisbon_Properties.drop('id', axis=1).duplicated()]

In [4]:
Lisbon_Properties['num_rooms'].value_counts()

num_rooms
2 quartos     426
1 quarto      311
3 quartos     264
4 quartos     136
5 quartos      29
9 quartos      13
7 quartos       6
6 quartos       6
10 quartos      2
8 quartos       1
11 quartos      1
Name: count, dtype: int64

In [5]:
Lisbon_Properties['total_area'].unique()

array(['Área bruta 97 m²', 'Área bruta 90 m²', 'Área bruta 120 m²',
       'Área útil 233 m²', 'Área bruta 93 m²', 'Área bruta 416 m²',
       'Área bruta 55 m²', 'Área bruta 234 m²', 'Área bruta 290 m²',
       'Área bruta 262 m²', 'Área bruta 336 m²', 'Área bruta 264 m²',
       'Área bruta 60 m²', 'Área bruta 874 m²', 'Área bruta 68 m²',
       'Área útil 88 m²', 'Área bruta 48 m²', 'Área bruta 110 m²',
       'Área bruta 102 m²', 'Área útil 142 m²', 'Área bruta 76 m²',
       'Área bruta 119 m²', 'Área bruta 160 m²', 'Área bruta 54 m²',
       'Área bruta 302 m²', 'Área bruta 44 m²', 'Área bruta 150 m²',
       'Área bruta 168 m²', 'Área bruta 70 m²', 'Área bruta 49 m²',
       'Área bruta 230 m²', 'Área bruta 191 m²', 'Área bruta 426 m²',
       'Área bruta 138 m²', 'Área útil 139 m²', 'Área bruta 198 m²',
       'Área bruta 85 m²', 'Área bruta 130 m²', 'Área bruta 180 m²',
       'Área bruta 67 m²', 'Área bruta 134 m²', 'Área bruta 286 m²',
       'Área bruta 77 m²', 'Área bruta 

# Lisbon Metro Info - Wikipedia Webscrapping - Create Mobility Score

In [115]:
url = 'https://pt.wikipedia.org/wiki/Lista_de_esta%C3%A7%C3%B5es_do_Metropolitano_de_Lisboa'
response = requests.get(url)
soup = bs4.BeautifulSoup(response.text, 'html.parser')
data = str(soup.find('table', {'class': 'wikitable'}))

table = pd.read_html(data)[0]
columns = ['Nome','Outros nomes','Linha','Lat.','Long.']
Lisbon_Metro = table[columns]

Lisbon_Metro.head()

Unnamed: 0,Nome,Outros nomes,Linha,Lat.,Long.
0,Aeroporto,—,Vermelha,38.76861,−9.12861
1,Alameda,Alameda I (técn.),Verde,38.73713,−9.13388
2,Alameda,Alameda II (técn.),Vermelha,38.73697,−9.13261
3,Alfornelos,—,Azul,38.76038,−9.20435
4,Alto dos Moinhos,Centro Administrativo (prev.),Azul,38.74994,−9.18003


In [116]:
Lisbon_Metro = Lisbon_Metro[~Lisbon_Metro['Lat.'].astype(str).str.contains('Lat.')]
Lisbon_Metro = Lisbon_Metro[~Lisbon_Metro['Long.'].astype(str).str.contains('Long.')]


In [117]:
Lisbon_Metro['Lat.']

0     38.76861
1     38.73713
2     38.73697
3     38.76038
4     38.74994
5     38.75311
6     38.75847
7     38.77937
8     38.72715
9     38.74233
10    38.73266
11    38.71948
12    38.71051
13    38.71055
14    38.74770
15    38.76293
16    38.70609
17    38.76012
18    38.76027
19    38.74076
20    38.75919
21    38.75479
22    38.75156
23    38.75336
24    38.77502
25    38.74708
26    38.72320
27    38.74140
28    38.74847
29    38.77339
30    38.72431
31    38.72488
32    38.71753
33    38.77488
34    38.79334
35    38.74003
36    38.76101
37    38.76787
38    38.72892
39    38.73037
40    38.76222
41    38.73775
42    38.76737
43    38.72015
44    38.75227
45    38.71590
46    38.74784
47    38.71402
48    38.73471
49    38.73519
50    38.71369
51    38.73392
52    38.73476
53    38.78564
54    38.75997
55    38.70703
Name: Lat., dtype: object

In [118]:
Lisbon_Metro['NomeConcat'] = Lisbon_Metro['Nome']+' - '+Lisbon_Metro['Outros nomes']
Lisbon_Metro.head()

Unnamed: 0,Nome,Outros nomes,Linha,Lat.,Long.,NomeConcat
0,Aeroporto,—,Vermelha,38.76861,−9.12861,Aeroporto - —
1,Alameda,Alameda I (técn.),Verde,38.73713,−9.13388,Alameda - Alameda I (técn.)
2,Alameda,Alameda II (técn.),Vermelha,38.73697,−9.13261,Alameda - Alameda II (técn.)
3,Alfornelos,—,Azul,38.76038,−9.20435,Alfornelos - —
4,Alto dos Moinhos,Centro Administrativo (prev.),Azul,38.74994,−9.18003,Alto dos Moinhos - Centro Administrativo (prev.)


#  Cultural Facilities Info - Lisboa Aberta - Create Cultural Score

## Museums

In [14]:
# URL of the GeoJSON file
museums_url = 'https://services.arcgis.com/1dSrzEWVQn5kHHyK/arcgis/rest/services/POICultura/FeatureServer/3/query?where=1%3D1&outFields=*&f=pgeojson'

# Fetch the GeoJSON data
response = requests.get(museums_url)
response.raise_for_status()  # Check if the request was successful

# Load the GeoJSON data into a GeoDataFrame
mdf = gpd.read_file(response.text)

# Convert the GeoDataFrame to a DataFrame
mdf = pd.DataFrame(mdf)

df.head()

Unnamed: 0,OBJECTID,COD_SIG,INF_NOME,INF_MORADA,FREGUESIA,INF_TELEFONE,INF_EMAIL,INF_SITE,INF_DESCRICAO,INF_ACTIVO,GlobalID,geometry
0,1,758,Gabinete curiosidades Karnart,"Avenida da Índia, 168",Belém,+351 213 466 411/+351 914 150 935,geral@karnart.org,www.karnart.org,O Gabinete Curiosidades Karnart é a sede da KA...,1,3fe9f7da-7fad-4b14-acb3-52f2e9c0c4a6,POINT (-9.21166 38.69427)
1,2,759,Academia Dramática Familiar 1º de Novembro,"Rua da Praia de Pedrouços, 76-78",Belém,+351 213 012 601,,,Edifício do séc XIX de arquitectura cultural e...,1,54f50c25-86c4-4cfa-afda-56bc0df13bde,POINT (-9.21781 38.69461)
2,5,762,LU.CA - Teatro Luís de Camões,"Calçada da Ajuda, 80",Belém,+351 215 939 100,info@lucateatroluisdecamoes.pt,www.lucateatroluisdecamoes.pt,"Pequeno teatro de gosto neoclássico tardio, in...",1,bfdcb6c3-99a0-4295-a270-91707f347131,POINT (-9.19938 38.69903)
3,6,763,Espaço POGO,"Rua da Cintura do Porto de Lisboa, Edifício 403",Estrela,+351 916 403 994,info@pogo.pt,http://www.pogo.pt,,1,51727d32-af42-4cae-bc34-9628b47b8d51,POINT (-9.15388 38.70570)
4,7,764,Teatro Cinearte,"Largo de Santos, 2-2E",Estrela,21 396 53 60/275,barraca@mail.telepac.pt / bilheteira@abarraca.com,www.abarraca.com,Encomendado pela Sociedade Administradora de C...,1,70165c4b-3536-4eb2-83bd-36d07a1110e8,POINT (-9.15524 38.70718)


## Theaters

In [19]:
# URL of the GeoJSON file
theaters_url = 'https://services.arcgis.com/1dSrzEWVQn5kHHyK/arcgis/rest/services/POICultura/FeatureServer/4/query?where=1%3D1&outFields=*&f=pgeojson'

# Fetch the GeoJSON data
response = requests.get(theaters_url)
response.raise_for_status()  # Check if the request was successful

# Load the GeoJSON data into a GeoDataFrame
tdf = gpd.read_file(response.text)

# Convert the GeoDataFrame to a DataFrame
tdf = pd.DataFrame(tdf)

tdf.head()

Unnamed: 0,OBJECTID,COD_SIG,INF_NOME,INF_MORADA,FREGUESIA,INF_TELEFONE,INF_EMAIL,INF_SITE,INF_DESCRICAO,INF_ACTIVO,GlobalID,geometry
0,1,758,Gabinete curiosidades Karnart,"Avenida da Índia, 168",Belém,+351 213 466 411/+351 914 150 935,geral@karnart.org,www.karnart.org,O Gabinete Curiosidades Karnart é a sede da KA...,1,3fe9f7da-7fad-4b14-acb3-52f2e9c0c4a6,POINT (-9.21166 38.69427)
1,2,759,Academia Dramática Familiar 1º de Novembro,"Rua da Praia de Pedrouços, 76-78",Belém,+351 213 012 601,,,Edifício do séc XIX de arquitectura cultural e...,1,54f50c25-86c4-4cfa-afda-56bc0df13bde,POINT (-9.21781 38.69461)
2,5,762,LU.CA - Teatro Luís de Camões,"Calçada da Ajuda, 80",Belém,+351 215 939 100,info@lucateatroluisdecamoes.pt,www.lucateatroluisdecamoes.pt,"Pequeno teatro de gosto neoclássico tardio, in...",1,bfdcb6c3-99a0-4295-a270-91707f347131,POINT (-9.19938 38.69903)
3,6,763,Espaço POGO,"Rua da Cintura do Porto de Lisboa, Edifício 403",Estrela,+351 916 403 994,info@pogo.pt,http://www.pogo.pt,,1,51727d32-af42-4cae-bc34-9628b47b8d51,POINT (-9.15388 38.70570)
4,7,764,Teatro Cinearte,"Largo de Santos, 2-2E",Estrela,21 396 53 60/275,barraca@mail.telepac.pt / bilheteira@abarraca.com,www.abarraca.com,Encomendado pela Sociedade Administradora de C...,1,70165c4b-3536-4eb2-83bd-36d07a1110e8,POINT (-9.15524 38.70718)


## Cinemas

In [20]:
# URL of the GeoJSON file
cin_url = 'https://services.arcgis.com/1dSrzEWVQn5kHHyK/arcgis/rest/services/POICultura/FeatureServer/2/query?where=1%3D1&outFields=*&f=pgeojson'

# Fetch the GeoJSON data
response = requests.get(cin_url)
response.raise_for_status()  # Check if the request was successful

# Load the GeoJSON data into a GeoDataFrame
cdf = gpd.read_file(response.text)

# Convert the GeoDataFrame to a DataFrame
cdf = pd.DataFrame(cdf)

cdf.head()

Unnamed: 0,OBJECTID,COD_SIG,INF_NOME,INF_MORADA,FREGUESIA,INF_TELEFONE,INF_EMAIL,INF_SITE,INF_DESCRICAO,INF_FONTE,INF_ACTIVO,GlobalID,geometry
0,1,506,Cinema Ideal,"Rua do Loreto, 15",Misericórdia,+351 210 998 295,cinemaideal@cinemaideal.pt,www.cinemaideal.pt,"Esta sala de cinema, aberta desde 1904, conhec...",-,1,45afe625-02d7-4712-b6b8-7aa0c2116ed6,POINT (-9.14422 38.71054)
1,2,507,Cinemateca Júnior,"Praça dos Restauradores, Palácio da Foz",Santa Maria Maior,+351 213 462 157 | +351 213 476 129,cinemateca.junior@cinemateca.pt,http://www.cinemateca.pt/,"A Cinemateca Júnior, instalada no Palácio Foz,...",http://www.cinemateca.pt/,1,1d3ca135-5a24-478c-95f4-dd2dc87a65c9,POINT (-9.14217 38.71556)
2,3,508,Cinema São Jorge,"Avenida da Liberdade, 175",Santo António,+351 213 103 400,cinemasaojorge@egeac.pt,http://cinemasaojorge.pt/,"Um dos mais emblemáticos cinemas de Lisboa, co...",http://cinemasaojorge.pt/,1,1bf79faf-5d97-48b3-b085-fa4446c435a2,POINT (-9.14633 38.72029)
3,4,509,Cinemateca Portuguesa,"Rua Barata Salgueiro, 39 R/C",Santo António,+ 351 213 596 200,cinemateca@cinemateca.pt,www.cinemateca.pt/,"Em 1948 foi criada a Cinemateca Nacional, hoje...",-,1,3414bf94-62ac-43c7-a846-0d5e41251e89,POINT (-9.14875 38.72088)
4,5,510,NOS Amoreiras,Avenida Engenheiro Duarte Pacheco Lote 7 2º Lo...,Campo de Ourique,16996,cinema.amoreiras@nos.pt,http://cinemas.nos.pt/,Possui 7 salas\n,http://cinemas.nos.pt/,1,1af99162-1626-4f83-9342-f891e2b442bc,POINT (-9.16210 38.72367)


## Auditoriums

In [21]:
# URL of the GeoJSON file
aud_url = 'https://services.arcgis.com/1dSrzEWVQn5kHHyK/arcgis/rest/services/POICultura/FeatureServer/1/query?where=1%3D1&outFields=*&f=pgeojson'

# Fetch the GeoJSON data
response = requests.get(aud_url)
response.raise_for_status()  # Check if the request was successful

# Load the GeoJSON data into a GeoDataFrame
adf = gpd.read_file(response.text)

# Convert the GeoDataFrame to a DataFrame
adf = pd.DataFrame(adf)

adf.head()

Unnamed: 0,OBJECTID,COD_SIG,INF_NOME,INF_MORADA,FREGUESIA,INF_TELEFONE,INF_EMAIL,INF_SITE,INF_DESCRICAO,INF_FONTE,INF_ACTIVO,GlobalID,geometry
0,1,168,Centro de Documentação da Casa da América Latina,"Avenida da Índia, 110",Belém,+351 218 172 490,geral@casamericalatina.pt,http://casamericalatina.pt/inicio/,,-,1,3d123d45-0d43-4507-b6b3-37a4f384a251,POINT (-9.19434 38.69672)
1,2,169,UCCLA - Centro de Documentação,"Avenida da Índia, 110",Belém,+351 218 172 950,uccla@uccla.pt,http://www.uccla.pt/,A UCCLA é uma associação intermunicipal de nat...,-,1,d3cc88bf-d350-4f9d-829a-94cd08ffebb8,POINT (-9.19434 38.69675)
2,3,170,Biblioteca de Belém,Rua da Junqueira 295,Belém,+351 218 172 580,bib.belem@cm-lisboa.pt,http://blx.cm-lisboa.pt,A Biblioteca Municipal de Belém encontra-se in...,CML - Rede BLx,1,53473127-6007-4d47-9b87-fa4e563c56f8,POINT (-9.19584 38.69713)
3,4,171,Arquivo Central e Arquivo Histórico de Marinha,Avenida da Índia S/N 1º,Belém,21 362 76 00,arquivo.central@marinha.pt / arquivo.historico...,www.marinha.pt,Fundos com origem na documentação da antiga Se...,www.marinha.pt,1,ae67622f-e6cc-4a72-afd3-f69f2d4039cb,POINT (-9.19237 38.69717)
4,5,172,Biblioteca Central da Marinha,Praça do Império Museu da Marinha R/C,Belém,21 365 85 20,biblioteca.marinha@marinha.pt,www.marinha.pt,"Situada, desde 1891, na Praça do Império, na i...",www.marinha.pt,1,fd047b92-5e5f-45f4-bf47-c703cfb0012f,POINT (-9.20924 38.69720)


#  Educational Facilities Info - Lisboa Aberta - Create Educational Facilities Score

## Pre-School Public/Private

In [26]:
#Private Schools Missing
# URL of the GeoJSON file
pre_public_url = 'https://services.arcgis.com/1dSrzEWVQn5kHHyK/arcgis/rest/services/POIEducacao/FeatureServer/9/query?outFields=*&where=1%3D1&f=geojson'

# Fetch the GeoJSON data
response = requests.get(pre_public_url)
response.raise_for_status()  # Check if the request was successful

# Load the GeoJSON data into a GeoDataFrame
pre_pub = gpd.read_file(response.text)

# Convert the GeoDataFrame to a DataFrame
pre_pub = pd.DataFrame(pre_pub)
pre_pub['Access'] = 'Public'
pre_pub['Stage'] = 'Pre-School'

pre_pub.head()

Unnamed: 0,OBJECTID,COD_SIG,INF_NOME,INF_MORADA,INF_TELEFONE,INF_EMAIL,INF_SITE,INF_ACTIVO,INF_DESCRICAO,FREGUESIA,INF_ID,GlobalID,geometry,Access,Stage
0,1,1389,Escola Profissional Metropolitana de Lisboa,"Travessa da Galé, 36",+351 213 617 324,ep.metropolitanalisboa@escolas.min-edu.pt,http://www.metropolitana.pt/,1,Ensino Profissional / Rede Privada,Alcântara,11695.0,707e37d5-e1b5-45fc-85f8-cc7095308431,POINT (-9.18159 38.69930),Public,Pre-School
1,2,1390,Conservatório Metropolitano de Música de Lisboa,"Edifício Standar Eléctrica - Travessa da Galé,...",+351 213 617 320,oml@oml.pt,http://www.metropolitana.pt/,1,Ensino Artístico - Privado,Alcântara,1192.0,83edfbca-6558-460e-93d1-0475467960a1,POINT (-9.18130 38.69940),Public,Pre-School
2,3,1391,Academia de Música de Luisboa - Os Violinos,Marquês de Pombal,+351 213 630 201,violinos@acordarte.pt,http://www.violinos.net/,1,Ensino Artístico - Privado,Belém,11687.0,a421d20f-eb13-4126-a30c-2158b07fe2fd,POINT (-9.19444 38.70013),Public,Pre-School
3,4,1392,Acordarte - Academia de Música de Lisboa,"Rua Alexandre de Sá Pinto, Escola Marquês de P...",+351 213 630 201,restelo@academiamusicalisboa.com; parquedasnac...,http://academiamusicalisboa.com,1,Ensino Artístico - Privado,Belém,11687.0,24aa8435-1660-4cdd-a993-5113daefd230,POINT (-9.19444 38.70013),Public,Pre-School
4,5,1393,Escola Profissional de Imagem (ETIC),"Rua Dom Luís I, 4 e 6",+351 213 942 550,ep.imagem@escolas.min-edu.pt,http://www.epi.edu.pt,1,Ensino Profissional / Rede Privada,Misericórdia,1184.0,049de07d-4405-4a0f-8946-4ddce0c71d28,POINT (-9.14746 38.70780),Public,Pre-School


## 1º Cicle Public/Private

In [39]:
#Public
# URL of the GeoJSON file
first_public_url = 'https://services.arcgis.com/1dSrzEWVQn5kHHyK/arcgis/rest/services/POIEducacao/FeatureServer/1/query?outFields=*&where=1%3D1&f=geojson'

# Fetch the GeoJSON data
response = requests.get(first_public_url)
response.raise_for_status()  # Check if the request was successful

# Load the GeoJSON data into a GeoDataFrame
first_pub  = gpd.read_file(response.text)

# Convert the GeoDataFrame to a DataFrame
first_pub = pd.DataFrame(first_pub)
first_pub['Access'] = 'Public'
first_pub['Stage'] = 'First'


#Private
# URL of the GeoJSON file
first_private_url = 'https://services.arcgis.com/1dSrzEWVQn5kHHyK/arcgis/rest/services/POIEducacao/FeatureServer/6/query?where=1%3D1&outFields=*&f=pgeojson'

# Fetch the GeoJSON data
response = requests.get(first_private_url)
response.raise_for_status()  # Check if the request was successful

# Load the GeoJSON data into a GeoDataFrame
first_priv  = gpd.read_file(response.text)

# Convert the GeoDataFrame to a DataFrame
first_priv = pd.DataFrame(first_pub)
first_priv['Access'] = 'Private'
first_priv['Stage'] = 'First'

#Print
# first_pub.head()
first_priv.head()


Unnamed: 0,OBJECTID,COD_SIG,NOME_ESCOLA,MORADA,TIPOLOGIA,FREGUESIA12,TELEFONE,COD_POSTAL,SITE,BLOGUE,...,EMAIL,SITUACAO,PROPRIEDADE,CRIADOPOR,MODIFICADO,NOME_LABEL,GlobalID,geometry,Access,Stage
0,1,4201103017001,Escola Básica de São João de Brito,"Rua Lopes de Mendonça, 4",EB01,Alvalade,218401178,1700-271,http://aealvalade.edu.pt/,,...,ebsjb@aealvalade.edu.pt,Existente,Propriedade Municipal,Ana Cremilde Mendes Guerra Cazaux Afonso,1665676160000,EB de São João de Brito,37f1d9e2-e4a4-4781-9467-17024b0eeebc,POINT (-9.13753 38.75605),Private,First
1,3,1102208003001,Escola Básica Professora Aida Vieira,"Rua Professor Sedas Nunes, Escola",EB01,Carnide,217169300,1600-597,http://aebpc.com/,,...,eb23bpc@gmail.com,Existente,Propriedade Municipal,Ana Cremilde Mendes Guerra Cazaux Afonso,1665748113000,EB Professora Aida Vieira,4627c94e-cc5e-40cb-bab4-83970991efb9,POINT (-9.19261 38.76768),Private,First
2,5,1102301001001,Jardim de Infância do Bairro Padre Cruz,"Rua Prof. Francisco Pereira de Moura, Jardim d...",JI,Carnide,217153601,1600-746,http://aebpc.com/,,...,eb23bpc@gmail.com,Existente,Propriedade Municipal,Ana Cremilde Mendes Guerra Cazaux Afonso,1665676261000,JI do Bairro Padre Cruz,1a55a00a-9474-4c57-9a2e-128788e3a5be,POINT (-9.18971 38.76882),Private,First
3,7,1501103001001,Escola Básica Gaivotas,"Rua das Chagas, 28-40",EB01,Misericórdia,211368654,1200-107,aepassosmanuel.pt/escolas/eb1-ji-gaivotas/,http://escolajigaivotas.blogspot.com/,...,info.eb1.gvt@abc.edu.pt,Existente,Ministério da Educação e Ciência,Ana Cremilde Mendes Guerra Cazaux Afonso,1665587670000,EB Gaivotas,5c263eab-c028-4eb6-9f2e-a4afbf904eb9,POINT (-9.14518 38.71055),Private,First
4,8,1500802013001,Escola Básica Padre Abel Varzim,"Rua da Rosa, 168",EB01,Misericórdia,213460886,1200-390,http://aepassosmanuel.pt/escolas/eb1-ji-padre-...,http://escolajipav.blogspot.com/,...,info.eb1.pav@abc.edu.pt,Existente,Propriedade Municipal,Ana Cremilde Mendes Guerra Cazaux Afonso,1665587705000,EB Padre Abel Varzim,bec1da21-14b6-41e9-8434-757bde2ce853,POINT (-9.14549 38.71383),Private,First


## 2º/3º Cicle Public/Private

In [41]:
#Public - Missing Data
# URL of the GeoJSON file
second_public_url = 'https://services.arcgis.com/1dSrzEWVQn5kHHyK/arcgis/rest/services/POIEducacao/FeatureServer/1/query?outFields=*&where=1%3D1&f=geojson'

# Fetch the GeoJSON data
response = requests.get(second_public_url)
response.raise_for_status()  # Check if the request was successful

# Load the GeoJSON data into a GeoDataFrame
sec_pub  = gpd.read_file(response.text)

# Convert the GeoDataFrame to a DataFrame
sec_pub = pd.DataFrame(sec_pub)
sec_pub['Access'] = 'Public'
sec_pub['Stage'] = 'Second'


#Private
# URL of the GeoJSON file
second_private_url = 'https://services.arcgis.com/1dSrzEWVQn5kHHyK/arcgis/rest/services/POIEducacao/FeatureServer/6/query?outFields=*&where=1%3D1&f=geojson'

# Fetch the GeoJSON data
response = requests.get(second_private_url)
response.raise_for_status()  # Check if the request was successful

# Load the GeoJSON data into a GeoDataFrame
sec_priv  = gpd.read_file(response.text)

# Convert the GeoDataFrame to a DataFrame
sec_priv = pd.DataFrame(sec_priv)
sec_priv['Access'] = 'Private'
sec_priv['Stage'] = 'Second'

#Print
# sec_pub.head()
sec_priv.head()

Unnamed: 0,OBJECTID,COD_SIG,INF_NOME,INF_MORADA,INF_TELEFONE,INF_EMAIL,INF_SITE,INF_ACTIVO,INF_DESCRICAO,FREGUESIA,GlobalID,geometry,Access,Stage
0,1,1426,Colégio do Bom Sucesso,"R. Bartolomeu Dias, 53\n",+351 213010442,cl.bomsucesso@escolas.min-edu.pt,https://www.colegiobomsucesso.pt/pt/,1,,Belém,15cf71ac-1d1e-46d7-a0e7-7f09140d06c2,POINT (-9.21106 38.69558),Private,First
1,2,1427,Externato Alfredo Binet,"Rua Paulo da Gama, 14",213014247,ext.alfredobinet@escolas.min-edu.pt;,https://alfredbinet.com.pt/,1,,Belém,d693914b-1dc3-42a8-9a06-379bfaf954e2,POINT (-9.21648 38.69558),Private,First
2,3,1428,"Cooperativa ""A Torre""",Praça Malaca n.3,213011407,ext.atorre@escolas.min-edu.pt; geral@atorre.pt,https://atorre.pt/,1,,Belém,91d9dfd8-af85-4f8e-a585-a102fea77d2f,POINT (-9.21773 38.69631),Private,First
3,4,1429,"Externato ""A Escolinha""","Rua Fernão Mendes Pinto, 41",+351 213 014 452,ext.aescolinha@escolas.min-edu.pt,https://www.externatoescolinha.pt/,1,,Belém,87fd77ee-86ab-45eb-b364-d647ff140be3,POINT (-9.22490 38.69732),Private,First
4,5,1430,"Externato ""Santa Maria de Belém""","Rua Duarte Pacheco Pereira, 24 - 1º Dto, Reste...",+351 213 011 343,ext.santamdebelem@escolas.min-edu.pt,https://externatosantamariadebelem.com/,1,,Belém,a78b3476-9d5d-426d-b473-3d609519ec20,POINT (-9.21897 38.69770),Private,First


## High School Public/Private

In [43]:
#Public - Missing Data
# URL of the GeoJSON file
second_public_url = 'https://services.arcgis.com/1dSrzEWVQn5kHHyK/arcgis/rest/services/POIEducacao/FeatureServer/1/query?outFields=*&where=1%3D1&f=geojson'

# Fetch the GeoJSON data
response = requests.get(second_public_url)
response.raise_for_status()  # Check if the request was successful

# Load the GeoJSON data into a GeoDataFrame
sec_pub  = gpd.read_file(response.text)

# Convert the GeoDataFrame to a DataFrame
sec_pub = pd.DataFrame(sec_pub)
sec_pub['Access'] = 'Public'
sec_pub['Stage'] = 'Second'


#Private
# URL of the GeoJSON file
high_private_url = 'https://services.arcgis.com/1dSrzEWVQn5kHHyK/arcgis/rest/services/POIEducacao/FeatureServer/7/query?outFields=*&where=1%3D1&f=geojson'

# Fetch the GeoJSON data
response = requests.get(high_private_url)
response.raise_for_status()  # Check if the request was successful

# Load the GeoJSON data into a GeoDataFrame
hs_priv  = gpd.read_file(response.text)

# Convert the GeoDataFrame to a DataFrame
hs_priv = pd.DataFrame(sec_priv)
hs_priv['Access'] = 'Private'
hs_priv['Stage'] = 'HighSchool'

#Print
# sec_pub.head()
hs_priv.head()

Unnamed: 0,OBJECTID,COD_SIG,INF_NOME,INF_MORADA,INF_TELEFONE,INF_EMAIL,INF_SITE,INF_ACTIVO,INF_DESCRICAO,FREGUESIA,GlobalID,geometry,Access,Stage
0,2,1853,Colégio do Bom Sucesso,"Rua Bartolomeu Dias, 53\n",213011574,geral@cbomsucesso.pt,https://www.colegiobomsucesso.pt/,1,,Belém,53c0df31-ea66-4be2-8a20-1df475bc332b,POINT (-9.21106 38.69558),Private,HighSchool
1,3,1854,"Escola ""São Francisco Xavier""","Rua Duarte Pacheco Pereira, 5\n",+351 213 010 039,esc.sfranciscoxavier@escolas.min-edu.pt,http://www.esfranciscoxavier.com.sapo.pt,1,,Belém,d4d0a85a-3793-4e47-be09-3eaf1ec625b2,POINT (-9.21938 38.69782),Private,HighSchool
2,4,1855,Escola Raiz,"Av. Torre de Belém, nº 30\n",213042080,esc.raiz@escolas.min-edu.pt,https://escolaraiz.pt,1,,Belém,014406f2-2568-4276-963d-83e7ef7864bd,POINT (-9.21287 38.69944),Private,HighSchool
3,5,1856,"Centro de Pedagogia Terapêutica ""Bola de Neve""","Rua Afonso Gonçalves Baldaya, 6",+351 213 010 813,ct.boladeneve@escolas.min-edu.pt,,1,,Belém,6ea0ae05-6fba-414b-b37e-1443698ed6ea,POINT (-9.22074 38.69946),Private,HighSchool
4,6,1857,Cooperativa A Torre,"Praça de Malaca, 3;",213011407,ext.atorre@escolas.min-edu.pt,https://atorre.pt/,1,,Belém,12bcce44-5ea9-4082-88e1-dcba5475c8e9,POINT (-9.21051 38.69957),Private,HighSchool


## College Public/Private

In [57]:
# URL of the GeoJSON file
uni_url = 'https://services.arcgis.com/1dSrzEWVQn5kHHyK/arcgis/rest/services/MapaConhecimento/FeatureServer/1/query?where=1%3D1&outFields=*&f=pgeojson'

# Fetch the GeoJSON data
response = requests.get(uni_url)
response.raise_for_status()  # Check if the request was successful

# Load the GeoJSON data into a GeoDataFrame
uni_all  = response.json()

# Create GeoDataFrames from the GeoJSON dictionaries
uni_all = gpd.GeoDataFrame.from_features(uni_all['features'])

# Convert the GeoDataFrame to a DataFrame
uni_all = pd.DataFrame(uni_all)
uni_all['Access'] = 'All'
uni_all['Stage'] = 'Uni'

uni_all.head()

Unnamed: 0,geometry,OBJECTID,COD_SIG,IDTIPO,NOME,MORADA,ATORTIPOCODIGO,EMAIL,SITE,REDESOCIAL,GlobalID,Access,Stage
0,POINT (-9.09821 38.77604),1148,3307706017001,,IPLisboa | ESTeSL - Escola Superior de Tecnolo...,"Avenida D. João II, 4.69.01",10101,estesl@estesl.ipl.pt,http://www.estesl.ipl.pt,https://www.facebook.com/esteslisboa/,cb0e3435-e6d3-429d-b9fd-30ff4c9ce8a2,All,Uni
1,POINT (-9.14718 38.75688),1149,4200102086001,,ESEL | Escola Superior de Enfermagem de Lisboa...,"Avenida do Brasil, 53 (Pav 16)",10101,geral@esel.pt,http://www.esel.pt/,http://pt-pt.facebook.com/public/Escola-Superi...,1e72bb51-a5d5-41b4-b26c-fbe0a42076a3,All,Uni
2,POINT (-9.09821 38.77604),1150,3307706017001,,ESEL | Escola Superior de Enfermagem de Lisboa...,"Avenida D. João II, 4.69.01",10101,geral@esel.pt,http://www.esel.pt/,http://www.facebook.com/pages/Escola-Superior-...,4bd921b7-5e8f-40cf-9d1b-52b8af12c884,All,Uni
3,POINT (-9.16276 38.74939),1151,902012084001,,ESEL | Escola Superior de Enfermagem de Lisboa...,"Avenida Professor Egas Moniz, Escola Superior ...",10101,geral@esel.pt,http://www.esel.pt/,http://www.facebook.com/pages/Escola-Superior-...,1f530d20-e1a3-4c59-ab02-b94ba19ddf35,All,Uni
4,POINT (-9.16033 38.73922),1152,3907301054014,,ESEL | Escola Superior de Enfermagem de Lisboa...,"Rua Professor Lima Basto, IPO - Escola de Enfe...",10101,geral@esel.pt,http://www.esel.pt/,http://fr-fr.facebook.com/pages/Escola-Superio...,de2ec9f3-68e7-483d-bf6b-f8e56d7a905d,All,Uni


# Data

In [8]:
Lisbon_Properties.head()

Unnamed: 0,id,title,price,num_rooms,total_area,latitude,longitude,region,extras
0,0,"Apartamento T1 em Benfica, Lisboa",269.900 €,1 quarto,Área bruta 97 m²,3875171,-92009,Benfica,
1,1,"Apartamento T2 em Alvalade, Lisboa",430.000 €,2 quartos,Área bruta 90 m²,387457392,-91425898,Alvalade,
2,2,"Apartamento T3 na Rua António Nobre, São Domin...",399.900 €,3 quartos,Área bruta 120 m²,3874657,-917989,São Domingos de Benfica,
3,3,"Apartamento T4+1 na Rua de São Paulo, Misericó...",1.650.000 €,4 quartos,Área útil 233 m²,3870794,-914468,Misericórdia,"Com elevador, Com garagem"
4,4,"Apartamento T1 em Praça de Luís de Camões, Mis...",690.000 €,1 quarto,Área bruta 93 m²,3871078,-914385,Misericórdia,Com garagem


In [9]:
def extra_rooms(x):
    if '+' in x:
        return int(x.split('+')[1])
    else:
        return 0

def area(x):
    if 'Área' in x:
        return int(x.split()[2].replace('.',''))
    else:
        return 'Unknown'

def num_extras(x):
    if x.isspace():
        return 0
    elif ',' in x:
        return int(len(x.split(',')))
    else:
        return 1

In [119]:
#Properties Transformation
Lisbon_Properties['Type'] = Lisbon_Properties['title'].apply(lambda x: x.split()[0])
Lisbon_Properties['Typology'] = Lisbon_Properties['title'].apply(lambda x: x.split()[1])
Lisbon_Properties['Extra_Rooms'] = Lisbon_Properties['Typology'].apply(extra_rooms)
Lisbon_Properties['Extra_Rooms_Flag'] = Lisbon_Properties['Extra_Rooms'].apply(lambda x: 1 if x > 0 else 0)
Lisbon_Properties['N_Rooms'] = Lisbon_Properties['num_rooms'].apply(lambda x: int(x.split()[0]))
Lisbon_Properties['Total_N_Rooms'] = Lisbon_Properties['N_Rooms'] + Lisbon_Properties['Extra_Rooms']
Lisbon_Properties['Price'] = Lisbon_Properties['price'].apply(lambda x: int(x.replace('.','').split()[0]))
Lisbon_Properties['Area_m2'] = Lisbon_Properties['total_area'].apply(area)
Lisbon_Properties['N_Extras'] = Lisbon_Properties['extras'].apply(num_extras)
Lisbon_Properties['Extras_Flag'] = Lisbon_Properties['N_Extras'].apply(lambda x: 1 if x > 0 else 0)
Lisbon_Properties['Latitude'] = Lisbon_Properties['latitude'].apply(lambda x: float(x.replace(',','.')))
Lisbon_Properties['Longitude'] = Lisbon_Properties['longitude'].apply(lambda x: float(x.replace(',','.')))
Lisbon_Properties.rename(columns={'region':'Region'}, inplace=True)

#Metro Transformation
Lisbon_Metro['Lat.'] = Lisbon_Metro['Lat.'].str.replace('−', '-').astype(float)
Lisbon_Metro['Long.'] = Lisbon_Metro['Long.'].str.replace('−', '-').astype(float)



In [86]:
Lisbon_Metro.dtypes


Nome            object
Outros nomes    object
Linha           object
Lat.            object
Long.           object
dtype: object

In [11]:
extras_df = Lisbon_Properties['extras'].str.split(',', expand=True)

extras = []
unique_extras = []

for x in [0,1,2,3,4]:
    extras.append(list(extras_df[x].unique()))

for ext in extras:
    unique_extras += ext

unique_extras = set(unique_extras)
unique_extras = {extra.strip() for extra in unique_extras if extra is not None and extra.strip()}

unique_extras


{'Com elevador',
 'Com garagem',
 'De luxo',
 'Piscina',
 'Rés do chão',
 'Vista para mar',
 'Último andar'}

In [12]:
for feature in unique_extras:
    Lisbon_Properties[feature] = Lisbon_Properties['extras'].apply(lambda x: int(feature in x))

In [13]:
Lisbon_Properties.head()

Unnamed: 0,id,title,price,num_rooms,total_area,latitude,longitude,Region,extras,Type,...,Extras_Flag,Latitude,Longitude,Com garagem,Rés do chão,De luxo,Último andar,Com elevador,Vista para mar,Piscina
0,0,"Apartamento T1 em Benfica, Lisboa",269.900 €,1 quarto,Área bruta 97 m²,3875171,-92009,Benfica,,Apartamento,...,0,38.75171,-9.2009,0,0,0,0,0,0,0
1,1,"Apartamento T2 em Alvalade, Lisboa",430.000 €,2 quartos,Área bruta 90 m²,387457392,-91425898,Alvalade,,Apartamento,...,0,38.745739,-9.14259,0,0,0,0,0,0,0
2,2,"Apartamento T3 na Rua António Nobre, São Domin...",399.900 €,3 quartos,Área bruta 120 m²,3874657,-917989,São Domingos de Benfica,,Apartamento,...,0,38.74657,-9.17989,0,0,0,0,0,0,0
3,3,"Apartamento T4+1 na Rua de São Paulo, Misericó...",1.650.000 €,4 quartos,Área útil 233 m²,3870794,-914468,Misericórdia,"Com elevador, Com garagem",Apartamento,...,1,38.70794,-9.14468,1,0,0,0,1,0,0
4,4,"Apartamento T1 em Praça de Luís de Camões, Mis...",690.000 €,1 quarto,Área bruta 93 m²,3871078,-914385,Misericórdia,Com garagem,Apartamento,...,1,38.71078,-9.14385,1,0,0,0,0,0,0


In [14]:
region_dummies = pd.get_dummies(Lisbon_Properties['Region'],dtype=int)
type_dummies = pd.get_dummies(Lisbon_Properties['Type'],dtype=int)

In [15]:
Lisbon_Properties = pd.concat([Lisbon_Properties, region_dummies, type_dummies], axis=1)

In [16]:
Lisbon_Properties.drop(['Region','Type'], axis=1, inplace=True)

In [17]:
Lisbon_Properties.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1195 entries, 0 to 1380
Data columns (total 59 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   id                       1195 non-null   int64  
 1   title                    1195 non-null   object 
 2   price                    1195 non-null   object 
 3   num_rooms                1195 non-null   object 
 4   total_area               1195 non-null   object 
 5   latitude                 1195 non-null   object 
 6   longitude                1195 non-null   object 
 7   extras                   1195 non-null   object 
 8   Typology                 1195 non-null   object 
 9   Extra_Rooms              1195 non-null   int64  
 10  Extra_Rooms_Flag         1195 non-null   int64  
 11  N_Rooms                  1195 non-null   int64  
 12  Total_N_Rooms            1195 non-null   int64  
 13  Price                    1195 non-null   int64  
 14  Area_m2                  1195

In [18]:
Lisbon_Properties.head()

Unnamed: 0,id,title,price,num_rooms,total_area,latitude,longitude,extras,Typology,Extra_Rooms,...,São Vicente,Apartamento,Casa,Duplex,Flat,Loft,Moradia,Palacete,Penthouse,Quinta
0,0,"Apartamento T1 em Benfica, Lisboa",269.900 €,1 quarto,Área bruta 97 m²,3875171,-92009,,T1,0,...,0,1,0,0,0,0,0,0,0,0
1,1,"Apartamento T2 em Alvalade, Lisboa",430.000 €,2 quartos,Área bruta 90 m²,387457392,-91425898,,T2,0,...,0,1,0,0,0,0,0,0,0,0
2,2,"Apartamento T3 na Rua António Nobre, São Domin...",399.900 €,3 quartos,Área bruta 120 m²,3874657,-917989,,T3,0,...,0,1,0,0,0,0,0,0,0,0
3,3,"Apartamento T4+1 na Rua de São Paulo, Misericó...",1.650.000 €,4 quartos,Área útil 233 m²,3870794,-914468,"Com elevador, Com garagem",T4+1,1,...,0,1,0,0,0,0,0,0,0,0
4,4,"Apartamento T1 em Praça de Luís de Camões, Mis...",690.000 €,1 quarto,Área bruta 93 m²,3871078,-914385,Com garagem,T1,0,...,0,1,0,0,0,0,0,0,0,0


##On stations bring the nb of stations near a house instead of flagging which stations are in the vicinities. 
Alternative is to flag whether or not there's a metro nearby.

In [129]:
# Haversine function to calculate the distance between two points on the Earth
def haversine(lat1, lon1, lat2, lon2):
    # Convert latitude and longitude from degrees to radians
    lat1 = math.radians(lat1)
    lon1 = math.radians(lon1)
    lat2 = math.radians(lat2)
    lon2 = math.radians(lon2)
    
    # Haversine formula
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    
    a = math.sin(dlat / 2)**2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2)**2
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    
    # Radius of Earth in kilometers
    R = 6371.0
    
    # Calculate the distance
    distance = R * c
    
    return distance

#Define Distance Threshold
dist_threshold = 1.5

# Prepare a list to store the distances
distances = []

# Calculate distances between each property and each metro station
for p_index, p_row in Lisbon_Properties.iterrows():
    count_nearby_stations = 0
    for m_index, m_row in Lisbon_Metro.iterrows():
        distance = haversine(p_row['Latitude'], p_row['Longitude'], m_row['Lat.'], m_row['Long.'])
        if distance < dist_threshold:
            count_nearby_stations += 1
        distances.append({
            'property_id': p_row['id'],
            'property_title': p_row['title'],
            'metro_station': m_row['Nome'],
            'distance_km': distance
        })
    Lisbon_Properties.at[p_index, 'stations_within_1.5km'] = count_nearby_stations

# Create a DataFrame from the distances list
Distances_df = pd.DataFrame(distances)

# Print the DataFrame with distances
Distances_df.head()



Unnamed: 0,property_id,property_title,metro_station,distance_km
0,0,"Apartamento T1 em Benfica, Lisboa",Aeroporto,6.543674
1,0,"Apartamento T1 em Benfica, Lisboa",Alameda,6.034241
2,0,"Apartamento T1 em Benfica, Lisboa",Alameda,6.145132
3,0,"Apartamento T1 em Benfica, Lisboa",Alfornelos,1.009409
4,0,"Apartamento T1 em Benfica, Lisboa",Alto dos Moinhos,1.820479


In [127]:
Distances_df.loc[Distances_df['property_id'] == 0]

Unnamed: 0,property_id,property_title,metro_station,distance_km
0,0,"Apartamento T1 em Benfica, Lisboa",Aeroporto,6.543674
1,0,"Apartamento T1 em Benfica, Lisboa",Alameda,6.034241
2,0,"Apartamento T1 em Benfica, Lisboa",Alameda,6.145132
3,0,"Apartamento T1 em Benfica, Lisboa",Alfornelos,1.009409
4,0,"Apartamento T1 em Benfica, Lisboa",Alto dos Moinhos,1.820479
5,0,"Apartamento T1 em Benfica, Lisboa",Alvalade,4.940078
6,0,"Apartamento T1 em Benfica, Lisboa",Amadora Este,1.664757
7,0,"Apartamento T1 em Benfica, Lisboa",Ameixoeira,4.718982
8,0,"Apartamento T1 em Benfica, Lisboa",Anjos,6.346299
9,0,"Apartamento T1 em Benfica, Lisboa",Areeiro,5.934031


In [128]:
# Print the updated Lisbon_Properties DataFrame with the new column
Lisbon_Properties.head()

Unnamed: 0,id,title,price,num_rooms,total_area,latitude,longitude,extras,Typology,Extra_Rooms,...,Casa,Duplex,Flat,Loft,Moradia,Palacete,Penthouse,Quinta,Type,stations_within_1.5km
0,0,"Apartamento T1 em Benfica, Lisboa",269.900 €,1 quarto,Área bruta 97 m²,3875171,-92009,,T1,0,...,0,0,0,0,0,0,0,0,Apartamento,4.0
1,1,"Apartamento T2 em Alvalade, Lisboa",430.000 €,2 quartos,Área bruta 90 m²,387457392,-91425898,,T2,0,...,0,0,0,0,0,0,0,0,Apartamento,9.0
2,2,"Apartamento T3 na Rua António Nobre, São Domin...",399.900 €,3 quartos,Área bruta 120 m²,3874657,-917989,,T3,0,...,0,0,0,0,0,0,0,0,Apartamento,4.0
3,3,"Apartamento T4+1 na Rua de São Paulo, Misericó...",1.650.000 €,4 quartos,Área útil 233 m²,3870794,-914468,"Com elevador, Com garagem",T4+1,1,...,0,0,0,0,0,0,0,0,Apartamento,8.0
4,4,"Apartamento T1 em Praça de Luís de Camões, Mis...",690.000 €,1 quarto,Área bruta 93 m²,3871078,-914385,Com garagem,T1,0,...,0,0,0,0,0,0,0,0,Apartamento,9.0


<html>
<p><strong>Great Circle Distance Formula:</strong></p>
<p>The formula to calculate the great circle or 'as the crow flies' distance between two points on the Earth's surface, given their latitude and longitude is:</p>
<p style="font-family: 'Lucida Console', Monaco, monospace;">
  \( d = 2R \times \sin^{-1}\left(\sqrt{\sin^2\left(\frac{\theta_2 - \theta_1}{2}\right) + \cos \theta_1 \times \cos \theta_2 \times \sin^2\left(\frac{\phi_2 - \phi_1}{2}\right)}\right) \)
</p>
<p>where:</p>
<ul>
  <li><strong>\( (\theta_1, \phi_1) \)</strong> and <strong>\( (\theta_2, \phi_2) \)</strong> – Coordinates of each point (latitude and longitude, respectively);</li>
  <li><strong>R</strong> – Radius of the Earth; and</li>
  <li><strong>d</strong> – Great circle distance between the points.</li>
</l>
</html>


In [None]:
Lisbon_Metro.head()

Unnamed: 0,Nome,Outros nomes,Linha,Lat.,Long.
0,Aeroporto,—,Vermelha,38.76861,−9.12861
1,Alameda,Alameda I (técn.),Verde,38.73713,−9.13388
2,Alameda,Alameda II (técn.),Vermelha,38.73697,−9.13261
3,Alfornelos,—,Azul,38.76038,−9.20435
4,Alto dos Moinhos,Centro Administrativo (prev.),Azul,38.74994,−9.18003


##Keep this cell LAST 

In [None]:
columns_to_drop = ['title', 'price', 'num_rooms', 'total_area', 'latitude', 'longitude', 'Typology','extras']
Lisbon_Properties = Lisbon_Properties.drop(columns_to_drop, axis=1)