In [1]:
import requests
from bs4 import BeautifulSoup
import json
import pandas as pd

# First, define some helper functions

In [2]:
# Find hrefs in main search page

def generate_links(response, n_promoted=3):
    """Extract offer URLs from a search-results page.

    Parameters
    ----------
    response : object with a ``text`` attribute
        The fetched search page (e.g. the result of ``requests.get``);
        ``response.text`` is parsed as HTML.
    n_promoted : int, default 3
        Number of leading ``offer-item-details`` elements to skip. The
        default skips the promoted offers shown at the top of the page.

    Returns
    -------
    list of str
        The ``href`` of the first link found in each remaining offer
        element, in page order. Offer elements that contain no link with
        an ``href`` attribute are skipped instead of raising.
    """
    soup = BeautifulSoup(response.text, 'html.parser')

    offer_links = []
    for offer in soup.find_all(class_='offer-item-details')[n_promoted:]:
        # href=True restricts the match to anchors that actually carry an
        # href; find() returns None when the offer has no such anchor.
        anchor = offer.find('a', href=True)
        if anchor is None:
            continue
        offer_links.append(anchor['href'])

    return offer_links

In [3]:
# Find data in single offer's page and return as a json

def process_offer_link(link, timeout=30):
    """Download a single offer page and return its embedded advert data.

    Parameters
    ----------
    link : str
        URL of the offer page.
    timeout : float or tuple, default 30
        Timeout in seconds passed to ``requests.get`` so that a stalled
        connection cannot block the notebook indefinitely.

    Returns
    -------
    The value stored under ``['initialProps']['data']['advert']`` in the
    JSON held by the page element with id ``server-app-state``.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    ValueError
        If the page has no ``server-app-state`` element, or its content is
        not valid JSON (``json.JSONDecodeError`` is a ``ValueError``).
    KeyError
        If the JSON lacks the expected ``initialProps``/``data``/``advert``
        path.
    """
    # Get full html page; fail early on HTTP errors instead of trying to
    # parse an error page.
    offer_response = requests.get(link, timeout=timeout)
    offer_response.raise_for_status()

    # Convert to soup
    offer_soup = BeautifulSoup(offer_response.text, 'html.parser')

    # The page state is embedded as JSON in a dedicated element
    state_tag = offer_soup.find(id="server-app-state")
    if state_tag is None:
        raise ValueError(
            "No 'server-app-state' element found at {}".format(link)
        )

    app_state = json.loads(state_tag.text)
    return app_state['initialProps']['data']['advert']

In [4]:
# Extract relevant data from json

def collect_data_from_json(apartament_data):
    """Flatten one advert record into a feature dict and a thumbnail list.

    Parameters
    ----------
    apartament_data : dict
        Advert record; the keys read here are ``price``, ``characteristics``,
        ``dateModified``, ``location`` and ``photos``.

    Returns
    -------
    tuple
        ``(parsed_data, photos)`` where ``parsed_data`` maps feature names
        to values (price first, then one entry per characteristic, then the
        modification date and coordinates) and ``photos`` is the list of
        thumbnail values, one per photo.
    """
    # Price opens the dict so it becomes the leading key
    parsed_data = {'Cena': apartament_data['price']['value']}

    # One entry per characteristic: label -> translated value
    parsed_data.update(
        (item['label'], item['value_translated'])
        for item in apartament_data['characteristics']
    )

    # Date of last edit
    parsed_data['Data ogłoszenia'] = apartament_data['dateModified']

    # Geolocation, latitude before longitude
    coordinates = apartament_data['location']['coordinates']
    for axis in ('latitude', 'longitude'):
        parsed_data[axis] = coordinates[axis]

    # Links to small photos
    photos = []
    for photo in apartament_data['photos']:
        photos.append(photo['thumbnail'])

    return parsed_data, photos

# Now let's get scraping!

In [5]:
# Search-results page to scrape. The query string selects rental
# apartments in Warsaw with price up to 3000, 2 rooms, 72 ads per page.
SEARCH_URL = 'https://www.otodom.pl/wynajem/mieszkanie/warszawa/?search%5Bfilter_float_price%3Ato%5D=3000&search%5Bfilter_enum_rooms_num%5D%5B0%5D=2&search%5Bdescription%5D=1&search%5Bcity_id%5D=26&nrAdsPerPage=72'

# A timeout keeps a stalled connection from hanging the notebook, and
# raise_for_status() stops here on a 4xx/5xx instead of parsing an error page.
response = requests.get(SEARCH_URL, timeout=30)
response.raise_for_status()

In [6]:
# Parse the search page into a list of offer URLs
offer_links = generate_links(response)

# Print the first ten URLs as a quick check of the parsing step
link_preview = offer_links[:10]
for link in link_preview:
    print(link)

https://www.otodom.pl/oferta/piekny-mieszkanie-w-apartamentowcu-mokotow-ID43jLE.html#01ba84e7e5
https://www.otodom.pl/oferta/2-pokoje-50m2-blisko-metra-czynsz-w-cenie-ID43k93.html#01ba84e7e5
https://www.otodom.pl/oferta/wynajme-2-pok-mieszkanie-praga-poludnie-ID433Iu.html#01ba84e7e5
https://www.otodom.pl/oferta/do-wynajecia-2-pokoje-blisko-ronda-zaba-ID43ZQR.html#01ba84e7e5
https://www.otodom.pl/oferta/nowe-i-sympatyczne-45-m2-na-mokotowie-ID43ZQy.html#01ba84e7e5
https://www.otodom.pl/oferta/atrakcyjne-na-saskiej-kepie-z-miejscem-parkingowym-ID3X6LC.html#01ba84e7e5
https://www.otodom.pl/oferta/nowe-dwupokojowe-mieszkanie-z-garazem-w-ursusie-ID43PEg.html#01ba84e7e5
https://www.otodom.pl/oferta/mieszkanie-65m-warszawa-zacisze-ul-wolinska-ID43Kvy.html#01ba84e7e5
https://www.otodom.pl/oferta/do-wynajecia-komfortowe-nowe-mieszkanie-40m2-klima-ID43jPq.html#01ba84e7e5
https://www.otodom.pl/oferta/srodmiescie-powisle-2-pokoje-37-5m-balkon-piwnica-ID3SfU4.html#01ba84e7e5


In [8]:
# Download and decode the first ten offers, one request per link
apartaments_data = [process_offer_link(link) for link in offer_links[:10]]

In [9]:
# Per-offer results: one single-row DataFrame and one thumbnail list each
df_list, photos_list = [], []

for apartament_data in apartaments_data:
    parsed_data, photos = collect_data_from_json(apartament_data)
    photos_list.append(photos)

    # Offers expose different sets of features, so each one becomes its own
    # one-row frame whose columns are exactly the keys it provided.
    single_row = pd.DataFrame(
        [list(parsed_data.values())],
        columns=list(parsed_data),
    )
    df_list.append(single_row)

In [10]:
# Concatenate all single-row DataFrames into one table: columns keep their
# order of first appearance (sort=False) and rows get a fresh 0..n-1 index.

pd.concat(df_list, sort=False, ignore_index=True)

Unnamed: 0,Cena,Kaucja,Powierzchnia,Liczba pokoi,Rodzaj zabudowy,Piętro,Liczba pięter,Okna,Ogrzewanie,Stan wykończenia,Data ogłoszenia,latitude,longitude,Materiał budynku,Rok budowy,Dostępne od,Czynsz - dodatkowo
0,2750,3 000 zł,"65,90 m²",2,apartamentowiec,7,12,plastikowe,miejskie,do zamieszkania,2019-12-22 16:52:37,52.19832,21.01715,,,,
1,2500,,50 m²,2,blok,1,4,plastikowe,miejskie,do zamieszkania,2019-12-22 16:05:59,52.267172,20.980473,cegła,1962.0,2019-11-15,
2,2500,2 500 zł,48 m²,2,blok,3,10,drewniane,miejskie,do zamieszkania,2019-12-22 15:21:57,52.24168,21.08406,inne,1978.0,2019-10-16,1 zł
3,2500,2 500 zł,37 m²,2,,2,4,drewniane,miejskie,do zamieszkania,2019-12-22 15:05:26,52.26941,21.041518,inne,2008.0,2020-01-01,
4,2900,,45 m²,2,apartamentowiec,5,8,,,,2019-12-22 14:42:14,52.176982,21.040839,,,,
5,2900,3 000 zł,54 m²,2,apartamentowiec,2,4,,,do zamieszkania,2019-12-22 14:39:10,52.226594,21.062227,,,2019-08-25,
6,2500,2 500 zł,42 m²,2,blok,3,6,,miejskie,do zamieszkania,2019-12-22 13:11:21,52.23614,21.00817,cegła,2019.0,2019-12-07,1 zł
7,1900,2 500 zł,65 m²,2,blok,2,5,,gazowe,do zamieszkania,2019-12-22 13:06:17,52.23614,21.00817,cegła,1994.0,2019-12-02,600 zł
8,2800,2 800 zł,40 m²,2,,2,8,plastikowe,miejskie,do zamieszkania,2019-12-22 12:53:47,52.185519,20.995157,cegła,2018.0,2019-12-01,390 zł
9,2400,2 400 zł,"37,50 m²",2,blok,2,10,plastikowe,miejskie,do zamieszkania,2019-12-22 12:48:18,52.225072,21.03537,wielka płyta,1969.0,2019-01-06,
