In [1]:
!pip install -r requirements.txt



# Search

### Living Area codes

In [None]:
location_ids=473422

### Settings

In [None]:
living_area_max=45
living_area_min=35
rooms_min=1.5

### Build URL

In [2]:
url = 'https://www.hemnet.se/salda/bostader?living_area_max=45&living_area_min=35&location_ids%5B%5D=473422&page=2&rooms_min=1.5'

# Request data from Hemnet

In [3]:
import requests

def load_html(url):
    headers = {
        'User-Agent': 'Your User Agent String'  # Replace with an appropriate User-Agent
    }

    try:
        response = requests.get(url, headers=headers)
        response.raise_for_status()
        return response.text
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
        return None

if __name__ == "__main__":
    url = input("Enter the URL: ")
    #https://www.hemnet.se/salda/bostader?living_area_max=45&living_area_min=35&location_ids%5B%5D=473422&page=2&rooms_min=1.5
    html_content = load_html(url)

    if html_content:
        print("Successfully lead data from %s" % url)
        # print(html_content)
    else:
        print("Failed to fetch HTML content.")


Successfully lead data from https://www.hemnet.se/salda/bostader?living_area_max=45&living_area_min=35&location_ids%5B%5D=473422&page=2&rooms_min=1.5


In [24]:
import pandas as pd
from bs4 import BeautifulSoup
import datetime 



month_conv = {
    'januari' : 1,
    'februari' : 2,
    'mars' : 3,
    'april' : 4,
    'maj' : 5,
    'juni' : 6,
    'juli' : 7,
    'augusti' : 8,
    'september' : 9,
    'oktober' : 10,
    'november' : 11,
    'december' : 12
}

def extract_rooms(room_text):
    if "rum" in room_text:
        room_parts = room_text.split(" ")
        for part in room_parts:
            if "rum" in part:
                rooms = part.replace(",", ".").replace("&nbsp;", "").replace("rum", "")
                try:
                    return float(rooms)
                except ValueError:
                    return 0.0
    return 0.0

def extract_date_format(date_str):
    try:
        date_arr = date_str.split(' ')

        day   = int(date_arr[1])
        month = month_conv[date_arr[2]]
        year  = int(date_arr[3])

        date = datetime.date(year, month, day)

    except:
        print('Error to extract date: %s' % date_str)
        date = None

    return date

def extract_values_from_html(html_content):
    soup = BeautifulSoup(html_content, 'html.parser')

    results = []

    listings = soup.find_all('li', class_='sold-results__normal-hit')
    for listing in listings:
        data_element = listing.find('span', class_='hcl-label--sold-at')
        data = data_element.text.strip() if data_element else ""
        date = extract_date_format(data)

        address_element = listing.find('h2', class_='sold-property-listing__heading')
        address = address_element.text.strip() if address_element else ""

        avgift_element = listing.find('div', class_='sold-property-listing__fee')
        avgift_text = avgift_element.text.strip() if avgift_element else ""
        try:
            avgift = int(avgift_text.replace(" ", "").replace("\xa0", "").replace("kr/mån", "")) if avgift_text else 0
        except ValueError as avgift_error:
            print(f"Error extracting avgift: {avgift_error}")
            avgift = avgift_text

        slutpris_element = listing.find('span', class_='hcl-text hcl-text--medium')
        slutpris_text = slutpris_element.text.strip() if slutpris_element else ""
        try:
            slutpris = int(slutpris_text.replace(" ", "").replace("\xa0", "").replace("kr", "").replace("Slutpris", "")) if slutpris_text else 0
        except ValueError as slutpris_error:
            print(f"Error extracting slutpris: {slutpris_error}")
            slutpris = slutpris_text

        sqm_pris_element = listing.find('div', class_='sold-property-listing__price-per-m2')
        sqm_pris_text = sqm_pris_element.text.strip() if sqm_pris_element else ""
        try:
            sqm_pris = int(sqm_pris_text.replace(" ", "").replace("\xa0", "").replace("kr/m²", "")) if sqm_pris_text else 0
        except ValueError as sqm_pris_error:
            print(f"Error extracting sqm_pris: {sqm_pris_error}")
            sqm_pris = sqm_pris_text

        name_element = listing.find('span', class_='sold-property-listing__first')
        name = name_element.text.strip() if name_element else ""

        size_element = listing.find('div', class_='sold-property-listing__area')
        size_text = size_element.text.split('\n')[1].strip()
        try:
            size_parts = size_text.split()
            size_numeric = float(size_parts[0].replace(",", ".")) if size_parts[0] else 0.0
        except ValueError as size_error:
            print(f"Error extracting size: {size_error}")
            size_numeric = size_text
        
        room_text = size_element.text.split('\n')[-2].strip()
        try:
            rooms = extract_rooms(room_text)
        except :
            print(f"Error extracting rooms: %s" % room_text)
            rooms = 0.0

        results.append({
            "data": data,
            "date": date,
            "address": address,
            "avgift": avgift,
            "slutpris": slutpris,
            "sqm_pris": sqm_pris,
            "name": name,
            "size": size_numeric,
            "size_plus" : 'NaN',
            "rooms": rooms
        })

    return results

# Replace this with your actual HTML response
html_response = html_content

# Call the function and create a pandas DataFrame
extracted_values = extract_values_from_html(html_response)
df = pd.DataFrame(extracted_values)

# Print the DataFrame
print(df)


                      data        date                               address  \
0     Såld 26 januari 2022  2022-01-26                     Essingestråket 29   
1    Såld 28 december 2021  2021-12-28                      Flottbrovägen 27   
2     Såld 3 december 2021  2021-12-03                 Essingeringen 15, 3tr   
3    Såld 24 november 2021  2021-11-24                     Essingestråket 21   
4    Såld 16 november 2021  2021-11-16           Badstrandsvägen 22, 3 / 3tr   
5    Såld 16 november 2021  2021-11-16               Essingestråket 11, 2 tr   
6    Såld 16 november 2021  2021-11-16                      Essingetorget 40   
7    Såld 11 november 2021  2021-11-11                    Badstrandsvägen 31   
8     Såld 30 oktober 2021  2021-10-30              Badstrandsvägen 20, 2 tr   
9     Såld 28 oktober 2021  2021-10-28                        Eknäsvägen 6 C   
10    Såld 23 oktober 2021  2021-10-23                        Eknäsvägen 6 B   
11    Såld 22 oktober 2021  2021-10-22  

In [26]:
df = pd.DataFrame()

# First call outside the loop to initialize the DataFrame structure
extracted_values = extract_values_from_html(load_html('https://www.hemnet.se/salda/bostader?living_area_max=45&living_area_min=35&location_ids%5B%5D=473422&rooms_min=1.5'))
df = df.append(extracted_values, ignore_index=True)

# Loop to call the function and add values
for i in range(6):  # Replace 10 with the desired number of iterations
    extracted_values = extract_values_from_html(load_html(urls[i]))
    df = df.append(extracted_values, ignore_index=True)

# Print the final DataFrame
print(df)

  df = df.append(extracted_values, ignore_index=True)
  df = df.append(extracted_values, ignore_index=True)
  df = df.append(extracted_values, ignore_index=True)
  df = df.append(extracted_values, ignore_index=True)
  df = df.append(extracted_values, ignore_index=True)
  df = df.append(extracted_values, ignore_index=True)


                      data        date                 address  avgift  \
0     Såld 18 augusti 2023  2023-08-18      Badstrandsvägen 19    2480   
1     Såld 13 augusti 2023  2023-08-13      Badstrandsvägen 27    2760   
2        Såld 21 juli 2023  2023-07-21       Essingestråket 23    1470   
3         Såld 7 juli 2023  2023-07-07       Stenkullavägen 54    2006   
4         Såld 7 juli 2023  2023-07-07       Essingestråket 37    1641   
..                     ...         ...                     ...     ...   
338     Såld 15 april 2013  2013-04-15   Stenkullavägen 48 1tr    2060   
339      Såld 28 mars 2013  2013-03-28        Essingetorget 44    2164   
340      Såld 15 mars 2013  2013-03-15   Essingestråket 32 2tr    3368   
341      Såld 15 mars 2013  2013-03-15  Essingestråket 11, 2tr    2856   
342  Såld 28 februari 2013  2013-02-28   Essingestråket 44 6tr    1552   

     slutpris  sqm_pris     name  size size_plus  rooms  
0     2820000     70500   Martin  40.0       NaN    2

  df = df.append(extracted_values, ignore_index=True)


In [25]:
urls = [
    'https://www.hemnet.se/salda/bostader?living_area_max=45&living_area_min=35&location_ids%5B%5D=473422&page=2&rooms_min=1.5',
    'https://www.hemnet.se/salda/bostader?living_area_max=45&living_area_min=35&location_ids%5B%5D=473422&page=3&rooms_min=1.5',
    'https://www.hemnet.se/salda/bostader?living_area_max=45&living_area_min=35&location_ids%5B%5D=473422&page=4&rooms_min=1.5',
    'https://www.hemnet.se/salda/bostader?living_area_max=45&living_area_min=35&location_ids%5B%5D=473422&page=5&rooms_min=1.5',
    'https://www.hemnet.se/salda/bostader?living_area_max=45&living_area_min=35&location_ids%5B%5D=473422&page=6&rooms_min=1.5',
    'https://www.hemnet.se/salda/bostader?living_area_max=45&living_area_min=35&location_ids%5B%5D=473422&page=7&rooms_min=1.5',
    'https://www.hemnet.se/salda/bostader?living_area_max=45&living_area_min=35&location_ids%5B%5D=473422&page=8&rooms_min=1.5',
    'https://www.hemnet.se/salda/bostader?living_area_max=45&living_area_min=35&location_ids%5B%5D=473422&page=9&rooms_min=1.5',
    'https://www.hemnet.se/salda/bostader?living_area_max=45&living_area_min=35&location_ids%5B%5D=473422&page=10&rooms_min=1.5',
    'https://www.hemnet.se/salda/bostader?living_area_max=45&living_area_min=35&location_ids%5B%5D=473422&page=11&rooms_min=1.5',
]

In [27]:
df

Unnamed: 0,data,date,address,avgift,slutpris,sqm_pris,name,size,size_plus,rooms
0,Såld 18 augusti 2023,2023-08-18,Badstrandsvägen 19,2480,2820000,70500,Martin,40.0,,2.0
1,Såld 13 augusti 2023,2023-08-13,Badstrandsvägen 27,2760,3300000,76744,Kerstin,43.0,,2.0
2,Såld 21 juli 2023,2023-07-21,Essingestråket 23,1470,3200000,91429,Martin,35.0,,2.0
3,Såld 7 juli 2023,2023-07-07,Stenkullavägen 54,2006,3050000,80263,,38.0,,2.0
4,Såld 7 juli 2023,2023-07-07,Essingestråket 37,1641,3300000,94286,,35.0,,2.0
...,...,...,...,...,...,...,...,...,...,...
338,Såld 15 april 2013,2013-04-15,Stenkullavägen 48 1tr,2060,1950000,51316,,38.0,,2.0
339,Såld 28 mars 2013,2013-03-28,Essingetorget 44,2164,2030000,47209,,43.0,,2.0
340,Såld 15 mars 2013,2013-03-15,Essingestråket 32 2tr,3368,2300000,51225,,44.9,,2.0
341,Såld 15 mars 2013,2013-03-15,"Essingestråket 11, 2tr",2856,2000000,47619,,42.0,,2.0


In [28]:
df.to_excel('stora_essingen_40.xlsx')