In [61]:
import pandas as pd

# Load the apartment price data from the CSV export
booli_data = pd.read_csv('Booli_sold.csv')

# Show every column as "position: name", one per line, to make it easy to
# refer to columns by their positional index
print("\n".join(f"{idx}: {col_name}" for idx, col_name in enumerate(booli_data.columns)))


0: listPrice
1: rent
2: livingArea
3: rooms
4: published
5: constructionYear
6: objectType
7: booliId
8: soldDate
9: soldPrice
10: soldPriceSource
11: url
12: floor
13: additionalArea
14: apartmentNumber
15: plotArea
16: location.address.streetAddress
17: location.position.latitude
18: location.position.longitude
19: location.position.isApproximate
20: location.region.municipalityName
21: location.region.countyName
22: location.distance.ocean
23: source.name
24: source.id
25: source.type
26: source.url


In [44]:
# Cleaning the data

# Columns kept for the analysis, selected by name instead of by position so the
# selection stays correct (or fails loudly) if the CSV's column layout changes.
# Left out: booliId, url, plotArea, source.name, source.id and source.type.
KEPT_COLUMNS = [
    'listPrice',
    'rent',
    'livingArea',
    'rooms',
    'published',
    'constructionYear',
    'objectType',
    'soldDate',
    'soldPrice',
    'soldPriceSource',
    'floor',
    'additionalArea',
    'apartmentNumber',
    'location.address.streetAddress',
    'location.position.latitude',
    'location.position.longitude',
    'location.position.isApproximate',
    'location.region.municipalityName',
    'location.region.countyName',
    'location.distance.ocean',
    'source.url',
]

# read_csv keeps the file's own column order regardless of the order listed in
# usecols, and raises a ValueError if a listed column is missing from the file.
booli_data_cleaned = pd.read_csv('Booli_sold.csv', usecols=KEPT_COLUMNS)

# Print the reduced column set with its new positional indices
for idx, col_name in enumerate(booli_data_cleaned.columns):
    print(f"{idx}: {col_name}")

0: listPrice
1: rent
2: livingArea
3: rooms
4: published
5: constructionYear
6: objectType
7: soldDate
8: soldPrice
9: soldPriceSource
10: floor
11: additionalArea
12: apartmentNumber
13: location.address.streetAddress
14: location.position.latitude
15: location.position.longitude
16: location.position.isApproximate
17: location.region.municipalityName
18: location.region.countyName
19: location.distance.ocean
20: source.url


In [52]:
# Add a price-per-square-metre column: sold price divided element-wise by the
# living area (rows with a missing living area end up as NaN)
booli_data_cleaned['price_per_sqm'] = booli_data_cleaned['soldPrice'].div(
    booli_data_cleaned['livingArea']
)

# Save the cleaned data, including the new column, to its own CSV file;
# the row index is not written
booli_data_cleaned.to_csv('Booli_with_price_per_sqm.csv', index=False)


In [59]:
# Sorting the data

# Order all sales by price per sqm, most expensive first (pandas places missing
# values last by default), then keep the five rows at the top
ranked_by_ppsqm = booli_data_cleaned.sort_values(by='price_per_sqm', ascending=False)
top_5_expensive = ranked_by_ppsqm.head(5)

# Show only the columns that matter for the comparison
summary_columns = ['soldPrice', 'price_per_sqm', 'location.address.streetAddress', 'rooms', 'rent']
print(top_5_expensive[summary_columns])

# Only the five selected rows (with all of their columns) are written out;
# the row index is not written
top_5_expensive.to_csv('Booli_PPSQM_sorted.csv', index=False)

     soldPrice  price_per_sqm location.address.streetAddress  rooms    rent
67     2500000  104166.666667                  Ekhagsvägen 8    1.0  1464.0
50     2450000  102083.333333                  Ekhagsvägen 8    1.0  1464.0
54     2300000  100000.000000                 Ekhagsvägen 10    1.0  1374.0
34     2210000   88400.000000                Torphagsvägen 4    1.0  1144.0
55     2300000   85501.858736                Docentbacken 1B    1.0  1522.0
..         ...            ...                            ...    ...     ...
93     3570000   39230.769231                  Ekhagsvägen 5    4.0  4529.0
141    2570000   37246.376812                  Ekhagsvägen 6    2.0  3530.0
146    3500000   35714.285714                 Docentbacken 5    3.0  4251.0
151    2250000   35714.285714                Docentbacken 13    2.0  2835.0
102    3135000            NaN                  Ekhagsvägen 5    4.0  4529.0

[158 rows x 5 columns]


In [51]:
# Second name for the cleaned frame (an alias, not a copy)
ekhagen_data = booli_data_cleaned

# Average price per sqm over all sales. Rows without a price per sqm are
# skipped; skipna=True is the default and is spelled out here for clarity.
ekhagen_ppsqm = ekhagen_data['price_per_sqm'].mean(skipna=True)

print("Average price per sqm in Ekhagen is: ", ekhagen_ppsqm, "kr")



Average price per sqm in Ekhagen is:  58759.382760081724 kr


Looking at the data, it is interesting to me personally that the distance to the ocean has little to no impact on the price. It is also interesting that the price per sqm seems to increase a lot more for smaller apartments than for bigger ones.