In [1]:
import numpy as np
import pandas as pd

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

#show every column
pd.set_option('display.max_columns', None)

## Joining the data

First of all, since we have three datasets, let's join them all into one big dataset.

In [2]:
data_houses = pd.read_csv("dataset_houses.csv")
data_apartments = pd.read_csv("dataset_apartments.csv")
data_sharehouses = pd.read_csv("dataset_sharehouses.csv")

data = pd.concat([data_houses, data_apartments, data_sharehouses])

## Exploratory analysis

We will now do a quick exploratory data analysis.

In [3]:
# Describe the numeric attributes.
data.describe()

Unnamed: 0,Price,Rooms,Number of floors,Bedrooms,Built in year,Register number
count,7208.0,7057.0,2655.0,1644.0,6136.0,866.0
mean,173143.7,3.405696,1.889642,3.259732,1985.422588,8405146.0
std,224161.5,2.383852,0.910184,1.795052,62.22669,16901840.0
min,5000.0,1.0,1.0,1.0,1.0,16.0
25%,60000.0,2.0,1.0,3.0,1968.0,1782114.0
50%,128000.0,3.0,2.0,3.0,1992.0,3047403.0
75%,215000.0,4.0,2.0,4.0,2021.0,9863150.0
max,6950000.0,47.0,19.0,33.0,2105.0,120842600.0


In [4]:
data.describe(include=[object])

Unnamed: 0,Title,Description,Link,Location,Total area,Condition,Readiness,Ground area,Cadastre no.,Energy mark,Lisainfo,Neighbourhood,Ownership,Notify about incorrect advertisement,Kitchen,Sanitary arrangements,Heating and ventilation,Communications and security,Data from realestate book,Additional information,This floor/Number of floors,Kulud suvel/talvel
count,7433,7433,7433,7266,7292,6570,1174,2077,3926,5464,6012,4075,6496,3642,4692,5768,5968,4967,4122,547,4545,448
unique,6205,2607,7433,5318,2019,8,5,1569,2853,10,4624,1666,7,1,392,1296,606,586,1,476,161,234
top,"Apartment for sale, 2 rooms, Järve 2, Kristiin...","Apartment ownership, stone house",https://www.kv.ee/muua-paide-linnas-valguskull...,"59.3097965,24.8367254",120 m²,all brand-new,ready,600 m²,79301:001:0866,Missing,"ventilation, parquet, lift, parking free parking",roads paved roads,apartment ownership,Notify about incorrect advertisement,open kitchen,shower,central heating,frontdoor locked,Data from realestate book,"underground garage, box-room, new electricity,...",1/2,60 € / 120 €
freq,21,1942,1,44,51,2226,1036,24,36,1581,83,269,4273,3642,666,384,1855,427,4122,20,436,18


In [5]:
data.columns

Index(['Title', 'Description', 'Link', 'Location', 'Price', 'Rooms',
       'Total area', 'Number of floors', 'Condition', 'Readiness',
       'Ground area', 'Cadastre no.', 'Energy mark', 'Lisainfo',
       'Neighbourhood', 'Bedrooms', 'Built in year', 'Ownership',
       'Notify about incorrect advertisement', 'Kitchen',
       'Sanitary arrangements', 'Heating and ventilation',
       'Communications and security', 'Data from realestate book',
       'Register number', 'Additional information',
       'This floor/Number of floors', 'Kulud suvel/talvel'],
      dtype='object')

## Cleaning the data

First, drop the columns which are useless for predicting the prices. Apart from the obvious ones, drop also the summer/winter costs because there are so few samples.

In [6]:
data.drop(columns=["Title", "Description", "Link", "Cadastre no.", "Data from realestate book", "Notify about incorrect advertisement", "Kulud suvel/talvel"], inplace=True)

data.describe(include=[object])
data.columns

Unnamed: 0,Location,Total area,Condition,Readiness,Ground area,Energy mark,Lisainfo,Neighbourhood,Ownership,Kitchen,Sanitary arrangements,Heating and ventilation,Communications and security,Additional information,This floor/Number of floors
count,7266,7292,6570,1174,2077,5464,6012,4075,6496,4692,5768,5968,4967,547,4545
unique,5318,2019,8,5,1569,10,4624,1666,7,392,1296,606,586,476,161
top,"59.3097965,24.8367254",120 m²,all brand-new,ready,600 m²,Missing,"ventilation, parquet, lift, parking free parking",roads paved roads,apartment ownership,open kitchen,shower,central heating,frontdoor locked,"underground garage, box-room, new electricity,...",1/2
freq,44,51,2226,1036,24,1581,83,269,4273,666,384,1855,427,20,436


Index(['Location', 'Price', 'Rooms', 'Total area', 'Number of floors',
       'Condition', 'Readiness', 'Ground area', 'Energy mark', 'Lisainfo',
       'Neighbourhood', 'Bedrooms', 'Built in year', 'Ownership', 'Kitchen',
       'Sanitary arrangements', 'Heating and ventilation',
       'Communications and security', 'Register number',
       'Additional information', 'This floor/Number of floors'],
      dtype='object')

## Simple data type conversions

Remove the m2 from the area field to convert them to numeric.

In [7]:
data["Total area"] = data['Total area'].astype('str').map(lambda x: x.split()[0]).astype('float64') #remove m2
data["Ground area"] = data['Ground area'].astype('str').map(lambda x: x.split()[0]).astype('float64') #remove m2

Convert the "Energy mark" into a number.

In [8]:
def remap_column(data, column, mapping):
    data[column] = data[column].map(mapping)

In [9]:
data["Energy mark"].unique()
# data["Energy mark"] = data["Energy mark"].map({'C': 2, 'B': 1, np.nan: 0, '-': 0, 'Missing': 0, 'A': 0, 'H': 7, 'E': 4, 'D': 3, 'G': 6, 'F': 5})
remap_column(data, "Energy mark", {'C': 2, 'B': 1, np.nan: 0, '-': 0, 'Missing': 0, 'A': 0, 'H': 7, 'E': 4, 'D': 3, 'G': 6, 'F': 5})
data["Energy mark"].unique()

array(['-', nan, 'Missing', 'E', 'A', 'B', 'C', 'H', 'D', 'G', 'F'],
      dtype=object)

array([0, 4, 1, 2, 7, 3, 6, 5], dtype=int64)

In [10]:
data["Condition"].unique()
remap_column(data, "Condition", {'all brand-new': 6, 'Good condition': 5, 'ready': 5, 'needs renovating': 0, 'satisfactory': 1, np.nan: 1, 'sanitary renovation needed': 2, 'renovated': 4,'sanitary renovation done': 3})
data["Condition"].unique()

array(['needs renovating', 'all brand-new', 'Good condition', 'ready',
       'sanitary renovation needed', 'renovated', nan, 'satisfactory',
       'sanitary renovation done'], dtype=object)

array([0, 6, 5, 2, 4, 1, 3], dtype=int64)

In [11]:
data["Readiness"].unique()
remap_column(data, "Readiness", {'ready': 4, 'roofed box': 2, np.nan: 4, 'box with doors and windows': 3, 'roofless box': 1, 'foundation': 0})
data["Readiness"].unique()

array(['ready', nan, 'roofed box', 'box with doors and windows',
       'roofless box', 'foundation'], dtype=object)

array([4, 2, 3, 1, 0], dtype=int64)

In [12]:
data["Ownership"].unique()

array([nan, 'private property', 'apartment ownership', 'movable',
       'joint ownership', 'apartment association', 'building lease',
       'logical part'], dtype=object)

## Separating features

First of all, try to find which columns contain many different features.

Before converting columns to multiple features, it might be useful to assign reasonable values to NaNs in those columns!

In [13]:
# Create a function to assist in removing invalid values.
def fill_na_with_mode(data, column):
    return data[column].fillna(value=data[column].mode().values[0], inplace=True)

def fill_na_with_mean(data, column):
    return data[column].fillna(value=data[column].mean(), inplace=True)

In [14]:
fill_na_with_mode(data, "Ownership")
data = pd.get_dummies(data, columns=["Ownership"])

Now, some more complex feature extraction.

First, what are the unique feature values in the column "Lisainfo".

In [15]:
def find_unique_features(dataset, column):
    unique_features = set()

    for entry in dataset[column].unique():
        if not pd.isna(entry):
            features = entry.strip().split(", ")
            #print(features)
            unique_features.update(features)

    return unique_features

unique_features = find_unique_features(data, "Lisainfo")
unique_features

{'3*380V',
 'TV-set',
 'balcony',
 'balcony ',
 'balcony 0.5 m²',
 'balcony 0.9 m²',
 'balcony 1 m²',
 'balcony 1.4 m²',
 'balcony 1.5 m²',
 'balcony 1.6 m²',
 'balcony 1.8 m²',
 'balcony 10 m²',
 'balcony 10.0 m²',
 'balcony 10.2 m²',
 'balcony 10.3 m²',
 'balcony 10.4 m²',
 'balcony 10.5 m²',
 'balcony 10.6 m²',
 'balcony 10.7 m²',
 'balcony 10.8 m²',
 'balcony 10.9 m²',
 'balcony 11 m²',
 'balcony 11.1 m²',
 'balcony 11.2 m²',
 'balcony 11.4 m²',
 'balcony 11.5 m²',
 'balcony 11.6 m²',
 'balcony 11.7 m²',
 'balcony 11.8 m²',
 'balcony 12 m²',
 'balcony 12.1 m²',
 'balcony 12.2 m²',
 'balcony 12.3 m²',
 'balcony 12.4 m²',
 'balcony 12.5 m²',
 'balcony 12.6 m²',
 'balcony 12.7 m²',
 'balcony 12.8 m²',
 'balcony 12.9 m²',
 'balcony 13 m²',
 'balcony 13.1 m²',
 'balcony 13.2 m²',
 'balcony 13.3 m²',
 'balcony 13.4 m²',
 'balcony 13.5 m²',
 'balcony 13.6 m²',
 'balcony 13.8 m²',
 'balcony 13.9 m²',
 'balcony 14 m²',
 'balcony 14.1 m²',
 'balcony 14.2 m²',
 'balcony 14.3 m²',
 'balcony 14

Extract numeric features from "Lisainfo"

In [16]:
data_balcony = []
data_balcony_size = []

for i in data["Lisainfo"].values:
    balcony = 0 #Base values
    balcony_size = 0.0
    
    if not pd.isna(i):
        features = i.strip().split(", ")
        
        for feature in features:
            if feature.find("balcony") == 0:
                balcony = 1
                balcony_el = feature.strip().split()
                if len(balcony_el) > 1:
                    balcony_size = float(balcony_el[1])
                    
    data_balcony.append(balcony)
    data_balcony_size.append(balcony_size)

data["Balcony"] = data_balcony
data["Balcony size"] = data_balcony_size

In [17]:
#Probably don't care about that feature
data_dist_from_Tallinn = []

for i in data["Lisainfo"].values:
    dist_from_Tallinn = 0.0
    
    if not pd.isna(i):
        features = i.strip().split(", ")
        
        for feature in features:
            if feature.find("distance from Tallinn") != -1:
                dist_from_Tallinn = float(feature.strip().split()[3])
    data_dist_from_Tallinn.append(dist_from_Tallinn)

data["Distance from Tallinn"] = data_dist_from_Tallinn

Extract categorical features from "Lisainfo".

In [18]:
data_addition = []

for i in data["Lisainfo"].values:
    current = np.nan

    if not pd.isna(i):
        features = i.strip().split(", ")

        for feature in features:
            if feature.find("parking") != -1:
                current = ' '.join(feature.strip().split()[1:-1])
    data_addition.append(current)

data["Parking"] = data_addition

In [19]:
data_addition = []

for i in data["Lisainfo"].values:
    current = np.nan

    if not pd.isna(i):
        features = i.strip().split(", ")

        for feature in features:
            if feature.find("wall") != -1:
                current = ' '.join(feature.strip().split()[1:])
    data_addition.append(current)

data["Wall"] = data_addition

In [20]:
data_addition = []

for i in data["Lisainfo"].values:
    current = np.nan

    if not pd.isna(i):
        features = i.strip().split(", ")

        for feature in features:
            if feature.find("roof") != -1:
                current = ' '.join(feature.strip().split()[1:-1])
    data_addition.append(current)

data["Roof"] = data_addition

Remove the now separated features from 'unique features' "Lisainfo"

In [21]:
filtered_unique_features = [ x for x in unique_features if "balcony" not in x and 
                                                            "distance from Tallinn" not in x and 
                                                            "parking" not in x and
                                                            "wall" not in x and 
                                                            "roof" not in x ]

Separate the remaining features

In [22]:
def separate(dataset, uniques, column):
    for unique_feature in uniques:
        data_addition = []

        for i in data[column].values:
            current = False

            if not pd.isna(i):
                features = i.strip().split(", ")

                for feature in features:
                    if feature.find(unique_feature) != -1:
                        current = True
            data_addition.append(current)

        dataset[unique_feature] = data_addition
        
separate(data, filtered_unique_features, "Lisainfo")
data.drop(columns=["Lisainfo"], inplace=True)

Find unique features in "Kitchen"

In [23]:
unique_features = find_unique_features(data, "Kitchen")
unique_features

{'ceramic shove',
 'electric stove',
 'gas stove',
 'induktsioon pliit',
 'kitchen ',
 'kitchen 0.0 m²',
 'kitchen 1 m²',
 'kitchen 10 m²',
 'kitchen 10.0 m²',
 'kitchen 10.4 m²',
 'kitchen 10.7 m²',
 'kitchen 10.8 m²',
 'kitchen 10.9 m²',
 'kitchen 11 m²',
 'kitchen 11.3 m²',
 'kitchen 11.5 m²',
 'kitchen 11.7 m²',
 'kitchen 11.8 m²',
 'kitchen 12 m²',
 'kitchen 12.2 m²',
 'kitchen 12.3 m²',
 'kitchen 12.5 m²',
 'kitchen 12.7 m²',
 'kitchen 12.8 m²',
 'kitchen 12.9 m²',
 'kitchen 13 m²',
 'kitchen 13.4 m²',
 'kitchen 13.7 m²',
 'kitchen 137 m²',
 'kitchen 14 m²',
 'kitchen 14.1 m²',
 'kitchen 14.5 m²',
 'kitchen 14.6 m²',
 'kitchen 141.4 m²',
 'kitchen 15 m²',
 'kitchen 15.1 m²',
 'kitchen 15.5 m²',
 'kitchen 15.6 m²',
 'kitchen 15.8 m²',
 'kitchen 16 m²',
 'kitchen 16.0 m²',
 'kitchen 16.3 m²',
 'kitchen 16.7 m²',
 'kitchen 17 m²',
 'kitchen 17.7 m²',
 'kitchen 17.8 m²',
 'kitchen 18 m²',
 'kitchen 18.1 m²',
 'kitchen 18.5 m²',
 'kitchen 19 m²',
 'kitchen 19.3 m²',
 'kitchen 19.4 m²'

Extract numeric features from "Kitchen"

In [24]:
#Probably don't care about that feature, too few entries
data_kitchen_size = []

for i in data["Kitchen"].values:
    kitchen_size = np.nan
    
    if not pd.isna(i):
        features = i.strip().split(", ")
        
        for feature in features:
            if feature.find("m²") != -1:
                kitchen_size = float(feature.strip().split()[1])
    data_kitchen_size.append(kitchen_size)

data["Kitchen size"] = data_kitchen_size

Extract categorical features from "Kitchen"

In [25]:
data_addition = []

for i in data["Kitchen"].values:
    current = np.nan

    if not pd.isna(i):
        features = i.strip().split(", ")

        for feature in features:
            if feature.find("heated with firewood") != -1:
                current = ' '.join(feature.strip().split()[1:])
            elif feature.find("shove") != -1 or feature.find("stove") != -1 or feature.find("pliit") != -1:
                current = feature.strip().split()[0]
            
    data_addition.append(current)

data["Stove type"] = data_addition

Separate the remaining features

In [26]:
separate(data, {'kitchen furniture','open kitchen','refridgerator'}, "Kitchen")
data.drop(columns=["Kitchen"], inplace=True)

Find unique features in "Sanitary arrangements"

In [27]:
unique_features = find_unique_features(data, "Sanitary arrangements")
unique_features

{'bath',
 'local water',
 'new sewerage',
 'pool',
 'sauna',
 'sewerage',
 'shower',
 'toilet room and bathroom separate',
 'urban water',
 'washing machine',
 'water boiler'}

Extract features from "Sanitary arrangements"

In [28]:
separate(data, unique_features, "Sanitary arrangements")
data.drop(columns=["Sanitary arrangements"], inplace=True)

Find unique features in "Heating and ventilation"

In [29]:
unique_features = find_unique_features(data, "Heating and ventilation")
unique_features

{'central heating',
 'combined heating',
 'conditioner',
 'electric heating',
 'fireplace',
 'gas',
 'gas heating',
 'geothermic heating',
 'heated floors',
 'liquid fuel',
 'solid fuel',
 'stove heating',
 'õhksoojuspump'}

Extract categorical features from "Heating and ventilation"

In [30]:
data_addition = []

for i in data["Heating and ventilation"].values:
    current = np.nan

    if not pd.isna(i):
        features = i.strip().split(", ")

        for feature in features:
            if feature.find("heating") != -1:
                current = feature.strip().split()[0]
            elif feature.find("õhksoojuspump") != -1:
                current = "Air source heat pump"
    data_addition.append(current)

data["Heating type"] = data_addition

Remove the now separated features from 'unique features'

In [31]:
filtered_unique_features = [ x for x in unique_features if "heating" not in x and 
                                                            "õhksoojuspump" not in x ]

Separate the remaining features

In [32]:
separate(data, filtered_unique_features, "Heating and ventilation")
data.drop(columns=["Heating and ventilation"], inplace=True)

Find unique features in "Communications and security"

In [33]:
unique_features = find_unique_features(data, "Communications and security")
unique_features

{'Internet',
 'cable TV',
 'fenced with garden',
 'frontdoor locked',
 'neighbourhood watch',
 'phone',
 'secure guard',
 'security system',
 'steel door',
 'video cameras'}

Extract features from "Communications and security"

In [34]:
separate(data, unique_features, "Communications and security")
data.drop(columns=["Communications and security"], inplace=True)

Find unique features in "Neighbourhood"

In [35]:
unique_features = find_unique_features(data, "Neighbourhood")
unique_features

{'200 sea',
 '3 km sea',
 'Abja paisjärv lake ',
 'Abja paisjärv lake 1,000 m',
 'Abja paisjärv lake 300 m',
 'Alesti järv lake 11 km',
 'Alesti järv lake 8 km',
 'Alesti lake 8 km',
 'Amme jõgi river 250 m',
 'Amme jõgi river 350 m',
 'Amme river 300 m',
 'Arbi ja Verevi järv lake 400 m',
 'Arbi järv lake 1,000 m',
 'Arbi järv lake 700 m',
 'Aseri sea 5 km',
 'Audru river 1.4 km',
 'Audru river 400 m',
 'Avijõgi river',
 'Avijõgi river 300 m',
 'Balti meri sea 2 m',
 'Balti meri sea 200 m',
 'Balti meri sea 450 m',
 'Baltika sea ',
 'Elva Jõgi river 100 m',
 'Elva jõgi',
 'Elva jõgi river 1,000 m',
 'Emajõgi',
 'Emajõgi Emajõe river 1 m',
 'Emajõgi river',
 'Emajõgi river ',
 'Emajõgi river 1 m',
 'Emajõgi river 1,000 m',
 'Emajõgi river 100 m',
 'Emajõgi river 140 m',
 'Emajõgi river 180 m',
 'Emajõgi river 20 m',
 'Emajõgi river 200 m',
 'Emajõgi river 300 m',
 'Emajõgi river 350 m',
 'Emajõgi river 50 m',
 'Emajõgi river 500 m',
 'Emajõgi river 700 m',
 'Emajõgi river 800 m',
 'Ema

In [36]:
filtered_unique_features = [ x for x in unique_features if "sea" not in x and 
                                                            "meri" not in x and
                                                            "river" not in x and
                                                            "lake" not in x and
                                                            "tiik" not in x and
                                                            "Tiik" not in x and
                                                            "Oja" not in x and
                                                            "oja" not in x and
                                                            "karjäär" not in x and
                                                            "карьер" not in x and
                                                            "water body" not in x and
                                                            "beach length" not in x and
                                                            "laht" not in x and
                                                            "rand" not in x and
                                                            "jõgi" not in x and
                                                            "järv" not in x and
                                                            "море" not in x ]
filtered_unique_features

['location in suburb location',
 'roads in bad condition roads',
 'surrounding buildings warehouses and production buildings',
 'neighbours at one side neighbours',
 'roads in good condition roads',
 'surrounding buildings no buildings',
 'neighbours next to neighbours',
 'surrounding buildings private houses and apartment buildings',
 'roads in satisfactory condition roads',
 'surrounding buildings private houses',
 'near forest',
 'neighbours around neighbours',
 'roads gravel roads',
 'neighbours farther neighbours',
 'in the city location',
 'roads paved roads',
 'surrounding buildings commercial buildings',
 'surrounding buildings apartment buildings',
 'location in the center location',
 'location outside settlement location']

Extract features from "Neighbourhood

In [37]:
data_addition = []

for i in data["Neighbourhood"].values:
    current = np.nan

    if not pd.isna(i):
        features = i.strip().split(", ")

        for feature in features:
            if feature.find("condition") != -1:
                current = feature.strip().split()[2]
    data_addition.append(current)

data["Road condition"] = data_addition

In [38]:
data_addition = []

for i in data["Neighbourhood"].values:
    current = np.nan

    if not pd.isna(i):
        features = i.strip().split(", ")

        for feature in features:
            if feature.find("condition") == -1:
                if feature.find("roads") != -1:
                    current = feature.strip().split()[1]
    data_addition.append(current)

data["Road type"] = data_addition

In [39]:
data_addition = []

for i in data["Neighbourhood"].values:
    current = np.nan

    if not pd.isna(i):
        features = i.strip().split(", ")

        for feature in features:
            if feature.find("surrounding") != -1:
                current = ' '.join(feature.strip().split()[2:])
    data_addition.append(current)

data["Surrounding buildings"] = data_addition

In [40]:
data_addition = []

for i in data["Neighbourhood"].values:
    current = np.nan

    if not pd.isna(i):
        features = i.strip().split(", ")

        for feature in features:
            if feature.find("location") != -1:
                if feature.find("city") != -1:
                    current = ' '.join(feature.strip().split()[0:-1])
                else:
                    current = ' '.join(feature.strip().split()[1:-1])
    data_addition.append(current)

data["Location type"] = data_addition

In [41]:
data_addition = []

for i in data["Neighbourhood"].values:
    current = np.nan

    if not pd.isna(i):
        features = i.strip().split(", ")

        for feature in features:
            if feature.find("neighbours") != -1:
                current = ' '.join(feature.strip().split()[1:])
                
    data_addition.append(current)

data["Nearby neighbours"] = data_addition

In [42]:
def separate_synonyms(dataset, uniques, column):
    for unique_features in uniques:
        data_addition = []

        for i in data[column].values:
            current = False

            if not pd.isna(i):
                features = i.strip().split(", ")

                for feature in features:
                    for unique_feature in unique_features:
                        if feature.find(unique_feature) != -1:
                            current = True
            data_addition.append(current)
        dataset[unique_features[0]] = data_addition
    

separate_synonyms(data, 
                  [['forest'], ['sea', 'meri', 'laht', 'море', 'rand'], 
                   ['river', 'jõgi'], ['lake', 'järv'], 
                   ['stream', 'oja', 'Oja'], 
                   ['pond', 'tiik', 'Tiik']], 
                  "Neighbourhood")

data.drop(columns=["Neighbourhood"], inplace=True)

In [43]:
# Separate the location into latitude and longitude.
data[["Latitude", "Longitude"]] = data.Location.str.split(",", expand=True)
data["Latitude"] = data["Latitude"].astype(float)
data["Longitude"] = data["Longitude"].astype(float)
data.drop(columns=["Location"], inplace=True)
data.values[:][0]

array([69000.0, 5.0, 81.5, 2.0, 0, 4, 858.0, 0, nan, nan, nan, nan, nan,
       0, 1, 0, 0, 0, 0, 0, 0, 0.0, 0.0, nan, nan, nan, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, True, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, True, nan, nan, False, False, False,
       False, False, False, False, False, False, nan, False, False, False,
       False, False, False, False, False, False, False, False, False,
       'good', nan, nan, nan, nan, False, False, False, True, False,
       False, 58.8837293, 25.5777054], dtype=object)

In [51]:
data

data.dtypes

Unnamed: 0,Price,Rooms,Total area,Number of floors,Condition,Readiness,Ground area,Energy mark,Bedrooms,Built in year,Register number,Additional information,This floor/Number of floors,Ownership_apartment association,Ownership_apartment ownership,Ownership_building lease,Ownership_joint ownership,Ownership_logical part,Ownership_movable,Ownership_private property,Balcony,Balcony size,Distance from Tallinn,Parking,Wall,Roof,closed courtyard,terrace,public transport,packet windows,new sewerage,gas,electricity,garage,local water,high ceilings,deep well,cloak room,show case windows,furniture,fenced with garden,new electricity,goods lift,ventilation,liquid fuel,dressing room,furnishing possibility,urban water,kitchen,farm building,TV-set,near forest,sewerage,box-room,well,separate rooms,parquet,organisation through the few floors,basement,separate entryway,trestle,frontdoor locked,garret,open kitchen,water,lift,underground garage,street entrance,3*380V,Kitchen size,Stove type,kitchen furniture,refridgerator,water boiler,toilet room and bathroom separate,washing machine,shower,bath,sauna,pool,Heating type,fireplace,solid fuel,conditioner,heated floors,cable TV,steel door,video cameras,security system,secure guard,phone,neighbourhood watch,Internet,Road condition,Road type,Surrounding buildings,Location type,Nearby neighbours,forest,sea,river,lake,stream,pond,Latitude,Longitude
0,69000.0,5.0,81.5,2.000000,0,4,858.000000,0,3.259732,1985.422588,8.405146e+06,"underground garage, box-room, new electricity,...",1/2,0,1,0,0,0,0,0,0,0.0,0.0,free,cupboard,tar paper,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,11.541918,electric,False,False,False,False,False,False,False,False,False,central,False,False,False,False,False,False,False,False,False,False,False,False,good,paved,apartment buildings,in the center,around neighbours,False,False,False,True,False,False,58.883729,25.577705
1,240000.0,4.0,160.3,1.000000,6,4,69200.000000,0,3.000000,2019.000000,8.405146e+06,"underground garage, box-room, new electricity,...",1/2,0,0,0,0,0,0,1,0,0.0,0.0,free,cupboard,tin,True,False,False,False,False,False,True,True,True,False,False,False,False,False,True,True,False,True,False,True,False,False,True,True,False,False,False,False,True,True,True,False,False,True,False,False,False,False,False,False,False,False,False,11.541918,ceramic,False,True,True,False,False,True,True,True,False,geothermic,False,False,False,False,False,True,True,False,False,False,False,False,good,paved,apartment buildings,in the center,around neighbours,True,False,True,False,False,False,58.240052,27.017403
2,550000.0,6.0,267.3,2.000000,5,4,846.000000,0,4.000000,1985.422588,8.405146e+06,"underground garage, box-room, new electricity,...",1/2,0,0,0,0,0,0,1,1,9.7,0.0,free,cupboard,tar paper,False,False,False,False,False,False,False,True,False,False,False,False,False,False,True,False,False,False,False,True,False,True,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,11.541918,ceramic,True,True,False,True,False,True,True,True,False,geothermic,False,False,True,False,False,False,False,False,False,False,False,False,good,paved,private houses,in suburb,around neighbours,False,True,False,False,False,False,59.439420,24.572425
3,280000.0,4.0,122.0,2.000000,5,4,9831.410737,0,3.000000,2006.000000,8.606802e+06,"underground garage, box-room, new electricity,...",1/2,0,0,0,0,0,0,1,0,0.0,0.0,free,cupboard,tar paper,False,True,True,True,False,False,True,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,True,False,False,True,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,11.541918,electric,False,False,False,False,False,False,False,False,False,central,False,False,False,False,False,False,False,False,False,False,False,False,good,paved,apartment buildings,in the center,around neighbours,False,False,False,False,False,False,59.464121,24.397822
4,95000.0,3.0,81.0,1.000000,5,4,21100.000000,0,1.000000,1911.000000,8.405146e+06,"underground garage, box-room, new electricity,...",1/2,0,0,0,0,0,0,1,0,0.0,0.0,free,cupboard,asbestos cement,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,11.541918,electric,False,False,True,False,False,False,True,True,False,Air source heat pump,True,False,False,False,False,False,False,False,False,False,False,False,good,paved,apartment buildings,in the center,around neighbours,False,False,False,False,False,False,58.101587,26.187741
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
496,299500.0,4.0,123.4,1.889642,5,4,1702.000000,0,3.000000,2009.000000,8.405146e+06,"underground garage, box-room, new electricity,...",2/2,0,1,0,0,0,0,0,0,0.0,0.0,free,cupboard,profiled,False,False,False,False,False,True,False,False,False,True,False,False,False,True,False,False,False,True,False,True,True,True,False,True,False,False,False,True,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,11.541918,electric,True,False,False,False,False,False,False,False,False,gas,False,False,False,True,False,False,False,False,False,False,True,False,good,paved,apartment buildings,in the center,around neighbours,False,False,False,False,False,False,59.358238,24.807334
497,265000.0,4.0,98.9,2.000000,5,4,9831.410737,0,3.000000,2007.000000,8.405146e+06,"underground garage, box-room, new electricity,...",1/2,0,1,0,0,0,0,0,1,8.0,0.0,free,cupboard,tar paper,False,True,True,False,False,True,True,False,True,False,False,False,False,True,True,False,False,False,False,True,True,True,False,True,True,False,True,True,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,11.541918,induktsioon,True,True,True,False,True,True,True,True,False,gas,True,False,False,True,True,False,False,False,False,False,True,True,good,paved,apartment buildings,in the center,around neighbours,False,False,False,False,False,False,59.392273,24.816624
498,209000.0,5.0,188.8,2.000000,5,4,5905.000000,0,3.259732,1990.000000,8.405146e+06,"underground garage, box-room, new electricity,...",1/2,0,1,0,0,0,0,0,1,4.4,0.0,free,cupboard,tar paper,True,True,True,False,False,False,True,False,False,False,False,False,False,True,False,False,False,False,False,False,False,True,True,False,False,False,True,False,False,False,True,False,False,True,False,False,False,False,False,False,False,False,False,11.541918,electric,True,True,False,True,True,True,True,True,False,central,True,False,True,False,False,False,False,False,False,False,False,True,good,paved,apartment buildings,in the center,around neighbours,True,False,False,False,False,False,58.357860,26.784588
499,330000.0,4.0,144.3,2.000000,6,4,1799.000000,0,3.000000,2021.000000,8.405146e+06,"underground garage, box-room, new electricity,...",1/2,0,1,0,0,0,0,0,1,0.0,0.0,free,cupboard,tar paper,False,False,True,False,False,False,True,False,False,False,False,False,False,False,True,False,False,True,False,False,False,True,True,False,False,False,False,True,False,True,True,False,False,False,False,False,False,False,False,False,False,False,True,10.700000,electric,False,False,False,True,False,True,True,True,False,Air source heat pump,True,False,False,False,True,False,False,False,False,False,True,True,good,paved,apartment buildings,in the center,around neighbours,False,False,False,False,False,False,59.506560,24.847770


Price               float64
Rooms               float64
Total area          float64
Number of floors    float64
Condition             int64
                     ...   
lake                   bool
stream                 bool
pond                   bool
Latitude            float64
Longitude           float64
Length: 106, dtype: object

## Dealing with NaN values

In [45]:
data.dtypes

Price               float64
Rooms               float64
Total area          float64
Number of floors    float64
Condition             int64
                     ...   
lake                   bool
stream                 bool
pond                   bool
Latitude            float64
Longitude           float64
Length: 106, dtype: object

In [46]:
data.describe(include=[float, int, object, bool])

Unnamed: 0,Price,Rooms,Total area,Number of floors,Condition,Readiness,Ground area,Energy mark,Bedrooms,Built in year,Register number,Additional information,This floor/Number of floors,Balcony,Balcony size,Distance from Tallinn,Parking,Wall,Roof,closed courtyard,terrace,public transport,packet windows,new sewerage,gas,electricity,garage,local water,high ceilings,deep well,cloak room,show case windows,furniture,fenced with garden,new electricity,goods lift,ventilation,liquid fuel,dressing room,furnishing possibility,urban water,kitchen,farm building,TV-set,near forest,sewerage,box-room,well,separate rooms,parquet,organisation through the few floors,basement,separate entryway,trestle,frontdoor locked,garret,open kitchen,water,lift,underground garage,street entrance,3*380V,Kitchen size,Stove type,kitchen furniture,refridgerator,water boiler,toilet room and bathroom separate,washing machine,shower,bath,sauna,pool,Heating type,fireplace,solid fuel,conditioner,heated floors,cable TV,steel door,video cameras,security system,secure guard,phone,neighbourhood watch,Internet,Road condition,Road type,Surrounding buildings,Location type,Nearby neighbours,forest,sea,river,lake,stream,pond,Latitude,Longitude
count,7208.0,7057.0,7292.0,2655.0,7433.0,7433.0,2077.0,7433.0,1644.0,6136.0,866.0,547,4545,7433.0,7433.0,7433.0,3145,1315,1382,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,563.0,3457,7433,7433,7433,7433,7433,7433,7433,7433,7433,5659,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,2294,475,2132,2099,1510,7433,7433,7433,7433,7433,7433,7266.0,7266.0
unique,,,,,,,,,,,,476,161,,,,3,8,7,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,,5,2,2,2,2,2,2,2,2,2,7,2,2,2,2,2,2,2,2,2,2,2,2,3,2,6,4,4,2,2,2,2,2,2,,
top,,,,,,,,,,,,"underground garage, box-room, new electricity,...",1/2,,,,free,cupboard,tar paper,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,,electric,False,False,False,False,False,True,False,False,False,central,False,False,False,False,False,False,False,False,False,False,False,False,good,paved,apartment buildings,in the center,around neighbours,False,False,False,False,False,False,,
freq,,,,,,,,,,,,20,436,,,,2973,1076,355,6525,6550,5696,4882,5792,6817,4105,6406,6888,6780,7141,7421,7391,5777,6563,5878,7432,6803,7412,6447,5942,6151,6569,6640,6555,7099,4693,4836,6764,5421,4344,7036,6914,6901,7430,4618,7324,5581,6082,6319,7283,7074,6767,,1778,4922,5160,5765,5460,5569,4147,4518,5641,7346,2770,6529,7207,7150,5458,5229,5190,7000,6831,7397,6295,5852,4240,2189,441,791,1011,935,6581,6647,6843,6818,7427,7400,,
mean,173143.7,3.405696,217.855472,1.889642,3.709942,3.965155,9831.410737,0.672676,3.259732,1985.422588,8405146.0,,,0.250908,1.37351,0.200457,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,11.541918,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,58.952274,25.438428
std,224161.5,2.383852,2409.977495,0.910184,2.252275,0.282704,30238.696564,1.328388,1.795052,62.22669,16901840.0,,,0.433565,5.846968,4.628434,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10.522412,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.395378,3.386421
min,5000.0,1.0,1.0,1.0,0.0,0.0,2.4,0.0,1.0,1.0,16.0,,,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,1.0
25%,60000.0,2.0,48.7,1.0,1.0,4.0,1018.0,0.0,3.0,1968.0,1782114.0,,,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,7.1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,58.381713,24.647486
50%,128000.0,3.0,70.8,2.0,5.0,4.0,1890.0,0.0,3.0,1992.0,3047403.0,,,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,9.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,59.309796,24.829038
75%,215000.0,4.0,123.9,2.0,6.0,4.0,6200.0,1.0,4.0,2021.0,9863150.0,,,1.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,12.4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,59.424213,26.601448


In [47]:
# data.dtypes
for column in data.describe(include=[object, bool]).columns:
    fill_na_with_mode(data, column)

for column in data.describe():
    fill_na_with_mean(data, column)

In [48]:
data.describe(include=[float, int, object, bool])

Unnamed: 0,Price,Rooms,Total area,Number of floors,Condition,Readiness,Ground area,Energy mark,Bedrooms,Built in year,Register number,Additional information,This floor/Number of floors,Balcony,Balcony size,Distance from Tallinn,Parking,Wall,Roof,closed courtyard,terrace,public transport,packet windows,new sewerage,gas,electricity,garage,local water,high ceilings,deep well,cloak room,show case windows,furniture,fenced with garden,new electricity,goods lift,ventilation,liquid fuel,dressing room,furnishing possibility,urban water,kitchen,farm building,TV-set,near forest,sewerage,box-room,well,separate rooms,parquet,organisation through the few floors,basement,separate entryway,trestle,frontdoor locked,garret,open kitchen,water,lift,underground garage,street entrance,3*380V,Kitchen size,Stove type,kitchen furniture,refridgerator,water boiler,toilet room and bathroom separate,washing machine,shower,bath,sauna,pool,Heating type,fireplace,solid fuel,conditioner,heated floors,cable TV,steel door,video cameras,security system,secure guard,phone,neighbourhood watch,Internet,Road condition,Road type,Surrounding buildings,Location type,Nearby neighbours,forest,sea,river,lake,stream,pond,Latitude,Longitude
count,7433.0,7433.0,7433.0,7433.0,7433.0,7433.0,7433.0,7433.0,7433.0,7433.0,7433.0,7433,7433,7433.0,7433.0,7433.0,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433.0,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433,7433.0,7433.0
unique,,,,,,,,,,,,476,161,,,,3,8,7,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,,5,2,2,2,2,2,2,2,2,2,7,2,2,2,2,2,2,2,2,2,2,2,2,3,2,6,4,4,2,2,2,2,2,2,,
top,,,,,,,,,,,,"underground garage, box-room, new electricity,...",1/2,,,,free,cupboard,tar paper,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,,electric,False,False,False,False,False,True,False,False,False,central,False,False,False,False,False,False,False,False,False,False,False,False,good,paved,apartment buildings,in the center,around neighbours,False,False,False,False,False,False,,
freq,,,,,,,,,,,,6906,3324,,,,7261,7194,6406,6525,6550,5696,4882,5792,6817,4105,6406,6888,6780,7141,7421,7391,5777,6563,5878,7432,6803,7412,6447,5942,6151,6569,6640,6555,7099,4693,4836,6764,5421,4344,7036,6914,6901,7430,4618,7324,5581,6082,6319,7283,7074,6767,,5754,4922,5160,5765,5460,5569,4147,4518,5641,7346,4544,6529,7207,7150,5458,5229,5190,7000,6831,7397,6295,5852,4240,7328,7399,6092,6345,6858,6581,6647,6843,6818,7427,7400,,
mean,173143.7,3.405696,217.855472,1.889642,3.709942,3.965155,9831.410737,0.672676,3.259732,1985.422588,8405146.0,,,0.250908,1.37351,0.200457,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,11.541918,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,58.952274,25.438428
std,220742.2,2.322768,2387.006962,0.54391,2.252275,0.282704,15981.733747,1.328388,0.844001,56.536788,5766193.0,,,0.433565,5.846968,4.628434,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.893548,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.379611,3.348158
min,5000.0,1.0,1.0,1.0,0.0,0.0,2.4,0.0,1.0,1.0,16.0,,,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,1.0
25%,64000.0,2.0,48.8,1.889642,1.0,4.0,9831.410737,0.0,3.259732,1973.0,8405146.0,,,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,11.541918,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,58.382573,24.651575
50%,132000.0,3.0,72.0,1.889642,5.0,4.0,9831.410737,0.0,3.259732,1985.422588,8405146.0,,,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,11.541918,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,59.307851,24.836725
75%,210000.0,4.0,129.0,2.0,6.0,4.0,9831.410737,1.0,3.259732,2019.0,8405146.0,,,1.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,11.541918,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,59.422702,26.550106


In [49]:
data.isna()

Unnamed: 0,Price,Rooms,Total area,Number of floors,Condition,Readiness,Ground area,Energy mark,Bedrooms,Built in year,Register number,Additional information,This floor/Number of floors,Ownership_apartment association,Ownership_apartment ownership,Ownership_building lease,Ownership_joint ownership,Ownership_logical part,Ownership_movable,Ownership_private property,Balcony,Balcony size,Distance from Tallinn,Parking,Wall,Roof,closed courtyard,terrace,public transport,packet windows,new sewerage,gas,electricity,garage,local water,high ceilings,deep well,cloak room,show case windows,furniture,fenced with garden,new electricity,goods lift,ventilation,liquid fuel,dressing room,furnishing possibility,urban water,kitchen,farm building,TV-set,near forest,sewerage,box-room,well,separate rooms,parquet,organisation through the few floors,basement,separate entryway,trestle,frontdoor locked,garret,open kitchen,water,lift,underground garage,street entrance,3*380V,Kitchen size,Stove type,kitchen furniture,refridgerator,water boiler,toilet room and bathroom separate,washing machine,shower,bath,sauna,pool,Heating type,fireplace,solid fuel,conditioner,heated floors,cable TV,steel door,video cameras,security system,secure guard,phone,neighbourhood watch,Internet,Road condition,Road type,Surrounding buildings,Location type,Nearby neighbours,forest,sea,river,lake,stream,pond,Latitude,Longitude
0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
496,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
497,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
498,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
499,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [50]:
data.to_csv("cleaned_data.csv", index=False)