## 0. Download dataset
**Note:** If you can't download using gdown due to limited number of downloads, please download it manually and upload it to your drive, then copy it from the drive to colab.
```python
from google.colab import drive

drive.mount('/content/drive')
!cp /path/to/dataset/on/your/drive .
```

In [1]:
# https://drive.google.com/file/d/1qeJqFtRdjjHqExbWJcgKy0yJbczTTAE3/view?usp=sharing
!gdown --id 1qeJqFtRdjjHqExbWJcgKy0yJbczTTAE3

Downloading...
From: https://drive.google.com/uc?id=1qeJqFtRdjjHqExbWJcgKy0yJbczTTAE3
To: /content/Housing.csv
100% 30.0k/30.0k [00:00<00:00, 58.2MB/s]


## 1. Import libraries and load dataset

In [2]:
import numpy as np
import polars as pl
import matplotlib.pyplot as plt

In [3]:
dataset_path = 'Housing.csv'

# Read data from .csv file
data = pl.read_csv(dataset_path)

In [4]:
data

price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
i64,i64,i64,i64,i64,str,str,str,str,str,i64,str,str
13300000,7420,4,2,3,"""yes""","""no""","""no""","""no""","""yes""",2,"""yes""","""furnished"""
12250000,8960,4,4,4,"""yes""","""no""","""no""","""no""","""yes""",3,"""no""","""furnished"""
12250000,9960,3,2,2,"""yes""","""no""","""yes""","""no""","""no""",2,"""yes""","""semi-furnished…"
12215000,7500,4,2,2,"""yes""","""no""","""yes""","""no""","""yes""",3,"""yes""","""furnished"""
11410000,7420,4,1,2,"""yes""","""yes""","""yes""","""no""","""yes""",2,"""no""","""furnished"""
10850000,7500,3,3,1,"""yes""","""no""","""yes""","""no""","""yes""",2,"""yes""","""semi-furnished…"
10150000,8580,4,3,4,"""yes""","""no""","""no""","""no""","""yes""",2,"""yes""","""semi-furnished…"
10150000,16200,5,3,2,"""yes""","""no""","""no""","""no""","""no""",0,"""no""","""unfurnished"""
9870000,8100,4,1,2,"""yes""","""yes""","""yes""","""no""","""yes""",2,"""yes""","""furnished"""
9800000,5750,3,2,4,"""yes""","""yes""","""no""","""no""","""yes""",1,"""yes""","""unfurnished"""


## 2. View the dataset

In [5]:
# Preview top 5 rows using head()
data.head()

price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
i64,i64,i64,i64,i64,str,str,str,str,str,i64,str,str
13300000,7420,4,2,3,"""yes""","""no""","""no""","""no""","""yes""",2,"""yes""","""furnished"""
12250000,8960,4,4,4,"""yes""","""no""","""no""","""no""","""yes""",3,"""no""","""furnished"""
12250000,9960,3,2,2,"""yes""","""no""","""yes""","""no""","""no""",2,"""yes""","""semi-furnished…"
12215000,7500,4,2,2,"""yes""","""no""","""yes""","""no""","""yes""",3,"""yes""","""furnished"""
11410000,7420,4,1,2,"""yes""","""yes""","""yes""","""no""","""yes""",2,"""no""","""furnished"""


In [6]:
# Preview last 5 rows using tail()
data.tail()

price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
i64,i64,i64,i64,i64,str,str,str,str,str,i64,str,str
1820000,3000,2,1,1,"""yes""","""no""","""yes""","""no""","""no""",2,"""no""","""unfurnished"""
1767150,2400,3,1,1,"""no""","""no""","""no""","""no""","""no""",0,"""no""","""semi-furnished…"
1750000,3620,2,1,1,"""yes""","""no""","""no""","""no""","""no""",0,"""no""","""unfurnished"""
1750000,2910,3,1,1,"""no""","""no""","""no""","""no""","""no""",0,"""no""","""furnished"""
1750000,3850,3,1,2,"""yes""","""no""","""no""","""no""","""no""",0,"""no""","""unfurnished"""


## 3. Understand some basic information about the data

In [7]:
data.describe()

describe,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
str,f64,f64,f64,f64,f64,str,str,str,str,str,f64,str,str
"""count""",545.0,545.0,545.0,545.0,545.0,"""545""","""545""","""545""","""545""","""545""",545.0,"""545""","""545"""
"""null_count""",0.0,0.0,0.0,0.0,0.0,"""0""","""0""","""0""","""0""","""0""",0.0,"""0""","""0"""
"""mean""",4766700.0,5150.541284,2.965138,1.286239,1.805505,,,,,,0.693578,,
"""std""",1870400.0,2170.141023,0.738064,0.50247,0.867492,,,,,,0.861586,,
"""min""",1750000.0,1650.0,1.0,1.0,1.0,"""no""","""no""","""no""","""no""","""no""",0.0,"""no""","""furnished"""
"""max""",13300000.0,16200.0,6.0,4.0,4.0,"""yes""","""yes""","""yes""","""yes""","""yes""",3.0,"""yes""","""unfurnished"""
"""median""",4340000.0,4600.0,3.0,1.0,2.0,,,,,,0.0,,
"""25%""",3430000.0,3600.0,2.0,1.0,1.0,,,,,,0.0,,
"""75%""",5740000.0,6360.0,3.0,2.0,2.0,,,,,,1.0,,


## 4. Data Selection

In [12]:
price = data['price']
price

price
i64
13300000
12250000
12250000
12215000
11410000
10850000
10150000
10150000
9870000
9800000


In [13]:
price = data[['price']]
price

price
i64
13300000
12250000
12250000
12215000
11410000
10850000
10150000
10150000
9870000
9800000


In [14]:
price = data.select('price')
price

price
i64
13300000
12250000
12250000
12215000
11410000
10850000
10150000
10150000
9870000
9800000


In [15]:
columns = data[['price', 'area', 'bedrooms', 'bathrooms']]
columns

price,area,bedrooms,bathrooms
i64,i64,i64,i64
13300000,7420,4,2
12250000,8960,4,4
12250000,9960,3,2
12215000,7500,4,2
11410000,7420,4,1
10850000,7500,3,3
10150000,8580,4,3
10150000,16200,5,3
9870000,8100,4,1
9800000,5750,3,2


In [16]:
columns = data.select(['price', 'area', 'bedrooms', 'bathrooms'])
columns

price,area,bedrooms,bathrooms
i64,i64,i64,i64
13300000,7420,4,2
12250000,8960,4,4
12250000,9960,3,2
12215000,7500,4,2
11410000,7420,4,1
10850000,7500,3,3
10150000,8580,4,3
10150000,16200,5,3
9870000,8100,4,1
9800000,5750,3,2


## 5. Data Slicing

In [21]:
data.slice(
    10, 5
)

price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
i64,i64,i64,i64,i64,str,str,str,str,str,i64,str,str
9800000,13200,3,1,2,"""yes""","""no""","""yes""","""no""","""yes""",2,"""yes""","""furnished"""
9681000,6000,4,3,2,"""yes""","""yes""","""yes""","""yes""","""no""",2,"""no""","""semi-furnished…"
9310000,6550,4,2,2,"""yes""","""no""","""no""","""no""","""yes""",1,"""yes""","""semi-furnished…"
9240000,3500,4,2,2,"""yes""","""no""","""no""","""yes""","""no""",2,"""no""","""furnished"""
9240000,7800,3,2,2,"""yes""","""no""","""no""","""no""","""no""",0,"""yes""","""semi-furnished…"


In [23]:
data.slice(
    20, 2
).select(
    ['price', 'area', 'bedrooms']
)

price,area,bedrooms
i64,i64,i64
8750000,4320,3
8680000,7155,3


## 6. Data Selection – Based on Conditional filtering

In [28]:
data.filter(
    (data['guestroom'] == 'yes') &
    (data['basement'] == 'no')
)[['price', 'area', 'bedrooms', 'stories']]

price,area,bedrooms,stories
i64,i64,i64,i64
9800000,5750,3,4
8890000,4600,3,2
7962500,6000,3,4
7350000,6000,4,4
7350000,6000,3,2
7245000,9000,4,4
7210000,7680,4,4
7210000,6000,3,4
7140000,6000,3,2
6650000,6000,3,3


## 7. Groupby operations

In [30]:
data.groupby('price').agg(pl.col('area').mean()).head()

price,area
i64,f64
3773000,5900.0
8645000,6305.0
6510000,6226.666667
4865000,4350.0
2310000,3180.0


## 8. Sorting operations

In [31]:
data.groupby('price').agg(pl.col('area').mean()).sort(['area'], descending=True).head()

price,area
i64,f64
5943000,15600.0
10150000,12390.0
7343000,11440.0
7000000,11175.0
6930000,11040.0


## 9. Check missing values

In [32]:
data.null_count()

price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32
0,0,0,0,0,0,0,0,0,0,0,0,0


## 10. Dropping columns

In [33]:
data.drop(['furnishingstatus']).head()

price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea
i64,i64,i64,i64,i64,str,str,str,str,str,i64,str
13300000,7420,4,2,3,"""yes""","""no""","""no""","""no""","""yes""",2,"""yes"""
12250000,8960,4,4,4,"""yes""","""no""","""no""","""no""","""yes""",3,"""no"""
12250000,9960,3,2,2,"""yes""","""no""","""yes""","""no""","""no""",2,"""yes"""
12215000,7500,4,2,2,"""yes""","""no""","""yes""","""no""","""yes""",3,"""yes"""
11410000,7420,4,1,2,"""yes""","""yes""","""yes""","""no""","""yes""",2,"""no"""


## 11. Create new column

In [45]:
data.with_columns(
    pl.lit(0).alias('temp_column')
)

price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus,temp_column
i64,i64,i64,i64,i64,str,str,str,str,str,i64,str,str,i32
13300000,7420,4,2,3,"""yes""","""no""","""no""","""no""","""yes""",2,"""yes""","""furnished""",0
12250000,8960,4,4,4,"""yes""","""no""","""no""","""no""","""yes""",3,"""no""","""furnished""",0
12250000,9960,3,2,2,"""yes""","""no""","""yes""","""no""","""no""",2,"""yes""","""semi-furnished…",0
12215000,7500,4,2,2,"""yes""","""no""","""yes""","""no""","""yes""",3,"""yes""","""furnished""",0
11410000,7420,4,1,2,"""yes""","""yes""","""yes""","""no""","""yes""",2,"""no""","""furnished""",0
10850000,7500,3,3,1,"""yes""","""no""","""yes""","""no""","""yes""",2,"""yes""","""semi-furnished…",0
10150000,8580,4,3,4,"""yes""","""no""","""no""","""no""","""yes""",2,"""yes""","""semi-furnished…",0
10150000,16200,5,3,2,"""yes""","""no""","""no""","""no""","""no""",0,"""no""","""unfurnished""",0
9870000,8100,4,1,2,"""yes""","""yes""","""yes""","""no""","""yes""",2,"""yes""","""furnished""",0
9800000,5750,3,2,4,"""yes""","""yes""","""no""","""no""","""yes""",1,"""yes""","""unfurnished""",0


In [46]:
data.with_columns(
    pl.col('price').alias('new_price')
)

price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus,new_price
i64,i64,i64,i64,i64,str,str,str,str,str,i64,str,str,i64
13300000,7420,4,2,3,"""yes""","""no""","""no""","""no""","""yes""",2,"""yes""","""furnished""",13300000
12250000,8960,4,4,4,"""yes""","""no""","""no""","""no""","""yes""",3,"""no""","""furnished""",12250000
12250000,9960,3,2,2,"""yes""","""no""","""yes""","""no""","""no""",2,"""yes""","""semi-furnished…",12250000
12215000,7500,4,2,2,"""yes""","""no""","""yes""","""no""","""yes""",3,"""yes""","""furnished""",12215000
11410000,7420,4,1,2,"""yes""","""yes""","""yes""","""no""","""yes""",2,"""no""","""furnished""",11410000
10850000,7500,3,3,1,"""yes""","""no""","""yes""","""no""","""yes""",2,"""yes""","""semi-furnished…",10850000
10150000,8580,4,3,4,"""yes""","""no""","""no""","""no""","""yes""",2,"""yes""","""semi-furnished…",10150000
10150000,16200,5,3,2,"""yes""","""no""","""no""","""no""","""no""",0,"""no""","""unfurnished""",10150000
9870000,8100,4,1,2,"""yes""","""yes""","""yes""","""no""","""yes""",2,"""yes""","""furnished""",9870000
9800000,5750,3,2,4,"""yes""","""yes""","""no""","""no""","""yes""",1,"""yes""","""unfurnished""",9800000


## 12. apply() functions

In [35]:
def binary_2_numeric(decision):
    if decision == 'yes':
        return 1
    else:
        return 0

applied_data = data.with_columns(
    pl.col('mainroad').apply(binary_2_numeric),
    pl.col('guestroom').apply(binary_2_numeric),
    pl.col('basement').apply(binary_2_numeric),
    pl.col('hotwaterheating').apply(binary_2_numeric),
    pl.col('airconditioning').apply(binary_2_numeric),
    pl.col('prefarea').apply(binary_2_numeric)
)
applied_data

price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,str
13300000,7420,4,2,3,1,0,0,0,1,2,1,"""furnished"""
12250000,8960,4,4,4,1,0,0,0,1,3,0,"""furnished"""
12250000,9960,3,2,2,1,0,1,0,0,2,1,"""semi-furnished…"
12215000,7500,4,2,2,1,0,1,0,1,3,1,"""furnished"""
11410000,7420,4,1,2,1,1,1,0,1,2,0,"""furnished"""
10850000,7500,3,3,1,1,0,1,0,1,2,1,"""semi-furnished…"
10150000,8580,4,3,4,1,0,0,0,1,2,1,"""semi-furnished…"
10150000,16200,5,3,2,1,0,0,0,0,0,0,"""unfurnished"""
9870000,8100,4,1,2,1,1,1,0,1,2,1,"""furnished"""
9800000,5750,3,2,4,1,1,0,0,1,1,1,"""unfurnished"""
