In [1]:
import pandas as pd

In [2]:
# Read the listings table; the path is resolved relative to the working directory.
DATA_PATH = 'housePrice.csv'
df = pd.read_csv(DATA_PATH)

In [3]:
# Preview the first five rows of the freshly loaded frame.
df.head(n=5)

Unnamed: 0,Area,Room,Parking,Warehouse,Elevator,Address,Price,Price(USD)
0,63,1,True,True,True,Shahran,1850000000.0,61666.67
1,60,1,True,True,True,Shahran,1850000000.0,61666.67
2,79,2,True,True,True,Pardis,550000000.0,18333.33
3,95,2,True,True,True,Shahrake Qods,902500000.0,30083.33
4,123,2,True,True,True,Shahrake Gharb,7000000000.0,233333.33


In [4]:
# Overwrite Price with a value derived from the USD column.
# NOTE(review): 84000 is presumably a local-currency-per-USD exchange rate — confirm
# the source and date of this figure.
USD_CONVERSION_FACTOR = 84000
df['Price'] = df['Price(USD)'].mul(USD_CONVERSION_FACTOR)

In [25]:
# Order the listings from most to least expensive and renumber the rows from 0,
# discarding the previous index.
df = (
    df.sort_values(by='Price', ascending=False)
      .reset_index(drop=True)
)
df.head(n=5)

Unnamed: 0,Area,Room,Parking,Warehouse,Elevator,Address,Price,Price(USD)
0,420,4,True,True,True,Zaferanieh,258720000000.0,3080000.0
1,705,5,True,True,False,Abazar,254800000000.0,3033333.33
2,400,5,True,True,False,Lavasan,238000000000.0,2833333.33
3,680,5,True,True,False,Ekhtiarieh,228480000000.0,2720000.0
4,350,4,True,True,True,Niavaran,225400000000.0,2683333.33


In [26]:
# Keep the address labels of the five top rows (the frame is sorted by Price,
# descending, at this point) before the column is replaced by integer codes.
addresses = df['Address'].head(5)
addresses

0    Zaferanieh
1        Abazar
2       Lavasan
3    Ekhtiarieh
4      Niavaran
Name: Address, dtype: object

In [27]:
from sklearn.preprocessing import LabelEncoder

# Replace each address label with an integer code.
# NOTE(review): the codes follow the sorted order of the labels, so a linear
# model will read an arbitrary ordering as a magnitude — consider one-hot
# encoding if that matters for the analysis.
encoder = LabelEncoder()
df['Address'] = encoder.fit_transform(df['Address'])

# Force the three amenity flags to plain booleans.
# NOTE(review): astype(bool) turns missing values into True — confirm these
# columns contain no NaN.
for flag_column in ('Parking', 'Warehouse', 'Elevator'):
    df[flag_column] = df[flag_column].astype(bool)

# Normalise Price to float, removing thousands separators in case the values
# arrive as text. This must be `.str.replace`: `Series.replace(',', '')` only
# swaps cells whose *entire* value is ',' and never touches a comma inside a
# longer string, so it would leave '1,850,000' unparsable.
df['Price'] = (
    df['Price']
    .astype(str)
    .str.strip()
    .str.replace(',', '', regex=False)
    .astype(float)
)

In [28]:
# Inspect the first five rows after encoding to confirm the new column values.
df.head(n=5)

Unnamed: 0,Area,Room,Parking,Warehouse,Elevator,Address,Price,Price(USD)
0,420,4,True,True,True,189,258720000000.0,3080000.0
1,705,5,True,True,False,0,254800000000.0,3033333.33
2,400,5,True,True,False,87,238000000000.0,2833333.33
3,680,5,True,True,False,43,228480000000.0,2720000.0
4,350,4,True,True,True,105,225400000000.0,2683333.33


In [29]:
from train_test_split import train_test_split

# Positional selection: the first six columns are the predictors and the
# seventh is the target.
# NOTE(review): this assumes the column order Area, Room, Parking, Warehouse,
# Elevator, Address, Price shown by df.head() — selecting by name would be safer.
features = df.iloc[:, :6].to_numpy()
target = df.iloc[:, 6].to_numpy()

# Shuffled split with a fixed seed so the partition is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    shuffle=True,
    random_state=42,
)

In [30]:
# Cast every split to float so the estimators below receive purely numeric
# arrays (the feature columns mix integer and boolean values).
X_train = X_train.astype(float)
X_test = X_test.astype(float)
y_train = y_train.astype(float)
y_test = y_test.astype(float)

In [31]:
from lls import LLSRegression

# Fit the project-local LLSRegression estimator on the training split.
# NOTE(review): presumably a linear least-squares implementation — see lls.py.
model = LLSRegression()
model.fit(X_train, y_train)

# Report each supported error metric on the held-out split.
for metric_name in ('RMSE', 'MSE', 'MAE'):
    print(f"{metric_name}: {model.score(X_test, y_test, metric=metric_name)}")

RMSE: 16680784001.404161
MSE: 2.7824855490150105e+20
MAE: 8262213286.702719


In [32]:
import numpy as np

def evaluate(y_test, y_predict):
    """Print RMSE, MSE and MAE of predictions against the true targets.

    Args:
        y_test: True target values (any array-like, e.g. list or ndarray).
        y_predict: Predicted values (any array-like) matching ``y_test``.

    Returns:
        None. The three metrics are printed, one per line, as
        ``RMSE: ...``, ``MSE: ...`` and ``MAE: ...``.
    """
    # Work in float64: integer inputs would silently overflow int64 when
    # residuals of ~1e10 are squared, and plain lists do not support `-`.
    y_true = np.asarray(y_test, dtype=float)
    y_pred = np.asarray(y_predict, dtype=float)

    # A (n,) target against a (n, 1) prediction would broadcast to an (n, n)
    # matrix and report wrong metrics; flatten when the element counts match
    # but the shapes differ. Genuine broadcasting (e.g. a scalar baseline
    # prediction) is left untouched.
    if y_true.shape != y_pred.shape and y_true.size == y_pred.size:
        y_true, y_pred = y_true.ravel(), y_pred.ravel()

    residuals = y_true - y_pred
    mse = np.mean(residuals ** 2)  # computed once, reused for RMSE

    print(f'RMSE: {np.sqrt(mse)}')
    print(f'MSE: {mse}')
    print(f'MAE: {np.mean(np.abs(residuals))}')

In [33]:
from sklearn.linear_model import LinearRegression

# Ordinary least-squares baseline from scikit-learn, for comparison with the
# custom estimator above. `fit` returns the estimator itself.
model = LinearRegression().fit(X_train, y_train)

# `score` on a scikit-learn regressor is the coefficient of determination (R^2).
test_score = model.score(X_test, y_test)
print(f'score: {test_score}')

# Error metrics on the held-out split.
y_predict = model.predict(X_test)
evaluate(y_test, y_predict)

score: 0.5069351639801083
RMSE: 15885814426.776382
MSE: 2.523591000019766e+20
MAE: 7939318381.55609


In [34]:
from sklearn.linear_model import RidgeCV

# Ridge regression with built-in cross-validated choice of the penalty,
# using scikit-learn's default candidate alphas. `fit` returns the estimator.
model = RidgeCV().fit(X_train, y_train)

# `score` on a scikit-learn regressor is the coefficient of determination (R^2).
test_score = model.score(X_test, y_test)
print(f'score: {test_score}')

# Error metrics on the held-out split.
y_predict = model.predict(X_test)
evaluate(y_test, y_predict)

score: 0.5069090192015089
RMSE: 15886235594.100765
MSE: 2.523724813512741e+20
MAE: 7928598133.92898
