In [53]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.metrics import RootMeanSquaredError,MeanSquaredError, MeanAbsoluteError
from numpy.linalg import inv
from sklearn.model_selection import train_test_split
from LLS import LLS
from LLS import my_train_test_split

In [54]:
data = pd.read_csv('input\housePrice.csv')
df = data.copy()
df.head()

Unnamed: 0,Area,Room,Parking,Warehouse,Elevator,Address,Price,Price(USD)
0,63,1,True,True,True,Shahran,1850000000.0,61666.67
1,60,1,True,True,True,Shahran,1850000000.0,61666.67
2,79,2,True,True,True,Pardis,550000000.0,18333.33
3,95,2,True,True,True,Shahrake Qods,902500000.0,30083.33
4,123,2,True,True,True,Shahrake Gharb,7000000000.0,233333.33


### Preprocess

In [55]:
for i in range(len(df['Price(USD)'])):
    df.loc[i,'Price(USD)'] = df['Price'][i] / 50000

df.replace({'Address': ''}, np.nan).dropna(subset=['Address'])
df.reset_index(drop = True, inplace = True)
df.head()

Unnamed: 0,Area,Room,Parking,Warehouse,Elevator,Address,Price,Price(USD)
0,63,1,True,True,True,Shahran,1850000000.0,37000.0
1,60,1,True,True,True,Shahran,1850000000.0,37000.0
2,79,2,True,True,True,Pardis,550000000.0,11000.0
3,95,2,True,True,True,Shahrake Qods,902500000.0,18050.0
4,123,2,True,True,True,Shahrake Gharb,7000000000.0,140000.0


In [56]:
# sorted(df['Address'].unique())
# len(df['Address'].unique())

#### Top 5 expensive houses

In [57]:
top_5_expensive = df.sort_values(by='Price',ascending=False).head(5).reset_index(drop=True)
top_5_expensive[['Address','Price']]

Unnamed: 0,Address,Price
0,Zaferanieh,92400000000.0
1,Abazar,91000000000.0
2,Lavasan,85000000000.0
3,Ekhtiarieh,81600000000.0
4,Niavaran,80500000000.0


In [58]:
df = df.drop('Address',axis=1)

In [59]:
correlation_with_target = df.corr()
top_2_features = correlation_with_target.index[1:5] 
print("Top 7 features selected for X:", top_2_features)

Top 7 features selected for X: Index(['Room', 'Parking', 'Warehouse', 'Elevator'], dtype='object')


#### Test and Train

In [60]:
X = data[['Area', 'Room', 'Parking', 'Warehouse', 'Elevator']].to_numpy().astype(int)
y = data[['Price']].to_numpy()

In [61]:
X_train, X_test, Y_train, Y_test = train_test_split(X,y,test_size=.2)

#### LLS

In [62]:
lls = LLS()
lls.fit(X_train,Y_train)

In [66]:
print(f"MAE: {round(lls.evaluate(X_test,Y_test,metric='mae'),0)}")
print(f"MSE: {round(lls.evaluate(X_test,Y_test,metric='mse'),0)}")
print(f"RMSE: {round(lls.evaluate(X_test,Y_test,metric='rmse'),0)}")

MAE: 3194377151.0
MSE: 4.081107352489401e+19
RMSE: 6388354524.0


In [64]:
mae = MeanAbsoluteError()
mae.update_state(y_pred=lls.predict(X_test),y_true=Y_test)
print(f'MAE Keras: {mae.result()}')

mae = MeanSquaredError()
mae.update_state(y_pred=lls.predict(X_test),y_true=Y_test)
print(f'MSE Keras: {mae.result()}')

mae = RootMeanSquaredError()
mae.update_state(y_pred=lls.predict(X_test),y_true=Y_test)
print(f'RMSE Keras: {mae.result()}')

MAE Keras: 3194376960.0
MSE Keras: 4.081107446546406e+19
RMSE Keras: 6388354048.0
