<a href="https://colab.research.google.com/github/AmirTheFarmer/Property-price-prediction-in-Tehran/blob/main/ML_Tehran_House_Price.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Review the initial dataset**

In [None]:
# Attach Google Drive to the Colab runtime so files under /content/drive are reachable.
from google.colab import drive

mount_point = '/content/drive'
drive.mount(mount_point)

Mounted at /content/drive


# **Import requirements**

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn
from google.colab import files

In [None]:
# Load the raw listings CSV from the mounted Drive folder.
raw_csv_path = "/content/drive/MyDrive/Tehran_house_price.csv"
df = pd.read_csv(raw_csv_path)

In [None]:
# Display the loaded frame (pandas abbreviates long frames to their first and last rows).
df

Unnamed: 0,Area,Room,Parking,Warehouse,Elevator,Address,Price,Price(USD)
0,63,1,True,True,True,Shahran,1.850000e+09,61666.67
1,60,1,True,True,True,Shahran,1.850000e+09,61666.67
2,79,2,True,True,True,Pardis,5.500000e+08,18333.33
3,95,2,True,True,True,Shahrake Qods,9.025000e+08,30083.33
4,123,2,True,True,True,Shahrake Gharb,7.000000e+09,233333.33
...,...,...,...,...,...,...,...,...
3474,86,2,True,True,True,Southern Janatabad,3.500000e+09,116666.67
3475,83,2,True,True,True,Niavaran,6.800000e+09,226666.67
3476,75,2,False,False,False,Parand,3.650000e+08,12166.67
3477,105,2,True,True,True,Dorous,5.600000e+09,186666.67


# **Edit dataset**
Attention: Run each cell once

In [None]:
# Encode the boolean 'Parking' flag as 0/1 integers.
# astype leaves an already-integer column unchanged, so this cell is safe to re-run
# (mapping {True: 1, False: 0} a second time would turn the values into NaN).
df['Parking'] = df['Parking'].astype('int64')

In [None]:
# Encode the boolean 'Warehouse' flag as 0/1 integers.
# astype leaves an already-integer column unchanged, so this cell is safe to re-run
# (mapping {True: 1, False: 0} a second time would turn the values into NaN).
df['Warehouse'] = df['Warehouse'].astype('int64')

In [None]:
# Encode the boolean 'Elevator' flag as 0/1 integers.
# astype leaves an already-integer column unchanged, so this cell is safe to re-run
# (mapping {True: 1, False: 0} a second time would turn the values into NaN).
df['Elevator'] = df['Elevator'].astype('int64')

In [None]:
# Count how many rows belong to each distinct address
Address = df.loc[:, 'Address']
Address.value_counts()

Unnamed: 0_level_0,count
Address,Unnamed: 1_level_1
Punak,161
Pardis,146
West Ferdows Boulevard,145
Gheitarieh,141
Shahran,130
...,...
Firoozkooh,1
Shadabad,1
Naziabad,1
Javadiyeh,1


# **Download the latest version of the dataset**

In [None]:
# Write the edited frame to Drive without the index column, then send the same file to the browser.
edited_csv_path = '/content/drive/My Drive/Tehran_house_price_Edited.csv'
df.to_csv(edited_csv_path, index=False)
files.download(edited_csv_path)
print("File downloaded successfully!")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

File downloaded successfully!


## **Now convert "Address" to numeric (one-hot) columns**

In [None]:
# One-hot encode the address; each generated column is named "area_<Address>".
df_encoded = pd.get_dummies(df, columns=['Address'], prefix='area')
# Select the dummy columns by their prefix instead of by position, so the selection
# stays correct if columns are added to or removed from the original frame.
columns = df_encoded.columns[df_encoded.columns.str.startswith('area_')]
columns_list = columns.tolist()

In [None]:
# Cast every one-hot column to 0/1 integers in a single vectorized step.
# astype is a no-op on columns that are already int64, so re-running this cell
# cannot turn the values into NaN the way a repeated {True: 1, False: 0} map would.
df_encoded = df_encoded.astype({column: 'int64' for column in columns_list})

# **Now convert all data to int64**

In [None]:
# Cast both price columns to int64 (fractional parts are truncated by the cast)
columns_to_convert = ['Price', 'Price(USD)']
price_dtypes = dict.fromkeys(columns_to_convert, 'int64')
df_encoded = df_encoded.astype(price_dtypes)

In [None]:
# 'Area' column
def safe_convert(value):
    """Convert ``value`` to ``int``, falling back to 0 when it cannot be converted.

    Args:
        value: Any object accepted by ``int()``, e.g. a number or a numeric string.

    Returns:
        int: ``int(value)`` on success; 0 when ``int()`` raises ``ValueError``,
        ``TypeError`` or ``OverflowError``.
    """
    try:
        return int(value)
    except (ValueError, TypeError, OverflowError):
        # OverflowError is what int() raises for infinite floats; NaN and
        # malformed strings raise ValueError, None raises TypeError.
        return 0

# Coerce every 'Area' entry with safe_convert, then store the result as int64 on the encoded frame.
area_values = df['Area'].apply(safe_convert)
df_encoded['Area'] = area_values.astype('int64')

# **Start the ML process**

In [None]:
# Target is the USD price; both price columns are removed from the feature matrix.
columns_to_exclude = ['Price', 'Price(USD)']
Y = df_encoded["Price(USD)"]
X = df_encoded.drop(columns=columns_to_exclude)

In [None]:
# Hold out 30% of the rows for testing; the fixed random_state keeps the split repeatable.
from sklearn.model_selection import train_test_split

split_options = {'test_size': 0.3, 'random_state': 4}
X_train, X_test, y_train, y_test = train_test_split(X, Y, **split_options)

In [None]:
# Linear regression estimator from scikit-learn
from sklearn.linear_model import LinearRegression

# Create the regressor with its default settings
mlr = LinearRegression()

# Fit on the training split only; X_test / y_test are not touched here
mlr.fit(X_train, y_train)

In [None]:
# Intercept term of the fitted linear model
mlr.intercept_

np.float64(-120257.18172926575)

In [None]:
# Fitted coefficients of the linear model (one per column of X_train)
mlr.coef_

In [None]:
# Pair each feature name with its fitted coefficient.
# Building the frame from a dict keeps 'Coefficients' as a float column; stacking the
# two sequences as rows and transposing would leave both columns with object dtype,
# which gets in the way of sorting, describing or plotting the values.
coeffcients = pd.DataFrame({'Attribute': X_train.columns, 'Coefficients': mlr.coef_})
coeffcients

Unnamed: 0,Attribute,Coefficients
0,Area,2630.12653
1,Room,-896.267458
2,Parking,-25021.447447
3,Warehouse,-16126.616793
4,Elevator,-4130.802453
...,...,...
192,area_Yousef Abad,90164.498079
193,area_Zafar,106891.892117
194,area_Zaferanieh,454138.13378
195,area_Zargandeh,87265.687546


# **Model(MLR) Evaluation**

In [None]:
# Predictions for the training rows (X_train); metrics computed from y_pred describe fit on data the model has seen.
y_pred = mlr.predict(X_train)

In [None]:
# Model Evaluation
from sklearn import metrics

# Training-set fit: the same four lines of output as before.
train_mse = metrics.mean_squared_error(y_train, y_pred)
print('R^2:', metrics.r2_score(y_train, y_pred))
print('MAE:', metrics.mean_absolute_error(y_train, y_pred))
print('MSE:', train_mse)
print('RMSE:', np.sqrt(train_mse))

# Held-out evaluation: the training scores above are measured on rows the model was
# fitted to, so they overstate how well it generalizes. Score the 30% test split too.
y_test_pred = mlr.predict(X_test)
test_mse = metrics.mean_squared_error(y_test, y_test_pred)
print('Test R^2:', metrics.r2_score(y_test, y_test_pred))
print('Test MAE:', metrics.mean_absolute_error(y_test, y_test_pred))
print('Test MSE:', test_mse)
print('Test RMSE:', np.sqrt(test_mse))

R^2: 0.7503573467126177
MAE: 62725.14899071075
MSE: 19031839263.559593
RMSE: 137955.93232463617
