In [30]:
#Importing necessary Dependencies
#Pandas is used for data interpretation and cleaning.
#scikit-learn provides the built-in multiple linear regression algorithm.
#Matplotlib is used for visualizing the data using graphs.
import pandas as pd
from sklearn import linear_model
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score

In [31]:
#Loading the housing dataset from the CSV file into a pandas DataFrame.
df = pd.read_csv(filepath_or_buffer='Dataset.csv')

In [32]:
#Printing the dataset for a first look at the raw values.
#pandas abbreviates the middle rows with '..' and wraps wide tables into several column groups.
print(df)

        price  area  bedrooms  bathrooms  stories mainroad guestroom basement  \
0    13300000  7420         4          2        3      yes        no       no   
1    12250000  8960         4          4        4      yes        no       no   
2    12250000  9960         3          2        2      yes        no      yes   
3    12215000  7500         4          2        2      yes        no      yes   
4    11410000  7420         4          1        2      yes       yes      yes   
..        ...   ...       ...        ...      ...      ...       ...      ...   
540   1820000  3000         2          1        1      yes        no      yes   
541   1767150  2400         3          1        1       no        no       no   
542   1750000  3620         2          1        1      yes        no       no   
543   1750000  2910         3          1        1       no        no       no   
544   1750000  3850         3          1        2      yes        no       no   

    hotwaterheating aircond

In [33]:
#Lookup table that turns the textual answers 'no' / 'yes' into the numbers 0 / 1.
d = dict(no=0, yes=1)

In [34]:
#Creating a new dictionary and mapping it to the dataset.
#The furnishing levels are encoded as ordered integers (0 < 1 < 2).
e = {'unfurnished':0, 'semi-furnished':1, 'furnished':2}
#Series.map() returns NaN for values that are not keys of the dictionary, so
#mapping an already-encoded column again would wipe it out. Only map while the
#column still holds the text labels, which makes this cell safe to re-run.
if not pd.api.types.is_numeric_dtype(df['furnishingstatus']):
    df['furnishingstatus'] = df['furnishingstatus'].map(e)

In [35]:
#Mapping the 'd' dictionary with the required features in the dataset.
#All yes/no columns are handled in one loop instead of one copy-pasted cell per column.
for column in ['preffered', 'airconditioning', 'mainroad', 'basement', 'hotwaterheating', 'guestroom']:
    #Series.map() returns NaN for values that are not keys of 'd', so mapping a
    #column that already holds 0/1 would destroy it. Skipping numeric columns
    #keeps this cell safe to re-run.
    if not pd.api.types.is_numeric_dtype(df[column]):
        df[column] = df[column].map(d)

In [41]:
#Printing the modified dataset to confirm that the yes/no columns now hold 1/0.
print(df)

        price  area  bedrooms  bathrooms  stories  mainroad  guestroom  \
0    13300000  7420         4          2        3         1          0   
1    12250000  8960         4          4        4         1          0   
2    12250000  9960         3          2        2         1          0   
3    12215000  7500         4          2        2         1          0   
4    11410000  7420         4          1        2         1          1   
..        ...   ...       ...        ...      ...       ...        ...   
540   1820000  3000         2          1        1         1          0   
541   1767150  2400         3          1        1         0          0   
542   1750000  3620         2          1        1         1          0   
543   1750000  2910         3          1        1         0          0   
544   1750000  3850         3          1        2         1          0   

     basement  hotwaterheating  airconditioning  parking  preffered  \
0           0                0          

In [42]:
#Scaling input data to be more readable.
#Here we are scaling area in 1000 sq ft., for e.g., 7000 sq ft would be taken as 7 as an input with 1000 as a multiplier in the algorithm.
def ScaledData(area):
    """Return ``area`` divided by 1000 (i.e. expressed in thousands of sq ft).

    Parameters
    ----------
    area : number, pandas Series or NumPy array
        Area value(s); the division is applied element-wise to a Series/array.

    Returns
    -------
    Same kind of object as ``area``, with every value divided by 1000.

    Notes
    -----
    The result depends only on the argument. No loop over the global
    DataFrame is needed because the division is already vectorized, and
    an empty input simply yields an empty result.
    """
    return area / 1000

In [43]:
#Here we are scaling price in Million USD.
def ScaledPrice(price):
    """Return ``price`` divided by 1,000,000 (i.e. expressed in millions).

    Parameters
    ----------
    price : number, pandas Series or NumPy array
        Price value(s); the division is applied element-wise to a Series/array.

    Returns
    -------
    Same kind of object as ``price``, with every value divided by 1,000,000.

    Notes
    -----
    The result depends only on the argument. No loop over the global
    DataFrame is needed because the division is already vectorized, and
    no local variable shadows the function's own name.
    """
    return price / 1000000

In [44]:
#Computing the scaled price and area columns and keeping them in their own variables.
newprice, newarea = ScaledPrice(df['price']), ScaledData(df['area'])

In [45]:
#Overwriting the original price and area columns with their scaled versions.
df['price'], df['area'] = newprice, newarea

In [46]:
#Printing the revised dataset: 'price' and 'area' now show the scaled values.
print(df)

        price  area  bedrooms  bathrooms  stories  mainroad  guestroom  \
0    13.30000  7.42         4          2        3         1          0   
1    12.25000  8.96         4          4        4         1          0   
2    12.25000  9.96         3          2        2         1          0   
3    12.21500  7.50         4          2        2         1          0   
4    11.41000  7.42         4          1        2         1          1   
..        ...   ...       ...        ...      ...       ...        ...   
540   1.82000  3.00         2          1        1         1          0   
541   1.76715  2.40         3          1        1         0          0   
542   1.75000  3.62         2          1        1         1          0   
543   1.75000  2.91         3          1        1         0          0   
544   1.75000  3.85         3          1        2         1          0   

     basement  hotwaterheating  airconditioning  parking  preffered  \
0           0                0          

In [47]:
#Splitting the frame into the model inputs (X, twelve feature columns) and the target (y, the price).
X = df.loc[:, [
    'area',
    'bedrooms',
    'bathrooms',
    'stories',
    'mainroad',
    'guestroom',
    'basement',
    'hotwaterheating',
    'airconditioning',
    'parking',
    'preffered',
    'furnishingstatus',
]]
y = df.loc[:, 'price']

In [48]:
#Building a linear regression model and training it on the plain arrays
#(feature names are dropped, so predictions can later be made from plain lists).
regr = linear_model.LinearRegression()
regr.fit(X=X.to_numpy(), y=y.to_numpy())

In [49]:
#Helper that asks the fitted model for the price of one house.
def Prediction(area, bedrooms, bathrooms, stories, mainroad, guestroom, basement, hotwaterheating, airconditioning, parking, preffered, furnishingstatus):
    """Predict the price for a single house described by its twelve features.

    The arguments are packed into one row, in the order given in the
    signature, and handed to the module-level ``regr`` model. The value
    returned is whatever ``regr.predict`` produces for that one-row input.
    """
    feature_row = [
        area, bedrooms, bathrooms, stories,
        mainroad, guestroom, basement, hotwaterheating,
        airconditioning, parking, preffered, furnishingstatus,
    ]
    return regr.predict([feature_row])

In [51]:
#Taking user input and running them into the function defined above.
#Yes/no answers are entered as 1/0 and the furnishing level as 0/1/2.
area = float(input("Enter the area in 1000 sq. ft: "))
bedrooms = float(input("Enter the number of bedrooms required: "))
bathrooms = int(input("Enter the number of bathrooms required: "))
stories = float(input("Enter the number of storeys you want: "))
mainroad = int(input("Do you want the house to be at mainroad /(0 for no and 1 for yes)/: "))
guestroom = int(input("Do you want the house to have a guestroom? /(0 for no and 1 for yes)/: "))
basement = int(input("Do you want the house to have a basement? /(0 for no and 1 for yes)/: "))
hotwaterheating = int(input("Do you want the house to have hot water heating systems? /(0 for no and 1 for yes)/: "))
airconditioning = int(input("Do you want the house to be air conditioned? /(0 for no and 1 for yes)/: "))
parking = int(input("Enter the number of vehicles you need parking for: "))
preffered = int(input("Do you want the house to be preferred by our experts? /(0 for no and 1 for yes)/: "))
furnishingstatus = int(input("Do you want the house to be furnished or semi-furnished or unfurnished? /(0 for unfurnished, 1 for semi-furnished and 2 for furnished)/ : "))

#Run the model a single time and reuse the result for the message below.
estimatedPrice = Prediction(area, bedrooms, bathrooms, stories, mainroad, guestroom, basement, hotwaterheating, airconditioning, parking, preffered, furnishingstatus)

print("The estimated price of the house (In Million USD) you're looking for is: " + str(estimatedPrice))

Enter the area in 1000 sq. ft: 5
Enter the number of bedrooms required: 5
Enter the number of bathrooms required: 5
Enter the number of storeys you want: 5
Do you want the house to be at mainroad /(0 for no and 1 for yes)/: 1
Do you want the house to have a guestroom? /(0 for no and 1 for yes)/: 1
Do you want the house to have a basement? /(0 for no and 1 for yes)/: 1
Do you want the house to have hot water heating systems? /(0 for no and 1 for yes)/: 1
Do you want the house to be air conditioned? /(0 for no and 1 for yes)/: 1
Enter the number of vehicles you need parking for: 5
Do you want the house to be preferred by our experts? /(0 for no and 1 for yes)/: 1
Do you want the house to be furnished or semi-furnished or unfurnished? /(0 for unfurnished, 1 for semi-furnished and 2 for furnished)/ : 2
The estimated price of the house (In Million USD) you're looking for is: [13.96678292]


In [52]:
#Finding the values of coefficients for each feature in the linear regression.
#The model was fitted on X.values, so the coefficients are listed in the column order used to build X
#(area, bedrooms, bathrooms, stories, mainroad, guestroom, basement, hotwaterheating,
#airconditioning, parking, preffered, furnishingstatus).
print(regr.coef_)

[0.24390689 0.11947439 0.98888829 0.45039152 0.42310074 0.29803051
 0.35792636 0.87293603 0.85363359 0.27978564 0.6470556  0.21318778]
