In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score


In [None]:
# Path of the housing CSV that the rest of the notebook works on.
DATA_PATH = '/content/Housing.csv'

data = pd.read_csv(DATA_PATH)
data.head(n=5)


In [None]:
# Last five rows of the frame (tail() defaults to n=5).
data.tail()

In [None]:
# Print pandas' summary of the frame (columns, non-null counts, dtypes).
data.info()

In [None]:
# Only the final expression of a cell is rendered, so the full column index
# is printed explicitly; the object-dtype column names remain the cell output.
print(data.columns)
data.select_dtypes(include='object').columns


In [None]:
# Descriptive statistics for every column, numeric or not.
summary_stats = data.describe(include='all')
summary_stats


In [None]:
# Count of missing values per column (isna is the canonical spelling of isnull).
data.isna().sum()

In [None]:
# Histogram of the 'price' column.
fig, ax = plt.subplots()
ax.hist(data['price'])
ax.set_xlabel('Price')
ax.set_ylabel('Frequency')
plt.show()

In [None]:
# Box plot of 'price' grouped by 'furnishingstatus'.
fig, ax = plt.subplots()
sns.boxplot(data=data, x='furnishingstatus', y='price', ax=ax)
ax.set_xlabel('Furnishing Status')
ax.set_ylabel('Price')
plt.show()

In [None]:
# Scatter of 'price' against 'area'.
fig, ax = plt.subplots()
ax.scatter(data['area'], data['price'])
ax.set_xlabel('Area')
ax.set_ylabel('Price')
plt.show()

In [None]:
# Pairwise relationship grid over the frame's columns.
pair_grid = sns.pairplot(data=data)
plt.show()

In [None]:
# Histogram of the 'area' column using ten bins.
fig, ax = plt.subplots()
ax.hist(data['area'], bins=10)
ax.set_xlabel('Area')
ax.set_ylabel('Frequency')
plt.show()

In [None]:
# Kernel density estimate of the 'price' column.
# `fill=True` replaces the deprecated `shade=True` keyword, which newer
# seaborn releases warn about and are scheduled to reject.
sns.kdeplot(data['price'], fill=True)
plt.xlabel('Price')
plt.ylabel('Density')
plt.show()

In [None]:
# Column labels of the frame at this point in the notebook.
data.columns

In [None]:
# Names of the columns stored with object dtype.
object_cols = data.select_dtypes(include=['object']).columns
object_cols

In [None]:
def binary_map(x):
    """Encode a yes/no Series as 1/0.

    Parameters
    ----------
    x : pandas.Series
        Column holding the strings 'yes' / 'no'. Values that are already
        1 / 0 pass through unchanged, so applying the function a second time
        (for example by re-running the encoding cell) does not turn the
        column into NaN.

    Returns
    -------
    pandas.Series
        Series with the same index as ``x`` where 'yes' -> 1 and 'no' -> 0.
        Any value outside the mapping becomes NaN.
    """
    # The identity entries (1 -> 1, 0 -> 0) make the mapping idempotent.
    return x.map({'yes': 1, 'no': 0, 1: 1, 0: 0})


In [None]:
# Columns that are later passed through binary_map (yes/no -> 1/0).
categorical_col = [
    'mainroad',
    'guestroom',
    'basement',
    'hotwaterheating',
    'airconditioning',
    'prefarea',
]


In [None]:
# Run binary_map over each listed column, then write the encoded columns back.
encoded_flags = data[categorical_col].apply(binary_map)
data[categorical_col] = encoded_flags
data.head()

In [None]:
# Indicator columns for 'furnishingstatus', one per category (all levels kept).
furnishing = data['furnishingstatus']
dummy_col = pd.get_dummies(furnishing)
dummy_col.head()

In [None]:
# Indicator columns for 'furnishingstatus' with the first level dropped, so
# the remaining columns are not perfectly collinear.
# dtype=int keeps the dummies as 0/1 on every pandas version (pandas >= 2.0
# would otherwise emit True/False), matching the 0/1 encoding produced by
# binary_map for the other categorical columns.
dummy_col = pd.get_dummies(data['furnishingstatus'], drop_first=True, dtype=int)
dummy_col.head()

In [None]:
# Append the indicator columns to the right of the existing frame.
frames = [data, dummy_col]
data = pd.concat(frames, axis='columns')
data.head()


In [None]:


# Remove the original 'furnishingstatus' column from the frame in place.
data.drop(columns=['furnishingstatus'], inplace=True)
data.head()

In [None]:
# Column labels after 'furnishingstatus' was dropped from the frame.
data.columns


In [None]:
# Seed NumPy's global RNG, then split 70/30 with a fixed random_state.
np.random.seed(0)
df_train, df_test = train_test_split(
    data,
    test_size=0.3,
    train_size=0.7,
    random_state=100,
)
df_train.head()



In [None]:
# (rows, columns) of the training split.
df_train.shape

In [None]:
# First five rows of the test split.
df_test.head(n=5)

In [None]:
# (rows, columns) of the test split.
df_test.shape


In [None]:
# Min-max scale the listed columns, fitting the scaler on the training split.
scaler = MinMaxScaler()
col_to_scale = ['area', 'bedrooms', 'bathrooms', 'stories', 'parking', 'price']

# The split frames are produced by row selection from `data`; writing into
# them directly makes pandas (without copy-on-write) raise
# SettingWithCopyWarning. Working on an explicit copy avoids that.
df_train = df_train.copy()
df_train[col_to_scale] = scaler.fit_transform(df_train[col_to_scale])
df_train.head()

In [None]:
# x_train is the same object as df_train; popping 'price' removes the target
# column from it and yields the target Series.
x_train = df_train
y_train = x_train.pop('price')
y_train.head()

In [None]:
# Fit ordinary least squares on the training data; fit() returns the
# estimator itself, which is then shown as the cell output.
linear_regression = LinearRegression().fit(x_train, y_train)
linear_regression

In [None]:
# Fitted coefficients, one per column of x_train.
coefficients = linear_regression.coef_
# The call was split across two lines (`print` / `(coefficients)`), which
# never invoked print; join them so the values are actually printed.
print(coefficients)


In [None]:
# R^2 of the fitted model on the training data.
score = linear_regression.score(x_train, y_train)
# The call was split across two lines (`print` / `(score)`), which never
# invoked print; join them so the value is actually printed.
print(score)


In [None]:
# Evaluate the fitted model on the held-out split.
col_to_scale = ['area','bedrooms','bathrooms','stories','parking','price']

# Work on a copy so the column assignment below does not trigger pandas'
# SettingWithCopyWarning on the frame returned by train_test_split.
df_test = df_test.copy()

# Use transform(), not fit_transform(): the scaler must keep the min/max
# learned from the training split. Re-fitting on the test split would put the
# test features and target on a different scale from the one the model was
# trained on (and leaks test statistics into preprocessing).
df_test[col_to_scale] = scaler.transform(df_test[col_to_scale])

y_test = df_test.pop('price')
x_test = df_test
prediction = linear_regression.predict(x_test)

# Report the test-set R^2, which was previously computed but never shown.
r2 = r2_score(y_test, prediction)
print(f'Test R^2: {r2:.4f}')

# Side-by-side view of actual and predicted (scaled) prices.
y_test_matrix = y_test.values.reshape(-1,1)
data_frame = pd.DataFrame({'actual': y_test_matrix.flatten(),'predicted': prediction.flatten()})
data_frame.head(10)

In [None]:


# Scatter of predicted against actual test targets.
fig = plt.figure()
plt.scatter(y_test, prediction)
plt.title('Actual vs Prediction')
plt.xlabel('Actual', fontsize=15)
plt.ylabel('Predicted', fontsize=15)
# Finish with plt.show() like the other plotting cells, so the cell renders
# the figure instead of echoing the Text object returned by plt.ylabel().
plt.show()
