In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
# Read the housing CSV (path is relative to the working directory) and
# preview the first five rows.
df = pd.read_csv(filepath_or_buffer="Housing.csv")
df.head(n=5)

In [None]:
# Columns that hold 'yes' / 'no' flags and are converted to booleans below.
yes_no_columns = [
    'mainroad', 'guestroom', 'basement',
    'hotwaterheating', 'airconditioning', 'prefarea'
]

# Series.map() replaces every value that is not a key of the mapping with
# NaN, so mapping a column that has already been converted would wipe it out.
# Skipping boolean columns keeps this cell safe to run more than once.
for col in yes_no_columns:
    if pd.api.types.is_bool_dtype(df[col]):
        continue  # already converted by a previous run of this cell
    df[col] = df[col].map({'yes': True, 'no': False})

In [None]:
# Preview the converted flag columns.
df.loc[:, yes_no_columns].head()

In [None]:
# Encode furnishing status as an ordered integer
# (unfurnished=0 < semi-furnished=1 < furnished=2).
# Series.map() yields NaN for values that are not keys of the mapping, and
# the integer codes are not keys, so the conversion is skipped once the
# column is numeric. This keeps the cell safe to run more than once.
if not pd.api.types.is_numeric_dtype(df['furnishingstatus']):
    df['furnishingstatus'] = df['furnishingstatus'].map({
        'furnished': 2,
        'semi-furnished': 1,
        'unfurnished': 0
    })

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# Annotated heatmap of the column-by-column correlations from df.corr().
corr_matrix = df.corr()

fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(corr_matrix, annot=True, cmap="coolwarm", ax=ax)
ax.set_title("Correlation Heatmap of Housing Dataset")
plt.show()

In [None]:
# Scatter plot of price against area, one point per row of df.
fig, ax = plt.subplots()
ax.scatter(df['area'], df['price'])
ax.set(
    xlabel="Area (sqft)",
    ylabel="Price",
    title="Area vs House Price",
)
plt.show()

In [None]:
# Price distribution, one box per value of the air-conditioning flag.
fig, ax = plt.subplots()
sns.boxplot(data=df, x='airconditioning', y='price', ax=ax)
ax.set_title("Price vs Air Conditioning")
plt.show()

In [None]:
# 'price' is the regression target; every other column is a predictor.
y = df['price']
X = df.drop(columns='price')

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardize the predictors. The scaler is fitted on every row of X and
# then applied to those same rows.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

In [None]:
# Hold out 20% of the rows for evaluation. The split is done on the raw
# features and the scaling statistics are learned from the training rows
# only, so nothing about the held-out rows reaches the preprocessing step.
# random_state is fixed so the partition is reproducible.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

In [None]:
# Fit a LinearRegression with default constructor arguments on the
# training split.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

In [None]:
# Score the fitted model on the held-out rows.
y_pred = model.predict(X_test)

# r2_score is the coefficient of determination (R²), not a classification
# accuracy, so it is reported under its own name and on its native scale
# rather than as a percentage.
print(f"R² Score (test set): {r2_score(y_test, y_pred):.3f}")

In [None]:
# Pair each feature name with its fitted coefficient and rank the rows by
# absolute coefficient size, largest first.
feature_importance = (
    pd.DataFrame({'Feature': X.columns, 'Coefficient': model.coef_})
    .assign(Abs_Coefficient=lambda frame: frame['Coefficient'].abs())
    .sort_values(by='Abs_Coefficient', ascending=False)
)

feature_importance

In [None]:
# Signed coefficients for the first 12 rows of the ranked table.
feature_importance.loc[:, ['Feature', 'Coefficient']].head(n=12)