In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Load the housing dataset into a DataFrame.
df = pd.read_csv('/Housing.csv')  # Update path as needed

# Basic info.
# DataFrame.info() writes its report directly to stdout and returns None, so
# it is called as its own statement. Passing it to print() would emit the
# report first and then the label followed by the literal "None".
print("Basic Info:")
df.info()

# Descriptive statistics for the numeric columns and a per-column count of
# missing values.
print("Summary stats:\n", df.describe())
print("Null values:\n", df.isnull().sum())

# One-hot encode the categorical columns; drop_first=True omits the first
# level of each encoded column.
df = pd.get_dummies(df, drop_first=True)

# Target is the price column; the predictors are every remaining column.
y = df['price']
X = df.drop(columns=['price'])

# Hold out 20% of the rows for testing, with a fixed seed for the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a linear regression on the training split (fit() returns the estimator).
model = LinearRegression().fit(X_train, y_train)

# Predict the target for the held-out rows.
y_pred = model.predict(X_test)

# Compare the predictions with the true test-set values.
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

print("\nEvaluation Metrics:")
print(f"MAE: {mae:.2f}")
print(f"MSE: {mse:.2f}")
print(f"R² Score: {r2:.2f}")

# One fitted coefficient per feature column, labelled by column name.
coeff_df = pd.DataFrame({"Coefficient": model.coef_}, index=X.columns)
print("\nModel Coefficients:\n", coeff_df)

# Plot actual vs. predicted values, but only when the model has exactly one
# feature column to put on the x-axis.
if len(X.columns) == 1:
    plt.scatter(X_test, y_test, color='blue', label='Actual')
    plt.plot(X_test, y_pred, color='red', label='Predicted')
    plt.title("Linear Regression Fit")
    plt.xlabel(X.columns[0])
    plt.ylabel("Price")
    plt.legend()
    plt.show()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 545 entries, 0 to 544
Data columns (total 13 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   price             545 non-null    int64 
 1   area              545 non-null    int64 
 2   bedrooms          545 non-null    int64 
 3   bathrooms         545 non-null    int64 
 4   stories           545 non-null    int64 
 5   mainroad          545 non-null    object
 6   guestroom         545 non-null    object
 7   basement          545 non-null    object
 8   hotwaterheating   545 non-null    object
 9   airconditioning   545 non-null    object
 10  parking           545 non-null    int64 
 11  prefarea          545 non-null    object
 12  furnishingstatus  545 non-null    object
dtypes: int64(6), object(7)
memory usage: 55.5+ KB
Basic Info:
 None
Summary stats:
               price          area    bedrooms   bathrooms     stories  \
count  5.450000e+02    545.000000  545.000000  