In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.preprocessing import LabelEncoder

In [2]:
# Step 1: Read the housing CSV into a DataFrame
# NOTE(review): this is an absolute path (looks like a Colab working directory -- confirm);
# point file_path at wherever Housing.csv lives in your environment.
file_path = "/content/Housing.csv"
df = pd.read_csv(filepath_or_buffer=file_path)

In [3]:
# Step 2: Preprocessing
# Replace each categorical column with integer codes so it can be used as a
# numeric feature. LabelEncoder numbers the distinct values of a column by
# their sorted order (code i corresponds to le.classes_[i]).
categorical_columns = [
    'mainroad',
    'guestroom',
    'basement',
    'hotwaterheating',
    'airconditioning',
    'prefarea',
    'furnishingstatus',
]
le = LabelEncoder()

# Column name -> {original value: integer code}.
# The single encoder `le` is refit on every column, so after the loop it only
# remembers the categories of the last column it saw. Recording the mapping
# here keeps every column's encoding inspectable (e.g. when reading the sign
# of a fitted coefficient) and reversible.
# Caveat: the loop overwrites df in place, so re-running this cell on an
# already-encoded df records identity mappings (0 -> 0, 1 -> 1, ...); re-run
# from the load cell to rebuild the original mappings.
category_codes = {}
for column in categorical_columns:
    # Columns that are absent from this dataset are skipped, not treated as errors.
    if column in df.columns:
        df[column] = le.fit_transform(df[column])
        category_codes[column] = {
            category: code for code, category in enumerate(le.classes_.tolist())
        }

In [4]:
# Step 3: Feature Selection
# "price" is the target; every other column of df is used as a feature.
# If your dataset names the target differently, change "price" on both lines.
y = df["price"]
X = df.drop("price", axis=1)



In [5]:
# Step 4: Hold out 20% of the rows for testing; the fixed random_state makes
# the split reproducible from run to run
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 5: Fit a linear regression on the training rows
# (fit() returns the estimator itself, so construction and training are chained)
model = LinearRegression().fit(X_train, y_train)

# Step 6: Predict the target for the held-out rows
y_pred = model.predict(X_test)

In [6]:
# Step 7: Score the held-out predictions
# MAE is expressed in the units of the target; R2 is the share of the
# target's variance that the model explains.
r2 = r2_score(y_true=y_test, y_pred=y_pred)
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)

# Report both metrics
print("Mean Absolute Error (MAE):", mae)
print("R-squared (R2 Score):", r2)


Mean Absolute Error (MAE): 979679.6912959901
R-squared (R2 Score): 0.6494754192267803


In [7]:

# Optional: list the fitted coefficient of every feature
# The model was trained on a row subset of X, so pairing X's column names
# with model.coef_ position by position lines each name up with its weight.
print("Feature Coefficients:")
named_coefficients = zip(X.columns, model.coef_)
for feature, coef in named_coefficients:
    print(f"{feature}: {coef}")

Feature Coefficients:
area: 235.84877178813724
bedrooms: 78574.49238760071
bathrooms: 1097117.2661479574
stories: 406223.16446288454
mainroad: 366824.1923924831
guestroom: 233146.76562655077
basement: 393159.7787258404
hotwaterheating: 687881.3109570158
airconditioning: 785550.5792954286
parking: 225756.5139762815
prefarea: 629901.6608459124
furnishingstatus: -210397.11827155566
