In [1]:
# Importing Libraries

# Pandas for data manipulation
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Read the housing table from 'Housing.csv' (path is relative to the working directory)
dataset = pd.read_csv(filepath_or_buffer='Housing.csv')

In [3]:
# Preview the top of the table; n=5 is the default row count of head()
print(dataset.head(n=5))

      price  area  bedrooms  bathrooms  stories mainroad guestroom basement  \
0  13300000  7420         4          2        3      yes        no       no   
1  12250000  8960         4          4        4      yes        no       no   
2  12250000  9960         3          2        2      yes        no      yes   
3  12215000  7500         4          2        2      yes        no      yes   
4  11410000  7420         4          1        2      yes       yes      yes   

  hotwaterheating airconditioning  parking prefarea furnishingstatus  
0              no             yes        2      yes        furnished  
1              no             yes        3       no        furnished  
2              no              no        2      yes   semi-furnished  
3              no             yes        3      yes        furnished  
4              no             yes        2       no        furnished  


In [None]:
# Preprocessing the dataset

In [4]:
# Count missing entries per column (isna() is the same check as isnull();
# axis=0 is the default direction of DataFrame.sum, i.e. one total per column)
print(dataset.isna().sum(axis=0))

price               0
area                0
bedrooms            0
bathrooms           0
stories             0
mainroad            0
guestroom           0
basement            0
hotwaterheating     0
airconditioning     0
parking             0
prefarea            0
furnishingstatus    0
dtype: int64


In [5]:
# One-hot encode the categorical columns so that only numeric/indicator columns
# reach the regression. drop_first=True keeps k-1 indicator columns for a
# k-level category.
categorical_columns = [
    'mainroad', 'guestroom', 'basement', 'hotwaterheating',
    'airconditioning', 'prefarea', 'furnishingstatus',
]

# `dataset` is reassigned below, so after one successful run the raw columns are
# gone and a plain re-run of this cell would fail with a KeyError. Look at which
# raw columns are still present so the cell is safe to execute more than once.
pending_columns = [name for name in categorical_columns if name in dataset.columns]

if len(pending_columns) == len(categorical_columns):
    # First run: every raw categorical column is still there, encode them all.
    dataset = pd.get_dummies(dataset, columns=categorical_columns, drop_first=True)
elif pending_columns:
    # Only some raw columns exist: the frame is in an unexpected state, so fail
    # loudly with the same exception type the unguarded call would have raised.
    missing_columns = sorted(set(categorical_columns) - set(pending_columns))
    raise KeyError(f'Categorical columns missing from dataset: {missing_columns}')
# Otherwise none of the raw columns remain; presumably the encoding was already
# applied by an earlier run of this cell, so there is nothing left to do.

NameError: name 'data' is not defined

In [None]:
# Separate the value being predicted from the model inputs
y = dataset['price']  # target: the price column
X = dataset.drop(columns=['price'])  # features: every column except price

In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# shuffle, and therefore the split, repeatable between runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Report how many rows landed in each partition
print(
    f'Training set size: {X_train.shape[0]}',
    f'Testing set size: {X_test.shape[0]}',
    sep='\n',
)

In [None]:
# Instantiate the linear regressor (fit_intercept=True is the library default,
# spelled out here for readability)
model = LinearRegression(fit_intercept=True)

In [None]:
# Estimate the model parameters from the training split
model.fit(X=X_train, y=y_train)

In [None]:
# Show the fitted parameters: one coefficient per feature column, plus the intercept
for label, value in (('Coefficients:', model.coef_), ('Intercept:', model.intercept_)):
    print(label, value)

In [None]:
# Predict prices for the held-out rows
y_pred = model.predict(X=X_test)

In [None]:
# Score the predictions against the true test prices with three metrics
r2 = r2_score(y_true=y_test, y_pred=y_pred)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)

In [None]:
# Print the evaluation metrics, one per line
print(
    f'Mean Absolute Error (MAE): {mae}',
    f'Mean Squared Error (MSE): {mse}',
    f'R² Score: {r2}',
    sep='\n',
)

In [None]:
# Scatter the predicted prices against the actual test prices; regplot also
# overlays a fitted linear trend through the points
fig, ax = plt.subplots(figsize=(10, 6))
sns.regplot(x=y_test, y=y_pred, marker='o', color='blue', ax=ax)
ax.set_xlabel('Actual Price')
ax.set_ylabel('Predicted Price')
ax.set_title('Actual vs Predicted Price')
plt.show()