# Task 3: Linear Regression - AI & ML Internship
*Date: 2025-06-26*

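This notebook applies **linear regression** to the Housing Price Prediction dataset: it fits a multiple linear regression on all features, evaluates it on a held-out test set, and plots the predictions against a single feature (`area`) as a simple-regression-style view.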

## Step 1: Import Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


## Step 2: Load the Dataset

In [None]:
from pathlib import Path

# File name the rest of the notebook expects the dataset to have on disk.
DATA_FILE = Path("Housing.csv")

# Only ask for an upload when the CSV is missing. This keeps "Restart & Run All"
# unattended once the file is present, and lets the notebook run outside Google
# Colab (where `google.colab` cannot be imported) as long as the CSV sits in the
# working directory. To load a fresh copy, delete the existing file first.
uploaded = {}
if not DATA_FILE.exists():
    try:
        from google.colab import files
    except ImportError as exc:
        raise FileNotFoundError(
            f"{DATA_FILE} not found. Place it in the working directory, "
            "or run this notebook in Google Colab to upload it."
        ) from exc

    # Returns a dict keyed by uploaded file name. The read below assumes, as the
    # original cell did, that Colab also writes the file into the working directory.
    uploaded = files.upload()
    if DATA_FILE.name not in uploaded:
        raise FileNotFoundError(
            f"Expected an upload named {DATA_FILE.name!r}, got {sorted(uploaded)}."
        )

df = pd.read_csv(DATA_FILE)
df.head()

## Step 3: Preprocess the Dataset

In [None]:
# Columns whose raw values are the strings 'yes' / 'no'.
binary_cols = ['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'prefarea']
yes_no_codes = {'yes': 1, 'no': 0}

# This cell removes 'furnishingstatus', so its presence marks `df` as not yet
# encoded. Guarding on it makes the cell safe to re-run: without the guard a
# second run would map the already-encoded 1/0 values to NaN and then fail
# with a KeyError on the missing column.
if 'furnishingstatus' in df.columns:
    # Work on a copy and rebind `df` only at the end, so a failure part-way
    # through leaves the loaded frame untouched.
    encoded = df.copy()

    for col in binary_cols:
        # Normalise whitespace/capitalisation before mapping so 'Yes' or
        # ' no' are still recognised.
        flags = encoded[col].astype(str).str.strip().str.lower().map(yes_no_codes)

        # Series.map leaves NaN for anything not in the dict; fail loudly here
        # instead of letting NaNs reach the model.
        unmapped = flags.isna()
        if unmapped.any():
            bad_values = sorted(encoded.loc[unmapped, col].astype(str).unique())
            raise ValueError(
                f"Column {col!r} contains values other than yes/no: {bad_values}"
            )
        encoded[col] = flags.astype('int64')

    # One-hot encode 'furnishingstatus'. drop_first=True drops one level to
    # avoid perfectly collinear dummy columns; dtype=int keeps the result
    # numeric regardless of the pandas version's default dummy dtype.
    furnishing_dummies = pd.get_dummies(
        encoded['furnishingstatus'], drop_first=True, dtype=int
    )
    df = pd.concat(
        [encoded.drop(columns='furnishingstatus'), furnishing_dummies], axis=1
    )

df.head()

## Step 4: Split the Data into Train and Test Sets

In [None]:
# Features are every column except 'price'; 'price' itself is the target.
X, y = df.drop(columns='price'), df['price']

# Hold out 20% of the rows for evaluation. The fixed random_state makes the
# split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


## Step 5: Train the Linear Regression Model

In [None]:
# fit() returns the estimator itself, so construction and training can be chained.
lr = LinearRegression().fit(X_train, y_train)

# Echo the fitted estimator as the cell output.
lr

## Step 6: View Coefficients and Intercept

In [None]:
print("Intercept:", lr.intercept_)
print("Coefficients:", lr.coef_)

# The raw array above cannot be matched to features by eye. Label each
# coefficient with the column it belongs to (same order as the columns the
# model was trained on) and show it as the cell output.
pd.Series(lr.coef_, index=X_train.columns, name='coefficient')

## Step 7: Make Predictions on Test Set

In [None]:
# Predict a price for every row of the held-out feature matrix.
y_pred = lr.predict(X=X_test)

# Preview the first five predictions as the cell output.
y_pred[:5]

## Step 8: Evaluate the Model

In [None]:
# Score the test-set predictions with each metric, in the order the names
# are unpacked on the left-hand side.
mae, mse, r2 = (
    metric(y_test, y_pred)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)

# Report the three scores.
print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("R² Score:", r2)

## Step 9: Plot Actual vs. Predicted Prices Against Area (Optional)

In [None]:
# Visualise actual vs. predicted prices against one feature ('area').
# Note: y_pred holds whatever lr.predict(X_test) produced, so the red points
# show the fitted model's predictions projected onto 'area' rather than a
# separate single-feature regression line.
if 'area' in X_test.columns:
    # Explicit figure/axes instead of the implicit pyplot state machine.
    fig, ax = plt.subplots()

    # Partial transparency keeps the actual points visible where the
    # predicted points are drawn on top of them.
    ax.scatter(X_test['area'], y_test, color='blue', alpha=0.6, label='Actual')
    ax.scatter(X_test['area'], y_pred, color='red', alpha=0.6, label='Predicted')

    ax.set_xlabel('Area')
    ax.set_ylabel('Price')
    ax.set_title('Linear Regression: Area vs Price')
    ax.legend()
    plt.show()
else:
    # Say why nothing was drawn instead of silently producing no output.
    print("Skipping plot: 'area' column not found in X_test.")