## **Linear Regression**

Linear regression is a statistical technique that uses a linear equation to model the relationship between variables. It is used to predict the value of one variable (the target) from the value of another variable (the feature).


**Imports**

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

**Data Loading**

In [None]:
# Read the CSV file into a DataFrame
df = pd.read_csv(filepath_or_buffer=".csv")  # Replace with actual dataset

# Preview the first five rows (rendered as the cell output)
df.head(n=5)

**Minimal Preprocessing**

In [None]:
# Report how many missing values each column contains
print(df.isna().sum())

# Discard every row that has at least one missing value
df = df.dropna(axis=0, how="any")

# Feature matrix (kept 2-D, as scikit-learn expects) and target vector
X = df.loc[:, ['SquareFeet']]  # Independent variable
y = df.loc[:, 'Price']  # Dependent variable

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

**Model Building**

In [None]:
# Create the estimator and fit it on the training split in one step
# (fit() returns the fitted estimator itself)
model = LinearRegression().fit(X_train, y_train)

# Report the learned parameters of the fitted line
print("Intercept:", model.intercept_)
print("Coefficient:", model.coef_)

**Predictions**

In [None]:
# Use the model fitted in the Model Building step to predict the target for
# the held-out test features. The resulting y_pred is what the Performance
# Metrics and Visualizations cells consume; previously this cell only
# re-trained the model and never defined y_pred, so those cells failed with
# a NameError on a fresh kernel.
y_pred = model.predict(X_test)

# Sanity check: first few actual vs. predicted values side by side
pd.DataFrame({"Actual": y_test.to_numpy(), "Predicted": y_pred}).head()

**Performance Metrics**

In [None]:
# Error metrics comparing the held-out targets with the model's predictions
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # RMSE is the square root of the MSE

# Emit one metric per line
print(
    f"Mean Absolute Error (MAE): {mae}",
    f"Mean Squared Error (MSE): {mse}",
    f"Root Mean Squared Error (RMSE): {rmse}",
    f"R-squared (R²): {r2}",
    sep="\n",
)


**Visualizations**

In [None]:
# Figure 1: actual vs predicted values, with a dashed y = x reference line
# (points on the line are perfect predictions)
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(x=y_test, y=y_pred, color='blue', alpha=0.7, ax=ax)
price_range = [y_test.min(), y_test.max()]
ax.plot(price_range, price_range, color='red', linestyle='dashed')
ax.set_xlabel("Actual Prices")
ax.set_ylabel("Predicted Prices")
ax.set_title("Actual vs Predicted Prices")
plt.show()

# Figure 2: the fitted regression line drawn over the training and testing points
train_sqft = X_train.to_numpy().ravel()
test_sqft = X_test.to_numpy().ravel()

fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(x=train_sqft, y=y_train, color='blue', label="Training Data", ax=ax)
sns.scatterplot(x=test_sqft, y=y_test, color='green', label="Testing Data", ax=ax)
ax.plot(test_sqft, y_pred, color='red', linewidth=2, label="Regression Line")
ax.set_xlabel("Square Feet")
ax.set_ylabel("Price")
ax.set_title("Linear Regression Fit")
ax.legend()
plt.show()