# Linear Regression (Ordinary Least Squares)
Scenario:  
You are predicting house prices based on square footage.

In [41]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Toy dataset, written one row per house as (square footage, sale price).
data = pd.DataFrame(
    [
        (500, 150000),
        (750, 200000),
        (1000, 250000),
        (1250, 275000),
        (1500, 300000),
        (1750, 325000),
        (2000, 400000),
    ],
    columns=['Square Footage', 'Price'],
)

# Bare last expression so the notebook renders the frame as the cell output.
data

Unnamed: 0,Square Footage,Price
0,500,150000
1,750,200000
2,1000,250000
3,1250,275000
4,1500,300000
5,1750,325000
6,2000,400000


In [42]:
# Scatter the raw observations: square footage on the x-axis, price on the y-axis.
fig = px.scatter(
    data,
    x='Square Footage',
    y='Price',
    title='House Prices',
    template='plotly_dark',
)
# Enlarge the markers so the individual points are easy to pick out.
fig.update_traces(marker={'size': 15})
fig.show()

In [43]:
# Split the frame into the feature (x) and the target (y).
# Both use a list of column labels, so each stays a one-column DataFrame
# rather than collapsing to a Series.
x = data.loc[:, ['Square Footage']]
y = data.loc[:, ['Price']]

In [44]:
# Instantiate the linear regression estimator, then fit it on the
# square-footage feature against the price target.
model = LinearRegression()
model.fit(X=x, y=y)

In [45]:
# Predict a price for every row the model was fitted on (in-sample predictions).
predictions = model.predict(X=x)

In [46]:
# Evaluate the model: compare the predictions with the observed prices.
# Note these are the same rows used for fitting, so both scores are in-sample.
mse = mean_squared_error(y_true=y, y_pred=predictions)
r2 = r2_score(y_true=y, y_pred=predictions)

print(f'Mean Squared Error: {mse}')
print(f'R2 Score: {r2}')

Mean Squared Error: 165816326.5306124
R2 Score: 0.9713656387665198


In [51]:
# Visualize the fit: observed points with the fitted line drawn on top.

# Line trace of the model's predictions, flattened to 1-D for plotting.
regression_line = go.Scatter(
    x=data['Square Footage'],
    y=predictions.ravel(),
    mode='lines',
    name='Regression Line',
    line={'color': 'red'},
)

# Base figure: scatter of the observed prices.
scatter = px.scatter(
    data,
    x='Square Footage',
    y='Price',
    title='Linear Regression: House Prices',
    labels={'Square Footage': 'Square Footage', 'Price': 'Price'},
    template='plotly_dark',
)

# Overlay the line on the scatter and render the combined figure.
scatter.add_trace(regression_line)
scatter.show()