# Scikit‑learn Linear Regression (baseline)

This notebook demonstrates a minimal **scikit‑learn** linear regression workflow:
- load a tiny dataset (`data/linear_regression_samples.csv`), or generate a synthetic one if the file is missing,
- split train/test,
- fit `LinearRegression`,
- visualize predictions,
- compute R² on train & test.

In [None]:
import os, pandas as pd, numpy as np, matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# Load the sample dataset, or synthesize an equivalent one when the CSV is absent.
csv_path = "../data/linear_regression_samples.csv"
if os.path.exists(csv_path):
    df = pd.read_csv(csv_path)
else:
    # Fallback: 60 points with x drawn uniformly from [0, 50) and
    # y = 0.3 * x + 1.0 plus standard-normal noise. The generator is seeded
    # so the synthetic data is identical on every run.
    rng = np.random.default_rng(0)
    x = 50 * rng.random((60, 1))
    y = 0.3 * x + 1.0 + rng.normal(0, 1, size=x.shape)
    df = pd.DataFrame({"x": x.ravel(), "y": y.ravel()})

# Double brackets keep the feature matrix 2-D (n_samples, 1); the target is
# reshaped to a column vector as well.
X = df[["x"]].values
y = df["y"].values.reshape(-1, 1)

# Hold out 20% of the rows for testing; the fixed random_state makes the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LinearRegression()
model.fit(X_train, y_train)

# Flatten the fitted parameters so they print as simple 1-D arrays.
print("Intercept:", model.intercept_.ravel())
print("Coef:", model.coef_.ravel())

# LinearRegression.score reports the coefficient of determination (R^2).
print("R2 train:", model.score(X_train, y_train))
print("R2 test:", model.score(X_test, y_test))

# Plot every observed sample, then overlay the model's predictions for the
# held-out test inputs. Colours are set explicitly so the title's colour key
# stays correct even when a non-default matplotlib style is active.
plt.figure()
plt.scatter(X, y, s=25, color="tab:blue", label="all samples")
y_hat = model.predict(X_test)
plt.scatter(X_test, y_hat, s=25, color="tab:orange", label="predicted (test)")
plt.xlabel("x")
plt.ylabel("y")
plt.title("Linear Regression: blue=all, orange=predicted test")
plt.legend()
plt.show()