# Linear Regression - Example Datasets
This notebook demonstrates Linear Regression using different datasets.
The examples include datasets like California Housing, Diabetes, and Synthetic Data.

## Example 1: California Housing Prices
This dataset contains features related to California housing; the goal is to predict the median house value.

In [None]:

import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Fetch the California Housing data and pull out features and target
data = fetch_california_housing()
X, y = data.data, data.target

# Hold out 20% of the rows for evaluation (fixed seed keeps the split repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the linear model on the training portion
model = LinearRegression().fit(X_train, y_train)

# Score the held-out rows
y_pred = model.predict(X_test)
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

# Report the metrics
print("📊 California Housing Dataset")
print(f"Mean Squared Error: {mse}")
print(f"R² Score: {r2}")


## Example 2: Diabetes Progression
This dataset is used to predict diabetes progression based on various health measurements.

In [None]:

import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load the diabetes dataset: features in X, progression target in y
data = load_diabetes()
X = data.data
y = data.target

# Split data: 80% train / 20% test, fixed seed so the split is repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = LinearRegression()
model.fit(X_train, y_train)

# Predict on the held-out rows
y_pred = model.predict(X_test)

# Evaluate
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Results
# The leading blank line is written as the "\n" escape: a raw line break inside
# a single-quoted/double-quoted literal is a SyntaxError.
print("\n📊 Diabetes Dataset")
print(f"Mean Squared Error: {mse}")
print(f"R² Score: {r2}")


## Example 3: Synthetic Regression Data
This example uses synthetic data generated for linear regression. It's great for testing and model validation.

In [None]:

from sklearn.datasets import make_regression

# Generate synthetic dataset: 200 samples, 3 features, Gaussian noise, fixed seed
X, y = make_regression(n_samples=200, n_features=3, noise=10, random_state=42)

# Split data: 80% train / 20% test, fixed seed so the split is repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = LinearRegression()
model.fit(X_train, y_train)

# Predict on the held-out rows
y_pred = model.predict(X_test)

# Evaluate
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Results
# The leading blank line is written as the "\n" escape: a raw line break inside
# a single-quoted/double-quoted literal is a SyntaxError.
print("\n📊 Synthetic Regression Dataset")
print(f"Mean Squared Error: {mse}")
print(f"R² Score: {r2}")


## Example 4: Medical Insurance Cost Prediction
This dataset includes features like age, BMI, and smoking habits; the example below uses the numeric columns age, BMI, and number of children to predict insurance costs.

In [None]:

import pandas as pd

# Load dataset from a CSV link (requires network access when the cell runs)
data = pd.read_csv('https://raw.githubusercontent.com/stedy/Machine-Learning-with-R-datasets/master/insurance.csv')
# Only the numeric columns age, bmi, and children are used as features here
X = data[['age', 'bmi', 'children']]  # Example features
y = data['charges']

# Split data: 80% train / 20% test, fixed seed so the split is repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = LinearRegression()
model.fit(X_train, y_train)

# Predict on the held-out rows
y_pred = model.predict(X_test)

# Evaluate
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Results
# The leading blank line is written as the "\n" escape: a raw line break inside
# a single-quoted/double-quoted literal is a SyntaxError.
print("\n📊 Medical Insurance Cost Prediction")
print(f"Mean Squared Error: {mse}")
print(f"R² Score: {r2}")


## Example 5: Student Study Hours vs Scores
This is a small dataset that correlates study hours with exam scores.

In [None]:

# Creating a small dataset for Student Study Hours vs Scores
import pandas as pd
data = pd.DataFrame({
    'hours_studied': [1, 2, 3, 4, 5, 6, 7, 8],
    'score': [20, 30, 50, 55, 65, 75, 85, 95]
})
X = data[['hours_studied']]  # double brackets keep X as a 2-D DataFrame
y = data['score']

# Split data
# NOTE: the dataset has only 8 rows, so the held-out set is very small and the
# metrics below are illustrative rather than reliable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = LinearRegression()
model.fit(X_train, y_train)

# Predict on the held-out rows
y_pred = model.predict(X_test)

# Evaluate
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Results
# Each line is its own print call; the header is a single string literal.
print("📊 Student Study Hours vs Scores")
print(f"Mean Squared Error: {mse}")
print(f"R² Score: {r2}")