In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error

# Fetch the Boston housing data from OpenML (load_boston is deprecated in scikit-learn)
boston = fetch_openml(name='boston', version=1, as_frame=True)
y = boston.target
X = boston.data[['RM']]  # single predictor: 'RM' (average number of rooms per dwelling)

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit an ordinary least-squares line on the training rows
model = LinearRegression().fit(X_train, y_train)

# Predictions for the held-out rows
y_pred = model.predict(X_test)

# Report fit quality on the test set together with the fitted line's parameters
for label, value in (
    ("R^2 Score:", r2_score(y_test, y_pred)),
    ("Mean Squared Error:", mean_squared_error(y_test, y_pred)),
    ("Intercept:", model.intercept_),
    ("Coefficient (slope):", model.coef_[0]),
):
    print(label, value)

# Plot the held-out observations against the fitted line
fig, ax = plt.subplots()
ax.scatter(X_test, y_test, color='blue', label='Actual Prices')
ax.plot(X_test, y_pred, color='red', linewidth=2, label='Predicted Line')
ax.set_xlabel("Average Number of Rooms per Dwelling (RM)")
ax.set_ylabel("Median House Price ($1000s)")
ax.set_title("Linear Regression on Boston Housing (Feature: RM)")
ax.legend()
ax.grid(True)
plt.show()


# Polynomial regression (Auto MPG dataset)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error

# Load the Auto MPG dataset from the UCI repository (fetched over HTTPS rather than plain HTTP)
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data"
column_names = ['mpg','cylinders','displacement','horsepower','weight','acceleration','model_year','origin','car_name']
# '?' is read as a missing value. The separator is a raw string so the regex
# escape \s is not interpreted as an (invalid) Python string escape.
# NOTE(review): comment='\t' may cause the trailing car_name field to be
# discarded by the parser -- confirm; car_name is not used below either way.
raw_data = pd.read_csv(url, names=column_names, na_values='?', comment='\t', sep=r'\s+', skipinitialspace=True)

# Drop rows with missing values, but only in the columns the model actually uses,
# so that NaNs in unrelated columns cannot discard usable rows. The raw frame is
# left untouched so this cell can be re-run safely.
data = raw_data.dropna(subset=['horsepower', 'mpg'])

# Use 'horsepower' as the independent variable (kept 2-D, as a DataFrame, for sklearn)
X = data[['horsepower']]
y = data['mpg']

# Split data (fixed seed keeps the split reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Apply Polynomial Features (degree = 2 or 3)
poly = PolynomialFeatures(degree=2)  # You can change to degree=3 for more flexibility
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)

# Train polynomial regression model
model = LinearRegression()
model.fit(X_train_poly, y_train)

# Predict
y_pred = model.predict(X_test_poly)

# Evaluation
print("Polynomial Regression R^2 Score:", r2_score(y_test, y_pred))
print("Mean Squared Error:", mean_squared_error(y_test, y_pred))

# Visualization
plt.scatter(X_test, y_test, color='blue', label='Actual MPG')
# Sort the test rows by horsepower so the fitted curve is drawn left to right.
# The sorted values stay in a DataFrame for the transform because `poly` was
# fitted on a DataFrame; passing a bare array would trigger scikit-learn's
# feature-name mismatch warning.
X_test_sorted_df = X_test.sort_values(by='horsepower')
X_test_sorted = X_test_sorted_df['horsepower'].to_numpy()
X_test_sorted_poly = poly.transform(X_test_sorted_df)
y_pred_sorted = model.predict(X_test_sorted_poly)
plt.plot(X_test_sorted, y_pred_sorted, color='red', label='Polynomial Fit', linewidth=2)

plt.xlabel("Horsepower")
plt.ylabel("Miles Per Gallon (MPG)")
plt.title("Polynomial Regression on Auto MPG Dataset")
plt.legend()
plt.grid(True)
plt.show()
