In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
# Pull the feature and target columns out of `df` (defined outside this
# section) and reshape each into an (n_samples, 1) column vector.
feature_column = 'X2 house age'
target_column = 'Y house price of unit area'
X = df[feature_column].values.reshape(-1, 1)
y = df[target_column].values.reshape(-1, 1)

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scatter plot of the full (unsplit) data set
plt.scatter(X, y, label='Original Data')
plt.title('Original Data')
plt.xlabel('X')
plt.ylabel('y')
plt.show()

# Function to add polynomial features
def add_polynomial_features(X, degree):
    """Return the element-wise powers ``X**1 .. X**degree`` side by side.

    Args:
        X: 2-D array-like of shape (m, k).
        degree: Highest power to generate; must be at least 1.

    Returns:
        Array of shape (m, k * degree) holding
        ``[X**1, X**2, ..., X**degree]`` concatenated along axis 1.

    Raises:
        ValueError: If ``degree`` is smaller than 1.
    """
    if degree < 1:
        raise ValueError(f"degree must be >= 1, got {degree}")

    X = np.asarray(X)
    # Integer arrays wrap around silently on overflow (e.g. 50**15 does not
    # fit in int64), so promote them to float before raising to a power.
    if np.issubdtype(X.dtype, np.integer) or X.dtype == np.bool_:
        X = X.astype(float)

    return np.concatenate([X**i for i in range(1, degree + 1)], axis=1)

# Function to perform polynomial regression using gradient descent
def polynomial_regression(X, y, degree, learning_rate=0.001, epochs=5000):
    """Fit regression weights for ``X.dot(theta) ~ y`` by batch gradient descent.

    The mean squared error is minimised with ``epochs`` full-batch updates
    starting from a random normal initialisation. No intercept column is
    added; include a constant column in ``X`` if a bias term is wanted.

    Each column of ``X`` is divided by its largest absolute value before the
    descent and the learned weights are mapped back afterwards. Raw
    polynomial columns (x**4, x**15, ...) are large enough that the unscaled
    update overflows to inf/NaN at the default learning rate.

    Args:
        X: Design matrix of shape (m, n), already expanded to the desired
            polynomial features.
        y: Targets with m entries; shape (m, 1) or (m,).
        degree: Unused; kept so existing callers keep working. The number of
            weights is taken from ``X.shape[1]``.
        learning_rate: Step size, applied in the scaled feature space.
        epochs: Number of gradient descent iterations.

    Returns:
        Array ``theta`` of shape (n, 1) expressed for the *unscaled* features,
        so ``X.dot(theta)`` gives the predictions.

    Raises:
        ValueError: If ``X`` has no rows or ``y`` does not have one target
            per row of ``X``.
    """
    X = np.asarray(X, dtype=float)
    # Force a column vector: a 1-D y would broadcast (m, 1) - (m,) to (m, m).
    y = np.asarray(y, dtype=float).reshape(-1, 1)
    m, n = X.shape
    if m == 0:
        raise ValueError("X must contain at least one sample")
    if y.shape[0] != m:
        raise ValueError(
            f"X has {m} samples but y has {y.shape[0]} targets"
        )

    # Per-column max-abs scaling keeps every feature within [-1, 1].
    scale = np.abs(X).max(axis=0)
    scale[scale == 0] = 1.0  # all-zero column: nothing to scale
    X_scaled = X / scale

    theta_scaled = np.random.randn(n, 1)
    step = learning_rate * 2.0 / m

    for _ in range(epochs):
        error = X_scaled.dot(theta_scaled) - y
        theta_scaled -= step * X_scaled.T.dot(error)

    # X_scaled.dot(theta_scaled) == X.dot(theta_scaled / scale)
    return theta_scaled / scale.reshape(-1, 1)

# Function to make predictions using the polynomial regression model
def predict(X, theta):
    """Return the model output ``X.dot(theta)``.

    Args:
        X: Feature matrix whose columns line up with the rows of ``theta``.
        theta: Weight vector produced by the regression fit.

    Returns:
        The product of ``X`` and ``theta``.
    """
    predictions = X.dot(theta)
    return predictions

# Function to plot the polynomial regression model
def plot_polynomial_regression(X, y, theta, degree):
    """Scatter the given points and overlay the fitted polynomial curve.

    Args:
        X: Feature values to scatter, shape (m, 1).
        y: Target values to scatter against ``X``.
        theta: Weights for the polynomial features of the given ``degree``.
        degree: Polynomial degree used to expand the evaluation grid; must
            match the degree ``theta`` was fitted with.
    """
    # Evaluate the curve over the range of the plotted data. A fixed
    # [0, 2] grid would cover only a sliver of features with a wider range.
    X_range = np.linspace(np.min(X), np.max(X), 100).reshape(-1, 1)
    X_poly_range = add_polynomial_features(X_range, degree)
    y_pred = predict(X_poly_range, theta)

    plt.scatter(X, y, label='Original Data')
    plt.plot(X_range, y_pred, 'r-', label=f'Degree {degree} Polynomial Regression')
    plt.xlabel('X')
    plt.ylabel('y')
    plt.title(f'Polynomial Regression (Degree {degree})')
    plt.legend()
    plt.show()

# Fit, plot and score one model per polynomial degree
degrees = [1, 4, 15]

for degree in degrees:
    # Expand both splits to the same polynomial basis up front
    X_poly_train = add_polynomial_features(X_train, degree)
    X_poly_test = add_polynomial_features(X_test, degree)

    theta = polynomial_regression(X_poly_train, y_train, degree)

    # Show the fitted curve against the held-out points
    plot_polynomial_regression(X_test, y_test, theta, degree)

    # Mean squared error on the test set
    y_pred_test = predict(X_poly_test, theta)
    residuals = y_pred_test - y_test
    mse = np.mean(residuals**2)
    print(f'Mean Squared Error (Degree {degree}): {mse}')
