# Supervised Learning Coursework 1 Coding Part

In [56]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

##### Initialise the data set for Question 1

In [2]:
# The four (x_i, y_i) training points fitted in the Question 1 cells.
data_x = np.asarray((1, 2, 3, 4))
data_y = np.asarray((3, 2, 0, 5))

#### Question 1

In [177]:

def extract_features(x, degree):
    """Build the polynomial feature matrix ``[x**0, x**1, ..., x**degree]``.

    Args:
        x: Input values. Any array-like (NumPy array, list, tuple or
            scalar) is accepted; it is converted with ``np.asarray``.
        degree: Highest power to include. Powers ``0`` to ``degree`` are
            generated, i.e. ``degree + 1`` features.

    Returns:
        For 1-D ``x`` of length ``n``, an array of shape
        ``(n, degree + 1)`` whose column ``i`` holds ``x**i``. For a scalar
        ``x`` the result is the 1-D array of its powers.
    """
    # Plain Python sequences do not support ``**``; converting once lets
    # lists and tuples behave exactly like arrays.
    x = np.asarray(x)
    return np.array([x**i for i in range(degree + 1)]).T

def linear_regression(X, y):
    """Return the least-squares weights ``w`` minimising ``||X @ w - y||**2``.

    Args:
        X: Design matrix of shape ``(n_samples, n_features)``.
        y: Targets of shape ``(n_samples,)`` or ``(n_samples, n_targets)``.

    Returns:
        Weights of shape ``(n_features,)`` or ``(n_features, n_targets)``.
        When ``X`` is rank deficient the minimum-norm solution is returned.
    """
    # Solve the least-squares problem on X itself instead of the normal
    # equations (X.T @ X) w = X.T @ y: forming X.T @ X squares the
    # condition number, so ill-conditioned design matrices lose roughly
    # twice as many digits, and np.linalg.solve raises LinAlgError when
    # X.T @ X is singular.
    weights, _residuals, _rank, _singular_values = np.linalg.lstsq(
        X, y, rcond=None
    )
    return weights

##### Question 1a

In [None]:
# Question 1a: draw the least-squares polynomial fit for each basis
# dimension k = 1..4 (polynomial degree k - 1) over the data points.
plt.figure(figsize=(10, 6))
x_plot = np.linspace(0, 5, 500)

for degree in range(0, 4):
    X = extract_features(data_x, degree)
    # Use the full-precision weights: rounding them before evaluating the
    # polynomial would plot a curve that is not the fitted one.
    coeffs = linear_regression(X, data_y)
    X_plot = extract_features(x_plot, degree)
    y_plot = X_plot @ coeffs
    # k is the number of basis functions, i.e. degree + 1, matching the
    # convention used by the other cells.
    plt.plot(x_plot, y_plot, label=f'k={degree + 1}')

plt.scatter(data_x, data_y, color='red', label='Data Points')
plt.xlabel('x')
plt.ylabel('y')
plt.ylim(-5, 10)
plt.title('Polynomial Fits for Different Degrees')
plt.legend()
plt.grid(True)
# Move the axes so they cross at the origin and hide the unused frame edges.
ax = plt.gca()
ax.spines['bottom'].set_position(('data', 0))
ax.spines['left'].set_position(('data', 0))
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.show()

##### Question 1b

In [None]:
# Question 1b: print the fitted weights (lowest power first) for each basis
# dimension k = 1..4.
for degree in range(0, 4):
    X = extract_features(data_x, degree)
    # Rounding here is for display only.
    coeffs = linear_regression(X, data_y).round(2)
    # degree + 1 is the basis dimension k, not the polynomial degree, so
    # label it as k like the other cells do.
    print(f"k={degree + 1} polynomial coefficients: {coeffs}")

##### Question 1c

In [None]:
# Question 1c: training mean squared error of each fit, k = 1..4.
for degree in range(0, 4):
    X = extract_features(data_x, degree)
    # Evaluate the error with the unrounded weights; rounding them to two
    # decimals first reports the error of a different polynomial (e.g. a
    # non-zero MSE for the cubic that interpolates all four points).
    coeffs = linear_regression(X, data_y)
    y_pred = X @ coeffs
    print(y_pred)
    mse = np.mean((data_y - y_pred) ** 2)
    # k is the basis dimension (degree + 1), consistent with the other cells.
    print(f"MSE for k={degree + 1}: {mse}")

#### Question 2

##### Question 2a i

In [None]:
def g_sigma(x, sigma):
    """Evaluate ``sin(2*pi*x)**2`` plus zero-mean Gaussian noise.

    Args:
        x: Input locations; any array-like (array, list, tuple or scalar).
        sigma: Standard deviation of the additive noise (must be >= 0).

    Returns:
        An array with the same shape as ``x`` holding the noisy function
        values. Noise is drawn from NumPy's global random state, so seed it
        with ``np.random.seed`` for reproducible samples.
    """
    x = np.asarray(x)
    # Draw one independent noise value per element. Sizing by ``len(x)``
    # fails for scalars and, for multi-dimensional input, would broadcast a
    # single noise row across the whole array.
    noise = np.random.normal(0, sigma, size=x.shape)
    return np.sin(2 * np.pi * x) ** 2 + noise

# Generate the data and plot it.
# Seed the global generator so the 30 uniform inputs and their noisy targets
# are reproducible.
np.random.seed(0)
x_sample = np.random.uniform(0, 1, 30)
y_sample = g_sigma(x_sample, sigma=0.07)

# Dense grid on [0, 1] for the noise-free curve.
x_plot = np.linspace(0, 1, 500)
y_plot = np.sin(2 * np.pi * x_plot) ** 2

# Noise-free function as a line, noisy samples as red markers.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(x_plot, y_plot, label='sin^2(2πx)')
ax.scatter(x_sample, y_sample, color='red', label='Noisy Data Points')
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_title('Function with Noise')
ax.legend()
ax.grid(True)
plt.show()

##### Question 2a ii

In [None]:
# Question 2a ii: fit the noisy sample with polynomial bases of dimension
# k in {2, 5, 10, 14, 18} (degree k - 1) and overlay the fitted curves.
degrees = [2, 5, 10, 14, 18]
x_plot = np.linspace(0, 1, 500)

fig, ax = plt.subplots(figsize=(10, 6))
for k in degrees:
    # Fit on the sample, then evaluate the same basis on the plotting grid.
    X = extract_features(x_sample, k - 1)
    coeffs = linear_regression(X, y_sample)
    X_plot = extract_features(x_plot, k - 1)
    y_plot = X_plot @ coeffs
    print(coeffs)
    ax.plot(x_plot, y_plot, label=f'k={k}')

ax.scatter(x_sample, y_sample, color='red', label='Data Points')
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_title('Polynomial Fits for Different Degrees')
ax.legend()
ax.grid(True)
ax.set_ylim(-1.5, 1.5)

# Axes cross at the origin; the top and right frame edges are hidden.
for side in ('bottom', 'left'):
    ax.spines[side].set_position(('data', 0))
for side in ('top', 'right'):
    ax.spines[side].set_visible(False)
plt.show()


In [None]:
# Training MSE of the least-squares fit for every basis dimension
# k = 1..18 (degree k - 1), plotted on a natural-log scale.
degrees = list(range(1, 19))
training_errors = []
for k in degrees:
    X = extract_features(x_sample, k - 1)
    coeffs = linear_regression(X, y_sample)
    y_pred = X @ coeffs
    residuals = y_sample - y_pred
    mse = np.mean(residuals ** 2)
    training_errors.append(mse)

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(degrees, np.log(training_errors), marker='o')
ax.set_xlabel('Polynomial Dimension (k)')
ax.set_ylabel('ln(MSE)')
ax.set_title('Log Training Error vs Polynomial Dimension')
ax.grid(True)
plt.show()
print(training_errors)