#### a. Using scikit-learn:

In [1]:
import numpy as np
from sklearn.linear_model import LinearRegression

# Load the comma-separated training and test sets from the working directory
train_data = np.genfromtxt('trainProject2.txt', delimiter=',')
test_data = np.genfromtxt('testProject2.txt', delimiter=',')

# Column 0 is used as the input x, column 1 as the regression target y
x_train, y_train = train_data[:, 0], train_data[:, 1]
x_test, y_test = test_data[:, 0], test_data[:, 1]

def gen_features(x, k=0.5, d=3):
    """Build the sine-basis design matrix for inputs ``x``.

    The columns are ``1, sin(1*k*x), sin(2*k*x), ..., sin(d*k*x)``.

    Parameters
    ----------
    x : numpy array of input values.
    k : frequency multiplier applied inside every sine term.
    d : function depth, i.e. the number of sine terms (``d=0`` gives only
        the constant column).

    Returns
    -------
    For 1-D ``x`` of length n, an array of shape ``(n, d + 1)``.
    """
    bias = np.ones_like(x)
    harmonics = [np.sin(order * k * x) for order in range(1, d + 1)]
    # Stack the basis functions as rows, then transpose so each row is a sample
    return np.vstack([bias, *harmonics]).T

def fit_model(x_train, y_train, k=0.5, d=3):
    """Fit a scikit-learn linear regression on the sine-basis features.

    Parameters
    ----------
    x_train : training inputs, passed to ``gen_features``.
    y_train : training targets.
    k : frequency multiplier forwarded to ``gen_features``.
    d : function depth forwarded to ``gen_features``.

    Returns
    -------
    The fitted ``LinearRegression`` model. Its ``coef_`` holds one weight per
    design-matrix column (bias first), and ``intercept_`` is 0.
    """
    X_train = gen_features(x_train, k, d)
    # gen_features already supplies the constant (bias) column, so letting
    # scikit-learn fit a second intercept would model the offset twice and
    # make the weight of the ones column redundant.
    model = LinearRegression(fit_intercept=False).fit(X_train, y_train)
    return model

# One fitted model per function depth d = 0, 1, 2, 3
models = [
    fit_model(x_train, y_train, k=0.5, d=depth)
    for depth in range(4)
]


FileNotFoundError: trainProject2.txt not found.

#### b. Using only NumPy:

In [None]:
import numpy as np

# Load the comma-separated training and test sets from the working directory
train_data = np.genfromtxt('trainProject2.txt', delimiter=',')
test_data = np.genfromtxt('testProject2.txt', delimiter=',')

# Column 0 is used as the input x, column 1 as the regression target y
x_train, y_train = train_data[:, 0], train_data[:, 1]
x_test, y_test = test_data[:, 0], test_data[:, 1]

def gen_features(x, k=0.5, d=3):
    """Build the sine-basis design matrix for inputs ``x``.

    The columns are ``1, sin(1*k*x), sin(2*k*x), ..., sin(d*k*x)``.

    Parameters
    ----------
    x : numpy array of input values.
    k : frequency multiplier applied inside every sine term.
    d : function depth, i.e. the number of sine terms (``d=0`` gives only
        the constant column).

    Returns
    -------
    For 1-D ``x`` of length n, an array of shape ``(n, d + 1)``.
    """
    bias = np.ones_like(x)
    harmonics = [np.sin(order * k * x) for order in range(1, d + 1)]
    # Stack the basis functions as rows, then transpose so each row is a sample
    return np.vstack([bias, *harmonics]).T

def linear_regression(X, y):
    """Return the ordinary least-squares parameters for ``X @ theta ~ y``.

    Parameters
    ----------
    X : design matrix with one row per sample and one column per feature.
    y : target values, one per row of ``X``.

    Returns
    -------
    theta : the parameter vector minimising ``||X @ theta - y||^2``. When the
        columns of ``X`` are linearly dependent, the minimum-norm minimiser
        is returned.
    """
    # Solve the least-squares problem directly instead of forming and
    # inverting X.T @ X: the explicit inverse raises LinAlgError on a
    # rank-deficient design (e.g. a duplicated or all-zero feature column)
    # and squares the condition number of the problem.
    theta, *_ = np.linalg.lstsq(X, y, rcond=None)
    return theta

def fit_model(x_train, y_train, k=0.5, d=3):
    """Fit the depth-``d`` sine-basis model and return its parameter vector.

    Builds the design matrix with ``gen_features(x_train, k, d)`` and hands it,
    together with ``y_train``, to ``linear_regression``; that result is
    returned unchanged.
    """
    design_matrix = gen_features(x_train, k, d)
    return linear_regression(design_matrix, y_train)

# One parameter vector per function depth d = 0, 1, 2, 3
thetas = [
    fit_model(x_train, y_train, k=0.5, d=depth)
    for depth in range(4)
]


#### c. Plotting the Regression Learner's Result:

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Load the comma-separated training set; column 0 is used as the input x
# and column 1 as the regression target y
train_data = np.genfromtxt('trainProject2.txt', delimiter=',')
x_train, y_train = train_data[:, 0], train_data[:, 1]

def gen_features(x, k=0.5, d=3):
    """Build the sine-basis design matrix for inputs ``x``.

    The columns are ``1, sin(1*k*x), sin(2*k*x), ..., sin(d*k*x)``.

    Parameters
    ----------
    x : numpy array of input values.
    k : frequency multiplier applied inside every sine term.
    d : function depth, i.e. the number of sine terms (``d=0`` gives only
        the constant column).

    Returns
    -------
    For 1-D ``x`` of length n, an array of shape ``(n, d + 1)``.
    """
    bias = np.ones_like(x)
    harmonics = [np.sin(order * k * x) for order in range(1, d + 1)]
    # Stack the basis functions as rows, then transpose so each row is a sample
    return np.vstack([bias, *harmonics]).T

def linear_regression(X, y):
    """Return the ordinary least-squares parameters for ``X @ theta ~ y``.

    Parameters
    ----------
    X : design matrix with one row per sample and one column per feature.
    y : target values, one per row of ``X``.

    Returns
    -------
    theta : the parameter vector minimising ``||X @ theta - y||^2``. When the
        columns of ``X`` are linearly dependent, the minimum-norm minimiser
        is returned.
    """
    # Solve the least-squares problem directly instead of forming and
    # inverting X.T @ X: the explicit inverse raises LinAlgError on a
    # rank-deficient design (e.g. a duplicated or all-zero feature column)
    # and squares the condition number of the problem.
    theta, *_ = np.linalg.lstsq(X, y, rcond=None)
    return theta

def predict(X, theta):
    """Evaluate the linear model: the matrix product of ``X`` and ``theta``.

    Parameters
    ----------
    X : design matrix (one row per sample).
    theta : model parameters, one per column of ``X``.

    Returns
    -------
    The predicted values ``X @ theta``.
    """
    y_hat = X @ theta
    return y_hat

def plot(x, y, k=0.5, d=3):
    """Fit the depth-``d`` sine-basis model to ``(x, y)`` and plot the result.

    The model is fitted on the given data and evaluated at the same ``x``
    values. The data are drawn as blue scatter points and the fitted values
    as a red line, then the figure is shown.

    Parameters
    ----------
    x : numpy array of input values.
    y : numpy array of targets, one per entry of ``x``.
    k : frequency multiplier forwarded to ``gen_features``.
    d : function depth forwarded to ``gen_features``.
    """
    design = gen_features(x, k, d)
    fitted = predict(design, linear_regression(design, y))

    # Order the samples by x so the fitted line is drawn left to right
    order = np.argsort(x)
    x_sorted, y_sorted, fit_sorted = x[order], y[order], fitted[order]

    plt.scatter(x_sorted, y_sorted, color='blue', label='Data Points')
    plt.plot(x_sorted, fit_sorted, color='red', label=f'Fit for d={d}')
    plt.title(f"Function Depth: {d}")
    plt.xlabel('x')
    plt.ylabel('y')
    plt.grid(True)
    plt.legend()
    plt.show()

# Draw one figure per function depth d = 0, 1, 2, 3
for d in range(0, 4):
    plot(x=x_train, y=y_train, k=0.5, d=d)


#### d. Evaluating Regression Functions:

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Load the comma-separated training and test sets from the working directory
train_data = np.genfromtxt('trainProject2.txt', delimiter=',')
test_data = np.genfromtxt('testProject2.txt', delimiter=',')

# Column 0 is used as the input x, column 1 as the regression target y
x_train, y_train = train_data[:, 0], train_data[:, 1]
x_test, y_test = test_data[:, 0], test_data[:, 1]

def gen_features(x, k=0.5, d=3):
    """Build the sine-basis design matrix for inputs ``x``.

    The columns are ``1, sin(1*k*x), sin(2*k*x), ..., sin(d*k*x)``.

    Parameters
    ----------
    x : numpy array of input values.
    k : frequency multiplier applied inside every sine term.
    d : function depth, i.e. the number of sine terms (``d=0`` gives only
        the constant column).

    Returns
    -------
    For 1-D ``x`` of length n, an array of shape ``(n, d + 1)``.
    """
    bias = np.ones_like(x)
    harmonics = [np.sin(order * k * x) for order in range(1, d + 1)]
    # Stack the basis functions as rows, then transpose so each row is a sample
    return np.vstack([bias, *harmonics]).T

def linear_regression(X, y):
    """Return the ordinary least-squares parameters for ``X @ theta ~ y``.

    Parameters
    ----------
    X : design matrix with one row per sample and one column per feature.
    y : target values, one per row of ``X``.

    Returns
    -------
    theta : the parameter vector minimising ``||X @ theta - y||^2``. When the
        columns of ``X`` are linearly dependent, the minimum-norm minimiser
        is returned.
    """
    # Solve the least-squares problem directly instead of forming and
    # inverting X.T @ X: the explicit inverse raises LinAlgError on a
    # rank-deficient design (e.g. a duplicated or all-zero feature column)
    # and squares the condition number of the problem.
    theta, *_ = np.linalg.lstsq(X, y, rcond=None)
    return theta

def predict(X, theta):
    """Evaluate the linear model: the matrix product of ``X`` and ``theta``.

    Parameters
    ----------
    X : design matrix (one row per sample).
    theta : model parameters, one per column of ``X``.

    Returns
    -------
    The predicted values ``X @ theta``.
    """
    y_hat = X @ theta
    return y_hat

def compute_mse(y_true, y_pred):
    """Return the mean squared error between targets and predictions.

    Parameters
    ----------
    y_true : numpy array of observed values.
    y_pred : numpy array of predicted values, same shape as ``y_true``.

    Returns
    -------
    The mean of the squared element-wise differences.
    """
    residuals = y_true - y_pred
    return np.mean(np.square(residuals))

# Function depths to evaluate. Defined once so the training loop, the plot
# axis and the best-depth lookup below can never drift apart.
depths = list(range(4))
errors = []

for d in depths:
    # Fit the parameters on the training data only
    X_train = gen_features(x_train, k=0.5, d=d)
    theta = linear_regression(X_train, y_train)

    # Predict on the held-out test data
    X_test = gen_features(x_test, k=0.5, d=d)
    y_test_pred = predict(X_test, theta)

    # Test-set mean squared error for this depth
    errors.append(compute_mse(y_test, y_test_pred))

# Plot the errors
plt.figure()
plt.plot(depths, errors, marker='o', linestyle='-', color='blue')
plt.xlabel('Function Depth (d)')
plt.ylabel('Mean Squared Error')
plt.title('Error vs. Function Depth')
plt.xticks(depths)
plt.grid(True)
plt.show()

# np.argmin returns a position in `errors`, not a depth; translate it through
# `depths` so the report stays correct if the evaluated depths ever change.
best_idx = int(np.argmin(errors))
best_d = depths[best_idx]
print(f"The best function depth is d={best_d} with the minimum error of {errors[best_idx]:.4f}.")
