In [3]:
# Script for model 3: Predict sepal length using petal length and petal width
# Import the libraries
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load the iris dataset and convert it to a dataframe
iris = load_iris()
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
df['target'] = iris.target

# Split the data into X and y
X = df[['petal length (cm)', 'petal width (cm)']].values
y = df['sepal length (cm)'].values.reshape(-1, 1)

# Split the data into train, validation and test sets with a 80/10/10 ratio
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42, stratify=df['target'])
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1111, random_state=42)

# Define a class for the linear regression model that has a predict and score method
class LinearRegression:
    def __init__(self, weights=None, bias=None):
        # Initialize the weights and bias from the given parameters or as None
        self.weights = weights
        self.bias = bias

    def predict(self, X):
        """Predict using the linear model.
        Parameters
        ----------
        X: numpy.ndarray
            The input data.
        """
        # Compute the linear combination of the input features and the weights, plus the bias.
        return X @ self.weights + self.bias

    def score(self, X, y):
        """Evaluate the linear model using the mean squared error.
        Parameters
        ----------
        X: numpy.ndarray
            The input data.
        y: numpy.ndarray
            The target data.
        """
        # Compute the predictions and the errors for the given data.
        y_pred = self.predict(X)
        errors = y_pred - y

        # Compute and return the mean squared error.
        return (1 / X.shape[0]) * (errors.T @ errors)

# Create an instance of the linear regression class with the weights and bias from model 3
model_3 = LinearRegression(weights=np.array([0.7099, 0.6508]), bias=np.array([4.1909]))

# Evaluate the model on the test set and print the mean squared error
test_error = model_3.score(X_test, y_test)
print(f"Test error for model 3: {test_error}")


Test error for model 3: [[ 6.16757948  7.9777881   8.22659894  4.7028143   9.74658275 -0.97662239
  -0.97662239 -0.64498361  9.19390595  5.98795544 -1.14244178  8.42002764
   6.30578956  7.43891596 -0.810803  ]
 [ 7.9777881  10.38859072 10.71995242  6.02704071 12.74423897 -1.53672885
  -1.53672885 -1.09505841 12.00819416  7.73856809 -1.75756407 10.97755722
   8.16185374  9.6709307  -1.31589363]
 [ 8.22659894 10.71995242 11.06266064  6.20905392 13.15626295 -1.61371476
  -1.61371476 -1.15692061 12.39501444  7.97918754 -1.84211183 11.32908636
   8.41696737  9.97771824 -1.38531769]
 [ 4.7028143   6.02704071  6.20905392  3.63129099  7.32097145 -0.52340145
  -0.52340145 -0.28079691  6.91667043  4.57141348 -0.64470372  6.3505533
   4.80391946  5.63283824 -0.40209918]
 [ 9.74658275 12.74423897 13.15626295  7.32097145 15.67331478 -2.08402119
  -2.08402119 -1.5348364  14.75809705  9.44913022 -2.35861359 13.47657561
   9.97545487 11.85188137 -1.80942879]
 [-0.97662239 -1.53672885 -1.61371476 -0.5