In [13]:
#ordinary least squares

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline

# Load the dataset from BostonHousing.csv
data = pd.read_csv('BostonHousing.csv')

# AGE is the regression target; every other column is used as a predictor
y = data['AGE']
X = data.drop(columns=['AGE'])

# Hold out 20% of the rows for evaluation (seeded so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Pipeline: expand the predictors to degree-2 polynomial terms, then fit a
# linear regression on the expanded features
polynomial_model = Pipeline(
    steps=[
        ('poly', PolynomialFeatures(degree=2)),
        ('linear', LinearRegression()),
    ]
)

# Fit on the training split, then predict the held-out split
# (Pipeline.fit returns the fitted pipeline, so the calls can be chained)
y_pred = polynomial_model.fit(X_train, y_train).predict(X_test)

# Evaluation metrics on the test split
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'Mean Squared Error: {mse}')
print(f'R-squared: {r2}')

Mean Squared Error: 195.12709618035697
R-squared: 0.7639112642219437


In [14]:
#lasso regression

import pandas as pd
import os
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error, r2_score

# Load the dataset
data = pd.read_csv("BostonHousing.csv")

# Features are every column except AGE; AGE is the regression target
X = data.drop(columns=['AGE'])
y = data['AGE']

# Hold out 20% of the rows for testing (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# NOTE(review): the features are passed to Lasso unscaled; the L1 penalty is
# scale-sensitive, so consider standardising inside a Pipeline — confirm intent.
lasso_model = Lasso()

# Candidate regularisation strengths for the grid search
param_grid = {
    'alpha': [0.01, 0.1, 1, 10, 100]
}

# 5-fold cross-validated grid search, scored by negated mean squared error
grid_search = GridSearchCV(estimator=lasso_model, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error')

# Run the search on the training data only
grid_search.fit(X_train, y_train)

# Report the best hyperparameters found
best_params = grid_search.best_params_
print(f'Beste parameters: {best_params}')

# GridSearchCV defaults to refit=True, so best_estimator_ has already been
# fitted on the full training set with the best alpha; fitting it a second
# time would only repeat the same work.
best_lasso_model = grid_search.best_estimator_

# Predict on the held-out test data
y_pred = best_lasso_model.predict(X_test)

# Evaluation metrics on the test set
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'Mean Squared Error: {mse}')
print(f'R-squared: {r2}')


Beste parameters: {'alpha': 0.01}
Mean Squared Error: 248.81590965435387
R-squared: 0.6989519410596498


In [15]:
#ridge regression

import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, r2_score

# Load the dataset
data = pd.read_csv('BostonHousing.csv')

# Features are every column except AGE; AGE is the regression target
X = data.drop(columns=['AGE'])
y = data['AGE']

# Hold out 20% of the rows for testing (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Base estimator whose alpha is tuned below
# NOTE(review): the features are passed to Ridge unscaled; the L2 penalty is
# scale-sensitive, so consider standardising inside a Pipeline — confirm intent.
ridge_model = Ridge()

# Candidate regularisation strengths for the grid search
param_grid = {
    'alpha': [0.01, 0.1, 1, 10, 100]
}

# 5-fold cross-validated grid search, scored by negated mean squared error
grid_search = GridSearchCV(estimator=ridge_model, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error')

# Run the search on the training data only
grid_search.fit(X_train, y_train)

# Report the best hyperparameters found
best_params = grid_search.best_params_
print(f'Beste parameters: {best_params}')

# GridSearchCV defaults to refit=True, so best_estimator_ has already been
# fitted on the full training set with the best alpha; fitting it a second
# time would only repeat the same work.
best_ridge_model = grid_search.best_estimator_

# Predict on the held-out test data
y_pred = best_ridge_model.predict(X_test)

# Evaluation metrics on the test set
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'Mean Squared Error: {mse}')
print(f'R-squared: {r2}')


Beste parameters: {'alpha': 0.1}
Mean Squared Error: 249.42544444054906
R-squared: 0.6982144509831687
