In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import Lasso, LinearRegression, Ridge
import pandas as pd
from sklearn.metrics import r2_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error

# Load the CSV, using its 'Id' column as the row index.
df = pd.read_csv('train_clean.csv', index_col='Id')

# Replace every categorical column with indicator (dummy) columns.
df_encoded = pd.get_dummies(df)

# 'Verkaufspreis' (German: sale price) is the regression target;
# all remaining columns are used as features.
y = df_encoded['Verkaufspreis']
X = df_encoded.drop(columns='Verkaufspreis')

# Two-stage split with a fixed seed so the partition is reproducible:
#   1) hold out 15% of all rows as the test set,
#   2) hold out 15% of the remaining rows as the validation set.
split_options = {'test_size': 0.15, 'random_state': 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, **split_options
)

# Expand the training features with degree-2 polynomial terms. The transformer
# is fitted on the training split only and reused (transform-only) for the
# validation and test splits below.
poly = PolynomialFeatures(degree=2)
X_poly = poly.fit_transform(X_train)

# Plain (unregularised) least-squares model.
# NOTE(review): the recorded run of this cell shows strongly negative R^2 in
# one CV fold and on the test split, which suggests this model generalises
# poorly on the expanded feature set. Ridge/Lasso are already imported and
# may be worth trying - confirm before relying on these numbers.
model = LinearRegression()

# 5-fold cross-validation on the training split. For a regressor the score is
# the coefficient of determination (R^2), not an accuracy; scoring is spelled
# out so the printed label cannot drift from what is actually measured.
scores = cross_val_score(model, X_poly, y_train, cv=5, scoring='r2')
print("Cross validation scores:", scores)
print("%0.2f mean R^2 with a standard deviation of %0.2f" % (scores.mean(), scores.std()))

# Fit on the full training split for the hold-out evaluations.
model.fit(X_poly, y_train)

# Score the validation split, which is created upstream but was never used.
X_val_poly = poly.transform(X_val)
y_val_pred = model.predict(X_val_poly)
print("Validation R^2 Score:", r2_score(y_val, y_val_pred))

# Final check on the untouched test split.
X_test_poly = poly.transform(X_test)
y_pred = model.predict(X_test_poly)

r2 = r2_score(y_test, y_pred)
print("R^2 Score:", r2)

# MSE is in squared target units; RMSE is reported as well because it is in
# the same units as the target and therefore easier to interpret.
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)
rmse = np.sqrt(mse)
print("Root Mean Squared Error:", rmse)



Cross validation scores: [-53.6314338    0.30211124   0.06979628   0.63331021   0.3228186 ]
-10.46 accuracy with a standard deviation of 21.59
R^2 Score: -1.4880929668054295
Mean Squared Error: 8979405892.814257


In [2]:
from sklearn.model_selection import GridSearchCV

# Hyperparameter grid for LinearRegression.
# - 'fit_intercept' and 'positive' are boolean switches, so each is searched
#   over True/False. (The previous grid passed the integers 2, 3, 4 for
#   'fit_intercept'; those are not valid boolean settings and at best all
#   behave like True.)
# - 'copy_X' and 'n_jobs' are intentionally not searched: they control memory
#   handling and parallelism, not the fitted coefficients, so they only
#   multiplied the number of fits without changing any score. copy_X=False
#   additionally allows the estimator to overwrite X_poly.
param_grid = {
    'fit_intercept': [True, False],
    'positive': [True, False],
}

# 5-fold grid search scored by R^2 (the regressor's default score, made
# explicit here so the reported number is unambiguous).
grid_search = GridSearchCV(model, param_grid, cv=5, scoring='r2')

# Fit the grid search on the polynomial-expanded training data.
grid_search.fit(X_poly, y_train)

# Report the winning combination together with its mean cross-validated score.
best_params = grid_search.best_params_
print("Best Hyperparameters:", best_params)
print("Best cross-validated R^2:", grid_search.best_score_)


