In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Load the house-prices training set straight from the course repository.
url = 'https://raw.githubusercontent.com/Data-Science-FMI/ml-from-scratch-2019/master/data/house_prices_train.csv'
df = pd.read_csv(url)

# Discard the row identifier and a hand-picked set of columns before the
# missing-value filter below.
columns_to_drop = ['Id', 'Alley', 'PoolQC', 'MiscFeature', '3SsnPorch']
df = df.drop(columns=columns_to_drop)

# Keep only rows in which every remaining column is populated.
# NOTE(review): if any remaining column is sparsely populated, this discards
# most of the data set and leaves a very small train/test sample -- confirm
# the row count here (e.g. inspect df.shape) before trusting the metrics.
df = df.dropna()

# One-hot encode the categorical columns so every feature is numeric.
df = pd.get_dummies(df)

# Separate the features from the regression target.
X = df.drop(columns='SalePrice')
y = df['SalePrice']

# A single seed is shared by the split and by every stochastic estimator so
# that repeated runs of this cell print the same metrics.
RANDOM_STATE = 42

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# Candidate regressors, keyed by the label used in the printed report.
# LinearRegression is deterministic and takes no seed.
models = {
    'Linear Regression': LinearRegression(),
    'Decision Tree Regression': DecisionTreeRegressor(random_state=RANDOM_STATE),
    'Random Forest Regression': RandomForestRegressor(random_state=RANDOM_STATE),
    'Gradient Boosting Regression': GradientBoostingRegressor(random_state=RANDOM_STATE),
}

# Fit each model on the training split and report its hold-out error.
for model_name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    print("\n{}:".format(model_name))
    print("Mean Squared Error:", mse)
    print("R-squared:", r2)


Linear Regression:
Mean Squared Error: 3577399915.361617
R-squared: 0.45168782756206083

Decision Tree Regression:
Mean Squared Error: 4620949625.0
R-squared: 0.29174177124284084

Random Forest Regression:
Mean Squared Error: 2125079494.1611252
R-squared: 0.6742866378893455

Gradient Boosting Regression:
Mean Squared Error: 2545781067.1953883
R-squared: 0.6098052271116641
