In [None]:
#experiment -5
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Fetch the 'house_prices' dataset from OpenML as a DataFrame
# (Ames Housing, used here because the Boston dataset is deprecated).
housing = fetch_openml(name='house_prices', as_frame=True)

# Build the working frame in one pass:
#   * keep only rows that have a sale price,
#   * discard the 'Id' column,
#   * expose the target as a trailing 'PRICE' column in place of 'SalePrice'.
data = (
    housing.frame
    .dropna(subset=['SalePrice'])
    .drop(columns=['Id'])
    .assign(PRICE=lambda frame: frame['SalePrice'])
    .drop(columns=['SalePrice'])
)

# Keep only columns that are at least 90% populated, then discard every
# row that still contains a missing value.
data = data.dropna(axis=1, thresh=len(data) * 0.9).dropna()

# One-hot encode the categorical columns; drop_first=True omits the first
# level of each category.
data = pd.get_dummies(data, drop_first=True)

# Separate target and features, then hold out 20% of the rows for testing.
# The fixed random_state makes the split reproducible.
y = data['PRICE']
X = data.drop(columns='PRICE')
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Each estimator is built and fitted on the training split in a single
# expression (fit() returns the fitted estimator), then used to predict
# the held-out test split.

# 1. Linear Regression
lr = LinearRegression().fit(X_train, y_train)
lr_pred = lr.predict(X_test)

# 2. Decision Tree (seeded for reproducibility)
dt = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)
dt_pred = dt.predict(X_test)

# 3. Random Forest with 100 trees (seeded for reproducibility)
rf = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)
rf_pred = rf.predict(X_test)

# Evaluation Function
def evaluate_model(model_name, y_true, y_pred):
    """Print the RMSE and R2 score for one set of predictions.

    Args:
        model_name: Label placed at the start of the printed line.
        y_true: Ground-truth target values.
        y_pred: Predicted target values to compare against ``y_true``.

    Returns:
        None. The metrics are only printed, each rounded to two decimals.
    """
    r2 = r2_score(y_true, y_pred)
    # RMSE is the square root of the mean squared error, so it is expressed
    # in the same units as the target.
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    print(f"{model_name} - RMSE: {rmse:.2f}, R2: {r2:.2f}")

# Report test-set metrics for every model, in the order they were trained.
for label, predictions in (
    ("Linear Regression", lr_pred),
    ("Decision Tree", dt_pred),
    ("Random Forest", rf_pred),
):
    evaluate_model(label, y_test, predictions)




Linear Regression - RMSE: 47195.86, R2: 0.48
Decision Tree - RMSE: 34657.66, R2: 0.72
Random Forest - RMSE: 26526.61, R2: 0.84
