## Part 3: Scikit-learn Implementation

1. Importing all necessary libraries

In [7]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import time

### Defining the main function to train and evaluate the model

In [8]:
def _report_metrics(split_name, y_true, y_pred):
    """Print MAE, RMSE and R2 for one data split.

    Args:
        split_name: Label prefixed to every printed line (e.g. "Training").
        y_true: Ground-truth target values.
        y_pred: Model predictions aligned with ``y_true``.
    """
    mae = mean_absolute_error(y_true, y_pred)
    # RMSE is the square root of the MSE, so it is in the target's own units.
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    r2 = r2_score(y_true, y_pred)

    print(f"{split_name} MAE:", mae)
    print(f"{split_name} RMSE:", rmse)
    print(f"{split_name} R2 Score:", r2)


def main(data_path='data.csv', target_column='median_house_value',
         test_size=0.2, random_state=42):
    """Fit a scikit-learn linear regression and print timing and metrics.

    Loads a CSV, splits it into train/test sets, standardizes the features,
    fits ``LinearRegression`` and prints the fit duration followed by the
    MAE, RMSE and R2 score on the training and testing splits.

    Args:
        data_path: Path of the CSV file to load.
        target_column: Name of the column to predict; every other column is
            used as a feature.
        test_size: Fraction of rows held out for testing.
        random_state: Seed for the train/test split, for reproducibility.

    Returns:
        None. All results are printed to stdout.
    """
    data = pd.read_csv(data_path)

    # NOTE(review): assumes every remaining column is numeric -- confirm
    # against the CSV; the scaler and the model are fed X unchanged.
    X = data.drop(target_column, axis=1)
    y = data[target_column]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Fit the scaler on the training split only, then reuse its statistics
    # for the test split so no test information leaks into preprocessing.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    model = LinearRegression()
    # perf_counter is a monotonic, high-resolution clock intended for
    # measuring short durations; time.time() can jump if the system clock
    # is adjusted and has coarser resolution on some platforms.
    start_time = time.perf_counter()
    model.fit(X_train_scaled, y_train)
    end_time = time.perf_counter()

    print("Training Time:", (end_time - start_time))

    # Predict once per split and reuse the result for all three metrics.
    y_pred_train = model.predict(X_train_scaled)
    y_pred = model.predict(X_test_scaled)

    _report_metrics("Training", y_train, y_pred_train)
    _report_metrics("Testing", y_test, y_pred)

# Run the evaluation only when this code executes as the top-level module,
# not when it is imported from elsewhere.
if __name__ == "__main__":
    main()

Training Time: 0.007021903991699219
Training MAE: 48352.88709716362
Training RMSE: 66319.52155392875
Training R2 Score: 0.670979157174141
Testing MAE: 50277.55447390933
Testing RMSE: 69407.04991577457
Testing R2 Score: 0.6323790009072097
