<a href="https://colab.research.google.com/github/OnyekaPeter/Basic_Model/blob/master/StageC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Load the dataset into a DataFrame
df = pd.read_csv('energydata_complete.csv')

# Extract the features and target
X = df['T2'].values.reshape(-1, 1)  # Input feature (temperature in the living room)
y = df['T6'].values  # Target (temperature outside the building)

# Create and fit a linear regression model
model = LinearRegression()
model.fit(X, y)

# Make predictions
y_pred = model.predict(X)

# Calculate the Root Mean Squared Error (RMSE)
rmse = np.sqrt(mean_squared_error(y, y_pred))

# Print the RMSE rounded to three decimal places
print(f'RMSE: {rmse:.3f}')


RMSE: 3.644


In [4]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Drop the specified columns
df = df.drop(["date", "lights"], axis=1)

# Define the features (independent variables) and the target variable
X = df.drop("Appliances", axis=1)
y = df["Appliances"]

# Split the data into a 70-30 train-test set with a random state of 42
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Normalize the dataset using MinMaxScaler
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Create and fit a multiple linear regression model on the training set
model = LinearRegression()
model.fit(X_train, y_train)

# Predict on the training set
y_train_pred = model.predict(X_train)

# Calculate Mean Absolute Error (MAE) and Root Mean Squared Error (RMSE) for the training set
mae_train = mean_absolute_error(y_train, y_train_pred)
rmse_train = mean_squared_error(y_train, y_train_pred, squared=False)

# Print the MAE and RMSE rounded to three decimal places
print(f'Mean Absolute Error (MAE) for the training set: {mae_train:.3f}')
print(f'Root Mean Squared Error (RMSE) for the training set: {rmse_train:.3f}')


Mean Absolute Error (MAE) for the training set: 53.742
Root Mean Squared Error (RMSE) for the training set: 95.216


In [5]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Predict on the test set
y_test_pred = model.predict(X_test)

# Calculate Mean Absolute Error (MAE) and Root Mean Squared Error (RMSE) for the test set
mae_test = mean_absolute_error(y_test, y_test_pred)
rmse_test = mean_squared_error(y_test, y_test_pred, squared=False)

# Print the MAE and RMSE for the test set, rounded to three decimal places
print(f'Mean Absolute Error (MAE) for the test set: {mae_test:.3f}')
print(f'Root Mean Squared Error (RMSE) for the test set: {rmse_test:.3f}')

# Determine if the model overfit to the training set
overfit = 'No' if mae_train <= mae_test else 'Yes'
print(f'Did the model overfit to the training set? {overfit}')


Mean Absolute Error (MAE) for the test set: 53.643
Root Mean Squared Error (RMSE) for the test set: 93.640
Did the model overfit to the training set? Yes


In [6]:
from sklearn.linear_model import Ridge, Lasso

# Train a Ridge regression model with default parameters
ridge_model = Ridge()
ridge_model.fit(X_train, y_train)

# Predict with the Ridge model on the test set
y_test_pred_ridge = ridge_model.predict(X_test)

# Calculate RMSE with Ridge regression on the test set
rmse_test_ridge = mean_squared_error(y_test, y_test_pred_ridge, squared=False)

# Determine if there is any change in RMSE with Ridge regression
rmse_change_ridge = 'No' if rmse_test == rmse_test_ridge else 'Yes'

# Train a Lasso regression model with default parameters
lasso_model = Lasso()
lasso_model.fit(X_train, y_train)

# Predict with the Lasso model on the test set
y_test_pred_lasso = lasso_model.predict(X_test)

# Calculate RMSE with Lasso regression on the test set
rmse_test_lasso = mean_squared_error(y_test, y_test_pred_lasso, squared=False)

# Count the number of non-zero feature weights in Lasso regression
non_zero_weights = sum(lasso_model.coef_ != 0)

# Print the answers to the questions
print(f'Did the RMSE change with Ridge Regression? {rmse_change_ridge}')
print(f'Number of features with non-zero weights in Lasso Regression: {non_zero_weights}')
print(f'RMSE with Lasso Regression on the test set: {rmse_test_lasso:.3f}')


Did the RMSE change with Ridge Regression? Yes
Number of features with non-zero weights in Lasso Regression: 4
RMSE with Lasso Regression on the test set: 99.424
