# Random Forrest Regressor Model

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Load the dataset
data_path = 'Table_CSVs/energy_efficient.csv'
data = pd.read_csv(data_path)

# Split the dataset into features (X) and target variable (y)
X = data.drop(columns=['TOTALBTU'])  # Adjust 'TARGET_COLUMN' to your target variable
y = data['TOTALBTU']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the Random Forest Regressor model
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = rf_model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'Mean Squared Error (MSE): {mse}')
print(f'R-squared (R2): {r2}')



Mean Squared Error (MSE): 1080329681.9631333
R-squared (R2): 0.5760602548867669


# Random Forrest Regressor Model Redo

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Load the dataset
data_path = 'C:/Users/Joanna/Project4_Energy_Consumption/Table_CSVs/energy_efficient.csv'
data = pd.read_csv(data_path)

# Split the dataset into features (X) and target variable (y)
X = data.drop(columns=['TOTALBTU'])  # Adjust 'TARGET_COLUMN' to your target variable
y = data['TOTALBTU']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the Random Forest Regressor model
rf_model = RandomForestRegressor(n_estimators=110, random_state=42)
rf_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = rf_model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'Mean Squared Error (MSE): {mse}')
print(f'R-squared (R2): {r2}')


Mean Squared Error (MSE): 1077387604.883598
R-squared (R2): 0.5772147759816009


# Linear Regressor Model

In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load your dataset
data_path = r'C:\Users\Joanna\Project4_Energy_Consumption\Table_CSVs\energy_efficient.csv'
energy_consumption_df = pd.read_csv(data_path)

# Drop any rows with missing values
energy_consumption_df.dropna(inplace=True)

# Split data into features (X) and target variable "TOTALBTU" (y)
X = energy_consumption_df.drop(['TOTALBTU'], axis=1)
y = energy_consumption_df['TOTALBTU']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the Linear Regression model
lr_model = LinearRegression()

# Train the model
lr_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = lr_model.predict(X_test)

# Evaluate model performance
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error (MSE): {mse}")
print(f"R-squared (R2): {r2}")



Mean Squared Error (MSE): 1020605771.544824
R-squared (R2): 0.5994969333217187


# Linear Regressor Model Updated

In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

# Load your dataset
data_path = r'Table_CSVs\energy_efficient.csv'
energy_consumption_df = pd.read_csv(data_path)

# Drop any rows with missing values
energy_consumption_df.dropna(inplace=True)

# Drop the 'DOEID' column and update the DataFrame
energy_consumption_df.drop('DOEID', axis=1, inplace=True)

import pandas as pd

# Assuming df is your DataFrame containing the energy data
# Define a function to categorize TOTALBTU and TOTALDOL
def categorize_total(row):
    # Adjust the conditions based on your criteria for each category
    if row['TOTALBTU'] > 50000 and row['TOTALDOL'] > 1000:
        return 1  # Category 1
    else:
        return 2  # Category 2

# Apply the categorization function to create separate columns for each category
energy_consumption_df['BTU_Category'] = energy_consumption_df.apply(lambda row: 1 if row['TOTALBTU'] > 50000 else 2, axis=1)
energy_consumption_df['DOL_Category'] = energy_consumption_df.apply(lambda row: 1 if row['TOTALDOL'] > 1000 else 2, axis=1)

# Split data into features (X) and target variable "TOTALBTU" (y)
X = energy_consumption_df.drop(['TOTALBTU'], axis=1)
y = energy_consumption_df['TOTALBTU']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.1, random_state=45)

# Feature Scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize the Linear Regression model
lr_model = LinearRegression()

# Train the model
lr_model.fit(X_train_scaled, y_train)

# Make predictions on the test set
y_pred = lr_model.predict(X_test_scaled)

# Evaluate model performance
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error (MSE): {mse}")
print(f"R-squared (R2): {r2}")

Mean Squared Error (MSE): 867655446.6451124
R-squared (R2): 0.6994187472726883
