In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import make_scorer, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import joblib
from IPython.display import display

## Data Loader

In [3]:
df_ny = pd.read_csv('../data/processed/ny.csv')
df_aus = pd.read_csv('../data/processed/austin.csv')
df = pd.read_csv('../data/processed/combined_data.csv')

# Display the combined DataFrame
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26200 entries, 0 to 26199
Data columns (total 30 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   date          26200 non-null  object 
 1   grid          26200 non-null  float64
 2   bathroom      26200 non-null  float64
 3   bedroom       26200 non-null  float64
 4   car           26200 non-null  float64
 5   diningroom    26200 non-null  float64
 6   livingroom    26200 non-null  float64
 7   office        26200 non-null  float64
 8   utilityroom   26200 non-null  float64
 9   waterheater   26200 non-null  float64
 10  aircomp       26200 non-null  float64
 11  kitchenArea   26200 non-null  float64
 12  washer_dryer  26200 non-null  float64
 13  other         26200 non-null  float64
 14  day           26200 non-null  object 
 15  month         26200 non-null  object 
 16  day_name      26200 non-null  object 
 17  hour          26200 non-null  int64  
 18  minute        26200 non-nu

## Fix Data Types

In [4]:
# Define a function to convert data types
def convert_data_types(df):
    # Convert 'date' column to datetime if it exists
    if 'date' in df.columns and df['date'].dtype == 'object':
        df['date'] = pd.to_datetime(df['date'], errors='coerce')
        
    if 'day' in df.columns and df['day'].dtype == 'object':
        df['day'] = pd.to_datetime(df['day'], errors='coerce')
    
    # Convert all other object columns to categorical
    for column in df.select_dtypes(include='object').columns:
        df[column] = df[column].astype('category')
    
    return df

# Define the correct order for months and days
month_order = ['January', 'February', 'March', 'April', 'May', 'June', 
               'July', 'August', 'September', 'October', 'November', 'December']
day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
time_of_day_order = ['Night', 'Morning', 'Afternoon', 'Evening']

# Define a function to convert data types and set categorical order
def convert_and_order_categories(df):
    # Convert 'date' column to datetime if it exists
    if 'date' in df.columns and df['date'].dtype == 'object':
        df['date'] = pd.to_datetime(df['date'], errors='coerce')
    
    # Convert categorical columns with explicit ordering
    if 'month' in df.columns:
        df['month'] = pd.Categorical(df['month'], categories=month_order, ordered=True)
    if 'day_name' in df.columns:
        df['day_name'] = pd.Categorical(df['day_name'], categories=day_order, ordered=True)
    if 'time_of_day' in df.columns:
        df['time_of_day'] = pd.Categorical(df['time_of_day'], categories=time_of_day_order, ordered=True)
    
    # Convert remaining object columns to category without specific ordering
    for column in df.select_dtypes(include='object').columns:
        if column not in ['month', 'day_name', 'time_of_day']:  # Exclude already converted columns
            df[column] = df[column].astype('category')
    
    return df



In [None]:
# Apply the function to both DataFrames
df = convert_data_types(df)

df = convert_and_order_categories(df)

# ML Prediction

### Evaluate Multi-DF

In [None]:
# Global settings
max_iter = 10  # Maximum number of iterations for RandomizedSearchCV
cv_folds = 5  # Number of cross-validation folds
base_model_save_path = '../models/energy_consumption'  # Base directory to save models

# Ensure the base model save directory exists
os.makedirs(base_model_save_path, exist_ok=True)

# Function to calculate Mean Absolute Percentage Error (MAPE)
def mean_absolute_percentage_error(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

# Define MAPE scorer for RandomizedSearchCV
mape_scorer = make_scorer(mean_absolute_percentage_error, greater_is_better=False)

# Function to plot and save actual vs predicted values for both train and test sets for the best model only
def plot_best_model(y_train, y_train_pred, y_test, y_test_pred, model_name, target_name, city_name, model_save_path):
    plt.figure(figsize=(12, 6))

    # Plot for training set
    plt.subplot(1, 2, 1)
    plt.scatter(y_train, y_train_pred, alpha=0.5, color="blue", label="Predicted vs Actual")
    plt.plot([min(y_train), max(y_train)], [min(y_train), max(y_train)], color="red", linestyle="--", label="Perfect Fit (y=x)")
    plt.xlabel("Actual")
    plt.ylabel("Predicted")
    plt.title(f"Train: Actual vs Predicted ({model_name}) - {target_name}")
    plt.legend()

    # Plot for test set
    plt.subplot(1, 2, 2)
    plt.scatter(y_test, y_test_pred, alpha=0.5, color="green", label="Predicted vs Actual")
    plt.plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], color="red", linestyle="--", label="Perfect Fit (y=x)")
    plt.xlabel("Actual")
    plt.ylabel("Predicted")
    plt.title(f"Test: Actual vs Predicted ({model_name}) - {target_name}")
    plt.legend()

    plt.tight_layout()
    
    # Save the plot as a PNG file
    plot_filename = f"{model_save_path}/best_model_plot_{target_name}.png"
    plt.savefig(plot_filename)  # Save the plot as a PNG file
    plt.close()
    print(f"Plot for best model of {city_name} - {target_name} saved as {plot_filename}")
    
    # Display the plot
    plt.show()

# Function to prepare the data with preprocessing (OneHotEncoding, Scaling)
def prepare_data(df, target_column, feature_columns=None):
    # Prepare the features (X) and target (y)
    X = df[feature_columns]
    y = df[target_column].values

    months = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']

    day_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

    seasons = ['Spring', 'Summer', 'Fall', 'Winter']

    cities = ['austin', 'ny']

    categorical_cols = X.select_dtypes(include=['object', 'category']).columns.tolist()
    numerical_cols = X.select_dtypes(include=['number']).columns.tolist()

    category_definitions = {
        'month': months,
        'day_name': day_names,
        'season': seasons,
        'city': cities,
    }

    # Create a list of categories for the OneHotEncoder
    categories_list = [category_definitions[col] for col in categorical_cols]

    # Create the OneHotEncoder with specific categories
    categorical_transformer = OneHotEncoder(
        categories=categories_list,  # Correctly specify categories
        handle_unknown='ignore'
    )

    preprocessor = ColumnTransformer(
        transformers=[
            ('num', StandardScaler(), numerical_cols),   # Scale numerical features
            ('cat', categorical_transformer, categorical_cols)   # One-hot encode categorical features
        ]
    )
    X_preprocessed = preprocessor.fit_transform(X)

    # Verify the shape
    print(f"Shape of preprocessed features: {X_preprocessed.shape}")

    print(X_preprocessed.shape)

    # Split the data randomly into train (80%) and test (20%) sets
    X_train, X_test, y_train, y_test = train_test_split(X_preprocessed, y, test_size=0.2, random_state=42)

    return X_train, y_train, X_test, y_test

    # Apply transformations to the features
    X_preprocessed = preprocessor.fit_transform(X)

    # Split the data randomly into train (80%) and test (20%) sets
    X_train, X_test, y_train, y_test = train_test_split(X_preprocessed, y, test_size=0.2, random_state=42)

    return X_train, y_train, X_test, y_test

# Function to evaluate models using RandomizedSearchCV for hyperparameter optimization
def evaluate_models(X_train, y_train, X_test, y_test, target_name, city_name):
    print(f"\nEvaluating models for City: {city_name}, Target: {target_name}")
    
    # Initialize base regression models and hyperparameter search space
    models = {
        'Linear Regression': {
            'model': LinearRegression(),
            'params': None  # No hyperparameter tuning for LinearRegression
        },
        'Random Forest': {
            'model': RandomForestRegressor(random_state=42, bootstrap=True, oob_score=True, n_jobs=-1),
            'params': {
                'n_estimators': [25, 50, 100, 200],
                'max_depth': [3, 5, 10, 15],
                'min_samples_split': [2, 5, 8],
                'min_samples_leaf': [1, 2, 5]
            }
        },
        'XGBoost': {
            'model': XGBRegressor(random_state=42, early_stopping_rounds=10),  # Specify early stopping here
            'params': {
                'n_estimators': [25, 50, 100, 200],
                'max_depth': [3, 5, 10],
                'learning_rate': [0.01, 0.1],
                'min_child_weight': [1, 5, 10],
                'subsample': [0.5, 0.8],
                'colsample_bytree': [0.5, 0.7, 1.0]
            }
        }
    }

    best_overall_model = None
    best_overall_score = -np.inf
    results = []

    # Create a subfolder for each city and target (appliance)
    model_save_path = f"{base_model_save_path}/{city_name}/{target_name}"
    os.makedirs(model_save_path, exist_ok=True)

    # Split X_train into train and validation sets for early stopping
    X_train_part, X_val, y_train_part, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

    # Loop through models and perform training and evaluation
    for model_name, model_info in models.items():
        model = model_info['model']
        params = model_info['params']

        if model_name == 'XGBoost':
            # Use eval_set for early stopping
            optimizer = RandomizedSearchCV(
                model, params, n_iter=max_iter, cv=cv_folds, random_state=42,
                scoring='r2', n_jobs=-1
            )
            optimizer.fit(
                X_train_part, y_train_part,
                eval_set=[(X_val, y_val)],  # Validation set for early stopping
                verbose=False
            )
            best_model = optimizer.best_estimator_

        elif params is not None:
            optimizer = RandomizedSearchCV(model, params, n_iter=max_iter, cv=cv_folds, random_state=42, scoring='r2', n_jobs=-1)
            optimizer.fit(X_train, y_train)
            best_model = optimizer.best_estimator_
        
        else:
            best_model = model
            best_model.fit(X_train, y_train)

        # Predictions and evaluations
        y_train_pred = best_model.predict(X_train)
        y_test_pred = best_model.predict(X_test)
        mae_train = mean_absolute_error(y_train, y_train_pred)
        mape_train = mean_absolute_percentage_error(y_train, y_train_pred)
        r2_train = r2_score(y_train, y_train_pred)
        mae_test = mean_absolute_error(y_test, y_test_pred)
        mape_test = mean_absolute_percentage_error(y_test, y_test_pred)
        r2_test = r2_score(y_test, y_test_pred)

        # Store results
        results.append({
            'Model': model_name,
            'Train_MAE': round(mae_train, 2),
            'Train_MAPE (%)': round(mape_train, 2),
            'Train_R2': round(r2_train, 2),
            'Test_MAE': round(mae_test, 2),
            'Test_MAPE (%)': round(mape_test, 2),
            'Test_R2': round(r2_test, 2)
        })

        if r2_test > best_overall_score:
            best_overall_score = r2_test
            best_overall_model = best_model
            y_train_best, y_test_best = y_train_pred, y_test_pred

    # Plot and save the best model predictions
    plot_best_model(y_train, y_train_best, y_test, y_test_best, best_overall_model.__class__.__name__, target_name, city_name, model_save_path)

    # Save the best model
    best_model_filename = f"{model_save_path}/best_model_{target_name}.joblib"
    joblib.dump(best_overall_model, best_model_filename)
    print(f"Best model for {city_name} - {target_name} saved as {best_model_filename}")

    # Summary DataFrame
    model_summary_df = pd.DataFrame(results, columns=['Model', 'Train_MAE', 'Train_MAPE (%)', 'Train_R2', 'Test_MAE', 'Test_MAPE (%)', 'Test_R2'])
    summary_filename = f"{model_save_path}/model_summary_{target_name}.csv"
    model_summary_df.to_csv(summary_filename, index=False)
    display(model_summary_df)

    return model_summary_df, best_overall_model.__class__.__name__

# Loop through each city and target, and run model training and evaluation
def run_model_per_city(dfs, targets):
    all_best_models_summary = []  # To store best models for each city and target
    for city_name, city_info in dfs.items():
        df = city_info['data']
        features = city_info['features']
        best_models_summary = []
        
        for target_name in targets:
            if features:
                X_train, y_train, X_test, y_test = prepare_data(df, target_name, features)
                summary_df, best_model_name = evaluate_models(X_train, y_train, X_test, y_test, target_name, city_name)
                best_models_summary.append({
                    'DataFrame_Name': city_name,
                    'Target': target_name,
                    'Best_Model': best_model_name,
                    'Test_R2': summary_df['Test_R2'].max()
                })

        # Display summary table of best models for each appliance in this city
        best_models_summary_df = pd.DataFrame(best_models_summary)
        print(f"\nBest Models Summary for City: {city_name}\n")
        display(best_models_summary_df)
        
        # Add the current city's best models to the combined summary
        all_best_models_summary.extend(best_models_summary)

    # If more than one city, display a combined summary table for all cities
    if len(dfs) > 1:
        combined_best_models_summary_df = pd.DataFrame(all_best_models_summary)
        print("\nCombined Best Models Summary for All Cities\n")
        display(combined_best_models_summary_df)
        
        combined_best_models_summary_df.to_csv('../models/energy_consumption/models_summary.csv')
        

In [129]:
features_city = ['month', 'day_name', 'hour', 'season','is_holiday', 'temp', 'rhum', 'wspd']
features_multi = ['city','month', 'day_name', 'hour', 'season','is_holiday', 'temp', 'rhum', 'wspd']


dfs = {
    'Austin': {'data': df_aus, 'features': features_city},
    'NY': {'data': df_ny, 'features': features_city},
    'Multi_city': {'data': df, 'features': features_multi}
}

# Target columns to predict
target_columns = ['grid', 'bathroom', 'bedroom', 'car', 'diningroom','livingroom', 'office', 'utilityroom', 'waterheater', 'aircomp','kitchenArea', 'washer_dryer', 'other']

run_model_per_city(dfs, target_columns)


Evaluating models for City: Austin, Target: grid
Plot for best model of Austin - grid saved as ../models/energy_consumption/Austin/grid/best_model_plot_grid.png
Best model for Austin - grid saved as ../models/energy_consumption/Austin/grid/best_model_grid.joblib


Unnamed: 0,Model,Train_MAE,Train_MAPE (%),Train_R2,Test_MAE,Test_MAPE (%),Test_R2
0,Linear Regression,0.56,172.54,0.44,0.56,218.12,0.44
1,Random Forest,0.15,57.42,0.95,0.21,112.67,0.91
2,XGBoost,0.14,47.94,0.96,0.2,113.54,0.92



Evaluating models for City: Austin, Target: bathroom
Plot for best model of Austin - bathroom saved as ../models/energy_consumption/Austin/bathroom/best_model_plot_bathroom.png
Best model for Austin - bathroom saved as ../models/energy_consumption/Austin/bathroom/best_model_bathroom.joblib


Unnamed: 0,Model,Train_MAE,Train_MAPE (%),Train_R2,Test_MAE,Test_MAPE (%),Test_R2
0,Linear Regression,0.0,65.62,0.18,0.0,66.35,0.19
1,Random Forest,0.0,33.05,0.75,0.0,48.08,0.47
2,XGBoost,0.0,38.74,0.7,0.0,50.19,0.45



Evaluating models for City: Austin, Target: bedroom
Plot for best model of Austin - bedroom saved as ../models/energy_consumption/Austin/bedroom/best_model_plot_bedroom.png
Best model for Austin - bedroom saved as ../models/energy_consumption/Austin/bedroom/best_model_bedroom.joblib


Unnamed: 0,Model,Train_MAE,Train_MAPE (%),Train_R2,Test_MAE,Test_MAPE (%),Test_R2
0,Linear Regression,0.01,24.44,0.28,0.01,24.65,0.29
1,Random Forest,0.0,10.35,0.85,0.0,13.92,0.73
2,XGBoost,0.0,6.46,0.92,0.0,11.6,0.79



Evaluating models for City: Austin, Target: car


  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100


Plot for best model of Austin - car saved as ../models/energy_consumption/Austin/car/best_model_plot_car.png
Best model for Austin - car saved as ../models/energy_consumption/Austin/car/best_model_car.joblib


Unnamed: 0,Model,Train_MAE,Train_MAPE (%),Train_R2,Test_MAE,Test_MAPE (%),Test_R2
0,Linear Regression,0.07,inf,0.09,0.07,inf,0.11
1,Random Forest,0.03,inf,0.74,0.05,inf,0.56
2,XGBoost,0.02,inf,0.85,0.04,inf,0.6



Evaluating models for City: Austin, Target: diningroom


  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100


Plot for best model of Austin - diningroom saved as ../models/energy_consumption/Austin/diningroom/best_model_plot_diningroom.png
Best model for Austin - diningroom saved as ../models/energy_consumption/Austin/diningroom/best_model_diningroom.joblib


Unnamed: 0,Model,Train_MAE,Train_MAPE (%),Train_R2,Test_MAE,Test_MAPE (%),Test_R2
0,Linear Regression,0.0,inf,0.02,0.0,inf,0.02
1,Random Forest,0.0,,0.64,0.0,inf,0.39
2,XGBoost,0.0,inf,0.08,0.0,inf,0.04



Evaluating models for City: Austin, Target: livingroom
Plot for best model of Austin - livingroom saved as ../models/energy_consumption/Austin/livingroom/best_model_plot_livingroom.png
Best model for Austin - livingroom saved as ../models/energy_consumption/Austin/livingroom/best_model_livingroom.joblib


Unnamed: 0,Model,Train_MAE,Train_MAPE (%),Train_R2,Test_MAE,Test_MAPE (%),Test_R2
0,Linear Regression,0.01,48.78,0.16,0.01,49.03,0.16
1,Random Forest,0.0,16.14,0.85,0.01,20.81,0.73
2,XGBoost,0.0,12.42,0.9,0.01,18.95,0.75



Evaluating models for City: Austin, Target: office
Plot for best model of Austin - office saved as ../models/energy_consumption/Austin/office/best_model_plot_office.png
Best model for Austin - office saved as ../models/energy_consumption/Austin/office/best_model_office.joblib


Unnamed: 0,Model,Train_MAE,Train_MAPE (%),Train_R2,Test_MAE,Test_MAPE (%),Test_R2
0,Linear Regression,0.0,36.01,0.51,0.0,37.0,0.49
1,Random Forest,0.0,18.53,0.86,0.0,26.6,0.74
2,XGBoost,0.0,15.63,0.92,0.0,25.3,0.77



Evaluating models for City: Austin, Target: utilityroom


  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100


Plot for best model of Austin - utilityroom saved as ../models/energy_consumption/Austin/utilityroom/best_model_plot_utilityroom.png
Best model for Austin - utilityroom saved as ../models/energy_consumption/Austin/utilityroom/best_model_utilityroom.joblib


Unnamed: 0,Model,Train_MAE,Train_MAPE (%),Train_R2,Test_MAE,Test_MAPE (%),Test_R2
0,Linear Regression,0.01,inf,0.36,0.01,inf,0.34
1,Random Forest,0.0,inf,0.92,0.0,inf,0.83
2,XGBoost,0.0,inf,0.96,0.0,inf,0.85



Evaluating models for City: Austin, Target: waterheater


  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100


Plot for best model of Austin - waterheater saved as ../models/energy_consumption/Austin/waterheater/best_model_plot_waterheater.png
Best model for Austin - waterheater saved as ../models/energy_consumption/Austin/waterheater/best_model_waterheater.joblib


Unnamed: 0,Model,Train_MAE,Train_MAPE (%),Train_R2,Test_MAE,Test_MAPE (%),Test_R2
0,Linear Regression,0.0,inf,0.01,0.0,inf,0.0
1,Random Forest,0.0,inf,0.01,0.0,inf,0.0
2,XGBoost,0.0,inf,0.01,0.0,inf,0.0



Evaluating models for City: Austin, Target: aircomp
Plot for best model of Austin - aircomp saved as ../models/energy_consumption/Austin/aircomp/best_model_plot_aircomp.png
Best model for Austin - aircomp saved as ../models/energy_consumption/Austin/aircomp/best_model_aircomp.joblib


Unnamed: 0,Model,Train_MAE,Train_MAPE (%),Train_R2,Test_MAE,Test_MAPE (%),Test_R2
0,Linear Regression,0.32,167.82,0.25,0.3,171.13,0.74
1,Random Forest,0.09,28.84,0.34,0.1,39.88,0.92
2,XGBoost,0.19,93.6,0.3,0.17,94.35,0.91



Evaluating models for City: Austin, Target: kitchenArea
Plot for best model of Austin - kitchenArea saved as ../models/energy_consumption/Austin/kitchenArea/best_model_plot_kitchenArea.png
Best model for Austin - kitchenArea saved as ../models/energy_consumption/Austin/kitchenArea/best_model_kitchenArea.joblib


Unnamed: 0,Model,Train_MAE,Train_MAPE (%),Train_R2,Test_MAE,Test_MAPE (%),Test_R2
0,Linear Regression,0.09,22.56,0.0,0.03,22.44,0.11
1,Random Forest,0.09,20.35,0.02,0.05,26.8,-100.09
2,XGBoost,0.09,21.9,-0.0,0.04,21.72,-0.22



Evaluating models for City: Austin, Target: washer_dryer
Plot for best model of Austin - washer_dryer saved as ../models/energy_consumption/Austin/washer_dryer/best_model_plot_washer_dryer.png
Best model for Austin - washer_dryer saved as ../models/energy_consumption/Austin/washer_dryer/best_model_washer_dryer.joblib


Unnamed: 0,Model,Train_MAE,Train_MAPE (%),Train_R2,Test_MAE,Test_MAPE (%),Test_R2
0,Linear Regression,0.06,452.59,0.0,0.05,441.97,0.08
1,Random Forest,0.05,235.69,0.03,0.04,284.77,-1.8
2,XGBoost,0.06,475.36,-0.0,0.05,465.0,-0.0



Evaluating models for City: Austin, Target: other
Plot for best model of Austin - other saved as ../models/energy_consumption/Austin/other/best_model_plot_other.png
Best model for Austin - other saved as ../models/energy_consumption/Austin/other/best_model_other.joblib


Unnamed: 0,Model,Train_MAE,Train_MAPE (%),Train_R2,Test_MAE,Test_MAPE (%),Test_R2
0,Linear Regression,0.15,35.37,0.0,0.02,36.05,0.09
1,Random Forest,0.17,52.1,0.03,0.06,59.78,-826.88
2,XGBoost,0.17,56.99,-0.0,0.05,56.68,-2.06



Best Models Summary for City: Austin



Unnamed: 0,DataFrame_Name,Target,Best_Model,Test_R2
0,Austin,grid,XGBRegressor,0.92
1,Austin,bathroom,RandomForestRegressor,0.47
2,Austin,bedroom,XGBRegressor,0.79
3,Austin,car,XGBRegressor,0.6
4,Austin,diningroom,RandomForestRegressor,0.39
5,Austin,livingroom,XGBRegressor,0.75
6,Austin,office,XGBRegressor,0.77
7,Austin,utilityroom,XGBRegressor,0.85
8,Austin,waterheater,XGBRegressor,0.0
9,Austin,aircomp,RandomForestRegressor,0.92



Evaluating models for City: NY, Target: grid
Plot for best model of NY - grid saved as ../models/energy_consumption/NY/grid/best_model_plot_grid.png
Best model for NY - grid saved as ../models/energy_consumption/NY/grid/best_model_grid.joblib


Unnamed: 0,Model,Train_MAE,Train_MAPE (%),Train_R2,Test_MAE,Test_MAPE (%),Test_R2
0,Linear Regression,0.55,164.28,0.3,0.56,124.94,0.31
1,Random Forest,0.15,137.75,0.94,0.22,87.52,0.88
2,XGBoost,0.13,97.51,0.96,0.2,83.25,0.89



Evaluating models for City: NY, Target: bathroom
Plot for best model of NY - bathroom saved as ../models/energy_consumption/NY/bathroom/best_model_plot_bathroom.png
Best model for NY - bathroom saved as ../models/energy_consumption/NY/bathroom/best_model_bathroom.joblib


Unnamed: 0,Model,Train_MAE,Train_MAPE (%),Train_R2,Test_MAE,Test_MAPE (%),Test_R2
0,Linear Regression,0.0,183.3,0.29,0.0,189.18,0.3
1,Random Forest,0.0,77.7,0.84,0.0,104.61,0.69
2,XGBoost,0.0,118.68,0.76,0.0,126.19,0.64



Evaluating models for City: NY, Target: bedroom
Plot for best model of NY - bedroom saved as ../models/energy_consumption/NY/bedroom/best_model_plot_bedroom.png
Best model for NY - bedroom saved as ../models/energy_consumption/NY/bedroom/best_model_bedroom.joblib


Unnamed: 0,Model,Train_MAE,Train_MAPE (%),Train_R2,Test_MAE,Test_MAPE (%),Test_R2
0,Linear Regression,0.0,219.35,0.11,0.0,210.31,0.1
1,Random Forest,0.0,72.68,0.76,0.0,98.06,0.46
2,XGBoost,0.0,83.84,0.73,0.0,105.24,0.45



Evaluating models for City: NY, Target: car


  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100


Plot for best model of NY - car saved as ../models/energy_consumption/NY/car/best_model_plot_car.png
Best model for NY - car saved as ../models/energy_consumption/NY/car/best_model_car.joblib


Unnamed: 0,Model,Train_MAE,Train_MAPE (%),Train_R2,Test_MAE,Test_MAPE (%),Test_R2
0,Linear Regression,0.05,inf,0.02,0.05,2877.07,0.01
1,Random Forest,0.03,inf,0.7,0.04,1689.96,0.36
2,XGBoost,0.03,inf,0.66,0.04,1667.65,0.34



Evaluating models for City: NY, Target: diningroom


  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100


Plot for best model of NY - diningroom saved as ../models/energy_consumption/NY/diningroom/best_model_plot_diningroom.png
Best model for NY - diningroom saved as ../models/energy_consumption/NY/diningroom/best_model_diningroom.joblib


Unnamed: 0,Model,Train_MAE,Train_MAPE (%),Train_R2,Test_MAE,Test_MAPE (%),Test_R2
0,Linear Regression,0.0,inf,0.0,0.0,3.67,-0.05
1,Random Forest,0.0,inf,0.07,0.0,3.16,-0.54
2,XGBoost,0.0,inf,-0.0,0.0,3.25,-0.01



Evaluating models for City: NY, Target: livingroom
Plot for best model of NY - livingroom saved as ../models/energy_consumption/NY/livingroom/best_model_plot_livingroom.png
Best model for NY - livingroom saved as ../models/energy_consumption/NY/livingroom/best_model_livingroom.joblib


Unnamed: 0,Model,Train_MAE,Train_MAPE (%),Train_R2,Test_MAE,Test_MAPE (%),Test_R2
0,Linear Regression,0.01,44.96,0.13,0.01,43.76,0.12
1,Random Forest,0.0,15.62,0.85,0.0,21.68,0.68
2,XGBoost,0.0,12.06,0.88,0.0,20.44,0.7



Evaluating models for City: NY, Target: office


  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100


Plot for best model of NY - office saved as ../models/energy_consumption/NY/office/best_model_plot_office.png
Best model for NY - office saved as ../models/energy_consumption/NY/office/best_model_office.joblib


Unnamed: 0,Model,Train_MAE,Train_MAPE (%),Train_R2,Test_MAE,Test_MAPE (%),Test_R2
0,Linear Regression,0.0,inf,0.05,0.0,25.73,0.05
1,Random Forest,0.0,inf,0.66,0.0,18.99,0.42
2,XGBoost,0.0,inf,0.51,0.0,20.65,0.33



Evaluating models for City: NY, Target: utilityroom
Plot for best model of NY - utilityroom saved as ../models/energy_consumption/NY/utilityroom/best_model_plot_utilityroom.png
Best model for NY - utilityroom saved as ../models/energy_consumption/NY/utilityroom/best_model_utilityroom.joblib


Unnamed: 0,Model,Train_MAE,Train_MAPE (%),Train_R2,Test_MAE,Test_MAPE (%),Test_R2
0,Linear Regression,0.0,20.99,0.21,0.0,20.37,0.22
1,Random Forest,0.0,11.33,0.75,0.0,16.14,0.5
2,XGBoost,0.0,12.41,0.69,0.0,16.69,0.45



Evaluating models for City: NY, Target: waterheater
Plot for best model of NY - waterheater saved as ../models/energy_consumption/NY/waterheater/best_model_plot_waterheater.png
Best model for NY - waterheater saved as ../models/energy_consumption/NY/waterheater/best_model_waterheater.joblib


Unnamed: 0,Model,Train_MAE,Train_MAPE (%),Train_R2,Test_MAE,Test_MAPE (%),Test_R2
0,Linear Regression,0.04,1111.01,0.02,0.04,813.0,0.02
1,Random Forest,0.03,992.71,0.17,0.04,725.97,0.13
2,XGBoost,0.03,998.45,0.19,0.04,725.17,0.14



Evaluating models for City: NY, Target: aircomp
Plot for best model of NY - aircomp saved as ../models/energy_consumption/NY/aircomp/best_model_plot_aircomp.png
Best model for NY - aircomp saved as ../models/energy_consumption/NY/aircomp/best_model_aircomp.joblib


Unnamed: 0,Model,Train_MAE,Train_MAPE (%),Train_R2,Test_MAE,Test_MAPE (%),Test_R2
0,Linear Regression,0.08,81.74,0.19,0.08,83.08,0.2
1,Random Forest,0.03,24.72,0.88,0.04,35.65,0.76
2,XGBoost,0.03,26.1,0.87,0.04,37.38,0.75



Evaluating models for City: NY, Target: kitchenArea
Plot for best model of NY - kitchenArea saved as ../models/energy_consumption/NY/kitchenArea/best_model_plot_kitchenArea.png
Best model for NY - kitchenArea saved as ../models/energy_consumption/NY/kitchenArea/best_model_kitchenArea.joblib


Unnamed: 0,Model,Train_MAE,Train_MAPE (%),Train_R2,Test_MAE,Test_MAPE (%),Test_R2
0,Linear Regression,0.03,29.92,0.15,0.03,30.07,0.13
1,Random Forest,0.02,14.44,0.74,0.02,18.45,0.58
2,XGBoost,0.02,16.99,0.65,0.02,19.07,0.56



Evaluating models for City: NY, Target: washer_dryer
Plot for best model of NY - washer_dryer saved as ../models/energy_consumption/NY/washer_dryer/best_model_plot_washer_dryer.png
Best model for NY - washer_dryer saved as ../models/energy_consumption/NY/washer_dryer/best_model_washer_dryer.joblib


Unnamed: 0,Model,Train_MAE,Train_MAPE (%),Train_R2,Test_MAE,Test_MAPE (%),Test_R2
0,Linear Regression,0.05,1856.25,0.03,0.05,1923.17,0.04
1,Random Forest,0.03,621.78,0.67,0.04,969.62,0.37
2,XGBoost,0.03,774.82,0.58,0.04,1075.53,0.31



Evaluating models for City: NY, Target: other
Plot for best model of NY - other saved as ../models/energy_consumption/NY/other/best_model_plot_other.png
Best model for NY - other saved as ../models/energy_consumption/NY/other/best_model_other.joblib


Unnamed: 0,Model,Train_MAE,Train_MAPE (%),Train_R2,Test_MAE,Test_MAPE (%),Test_R2
0,Linear Regression,0.02,32.41,0.1,0.02,33.37,0.11
1,Random Forest,0.01,15.54,0.77,0.02,22.22,0.55
2,XGBoost,0.01,13.44,0.8,0.01,21.77,0.53



Best Models Summary for City: NY



Unnamed: 0,DataFrame_Name,Target,Best_Model,Test_R2
0,NY,grid,XGBRegressor,0.89
1,NY,bathroom,RandomForestRegressor,0.69
2,NY,bedroom,RandomForestRegressor,0.46
3,NY,car,RandomForestRegressor,0.36
4,NY,diningroom,XGBRegressor,-0.01
5,NY,livingroom,XGBRegressor,0.7
6,NY,office,RandomForestRegressor,0.42
7,NY,utilityroom,RandomForestRegressor,0.5
8,NY,waterheater,XGBRegressor,0.14
9,NY,aircomp,RandomForestRegressor,0.76



Evaluating models for City: Multi_city, Target: grid
Plot for best model of Multi_city - grid saved as ../models/energy_consumption/Multi_city/grid/best_model_plot_grid.png
Best model for Multi_city - grid saved as ../models/energy_consumption/Multi_city/grid/best_model_grid.joblib


Unnamed: 0,Model,Train_MAE,Train_MAPE (%),Train_R2,Test_MAE,Test_MAPE (%),Test_R2
0,Linear Regression,0.55,159.85,0.3,0.56,125.28,0.32
1,Random Forest,0.15,137.68,0.94,0.22,87.27,0.88
2,XGBoost,0.13,97.51,0.96,0.2,83.25,0.89



Evaluating models for City: Multi_city, Target: bathroom
Plot for best model of Multi_city - bathroom saved as ../models/energy_consumption/Multi_city/bathroom/best_model_plot_bathroom.png
Best model for Multi_city - bathroom saved as ../models/energy_consumption/Multi_city/bathroom/best_model_bathroom.joblib


Unnamed: 0,Model,Train_MAE,Train_MAPE (%),Train_R2,Test_MAE,Test_MAPE (%),Test_R2
0,Linear Regression,0.0,182.12,0.29,0.0,187.83,0.3
1,Random Forest,0.0,77.69,0.84,0.0,104.59,0.69
2,XGBoost,0.0,116.69,0.77,0.0,124.71,0.64



Evaluating models for City: Multi_city, Target: bedroom
Plot for best model of Multi_city - bedroom saved as ../models/energy_consumption/Multi_city/bedroom/best_model_plot_bedroom.png
Best model for Multi_city - bedroom saved as ../models/energy_consumption/Multi_city/bedroom/best_model_bedroom.joblib


Unnamed: 0,Model,Train_MAE,Train_MAPE (%),Train_R2,Test_MAE,Test_MAPE (%),Test_R2
0,Linear Regression,0.0,219.15,0.11,0.0,209.6,0.1
1,Random Forest,0.0,72.69,0.76,0.0,97.79,0.46
2,XGBoost,0.0,94.54,0.7,0.0,116.55,0.44



Evaluating models for City: Multi_city, Target: car


  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100


Plot for best model of Multi_city - car saved as ../models/energy_consumption/Multi_city/car/best_model_plot_car.png
Best model for Multi_city - car saved as ../models/energy_consumption/Multi_city/car/best_model_car.joblib


Unnamed: 0,Model,Train_MAE,Train_MAPE (%),Train_R2,Test_MAE,Test_MAPE (%),Test_R2
0,Linear Regression,0.05,inf,0.02,0.05,2879.23,0.0
1,Random Forest,0.03,inf,0.7,0.04,1686.31,0.36
2,XGBoost,0.03,inf,0.66,0.04,1667.65,0.34



Evaluating models for City: Multi_city, Target: diningroom


  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100


Plot for best model of Multi_city - diningroom saved as ../models/energy_consumption/Multi_city/diningroom/best_model_plot_diningroom.png
Best model for Multi_city - diningroom saved as ../models/energy_consumption/Multi_city/diningroom/best_model_diningroom.joblib


Unnamed: 0,Model,Train_MAE,Train_MAPE (%),Train_R2,Test_MAE,Test_MAPE (%),Test_R2
0,Linear Regression,0.0,inf,0.0,0.0,3.67,-0.05
1,Random Forest,0.0,inf,0.07,0.0,3.15,-0.52
2,XGBoost,0.0,inf,-0.0,0.0,3.25,-0.01



Evaluating models for City: Multi_city, Target: livingroom
Plot for best model of Multi_city - livingroom saved as ../models/energy_consumption/Multi_city/livingroom/best_model_plot_livingroom.png
Best model for Multi_city - livingroom saved as ../models/energy_consumption/Multi_city/livingroom/best_model_livingroom.joblib


Unnamed: 0,Model,Train_MAE,Train_MAPE (%),Train_R2,Test_MAE,Test_MAPE (%),Test_R2
0,Linear Regression,0.01,44.91,0.13,0.01,43.67,0.12
1,Random Forest,0.0,15.62,0.85,0.0,21.69,0.68
2,XGBoost,0.0,12.06,0.88,0.0,20.44,0.7



Evaluating models for City: Multi_city, Target: office


  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100


Plot for best model of Multi_city - office saved as ../models/energy_consumption/Multi_city/office/best_model_plot_office.png
Best model for Multi_city - office saved as ../models/energy_consumption/Multi_city/office/best_model_office.joblib


Unnamed: 0,Model,Train_MAE,Train_MAPE (%),Train_R2,Test_MAE,Test_MAPE (%),Test_R2
0,Linear Regression,0.0,inf,0.05,0.0,25.74,0.05
1,Random Forest,0.0,inf,0.66,0.0,18.99,0.42
2,XGBoost,0.0,inf,0.51,0.0,20.65,0.33



Evaluating models for City: Multi_city, Target: utilityroom
Plot for best model of Multi_city - utilityroom saved as ../models/energy_consumption/Multi_city/utilityroom/best_model_plot_utilityroom.png
Best model for Multi_city - utilityroom saved as ../models/energy_consumption/Multi_city/utilityroom/best_model_utilityroom.joblib


Unnamed: 0,Model,Train_MAE,Train_MAPE (%),Train_R2,Test_MAE,Test_MAPE (%),Test_R2
0,Linear Regression,0.0,20.96,0.21,0.0,20.35,0.22
1,Random Forest,0.0,11.33,0.75,0.0,16.14,0.5
2,XGBoost,0.0,10.98,0.75,0.0,16.36,0.46



Evaluating models for City: Multi_city, Target: waterheater
Plot for best model of Multi_city - waterheater saved as ../models/energy_consumption/Multi_city/waterheater/best_model_plot_waterheater.png
Best model for Multi_city - waterheater saved as ../models/energy_consumption/Multi_city/waterheater/best_model_waterheater.joblib


Unnamed: 0,Model,Train_MAE,Train_MAPE (%),Train_R2,Test_MAE,Test_MAPE (%),Test_R2
0,Linear Regression,0.04,1110.58,0.02,0.04,812.89,0.02
1,Random Forest,0.03,992.71,0.17,0.04,725.97,0.13
2,XGBoost,0.03,1014.81,0.16,0.04,740.85,0.12



Evaluating models for City: Multi_city, Target: aircomp
Plot for best model of Multi_city - aircomp saved as ../models/energy_consumption/Multi_city/aircomp/best_model_plot_aircomp.png
Best model for Multi_city - aircomp saved as ../models/energy_consumption/Multi_city/aircomp/best_model_aircomp.joblib


Unnamed: 0,Model,Train_MAE,Train_MAPE (%),Train_R2,Test_MAE,Test_MAPE (%),Test_R2
0,Linear Regression,0.08,81.68,0.19,0.08,83.0,0.2
1,Random Forest,0.03,24.74,0.88,0.04,35.72,0.76
2,XGBoost,0.03,26.1,0.87,0.04,37.38,0.75



Evaluating models for City: Multi_city, Target: kitchenArea
Plot for best model of Multi_city - kitchenArea saved as ../models/energy_consumption/Multi_city/kitchenArea/best_model_plot_kitchenArea.png
Best model for Multi_city - kitchenArea saved as ../models/energy_consumption/Multi_city/kitchenArea/best_model_kitchenArea.joblib


Unnamed: 0,Model,Train_MAE,Train_MAPE (%),Train_R2,Test_MAE,Test_MAPE (%),Test_R2
0,Linear Regression,0.03,29.88,0.15,0.03,30.08,0.13
1,Random Forest,0.02,14.44,0.74,0.02,18.46,0.58
2,XGBoost,0.02,14.72,0.74,0.02,18.78,0.57



Evaluating models for City: Multi_city, Target: washer_dryer
Plot for best model of Multi_city - washer_dryer saved as ../models/energy_consumption/Multi_city/washer_dryer/best_model_plot_washer_dryer.png
Best model for Multi_city - washer_dryer saved as ../models/energy_consumption/Multi_city/washer_dryer/best_model_washer_dryer.joblib


Unnamed: 0,Model,Train_MAE,Train_MAPE (%),Train_R2,Test_MAE,Test_MAPE (%),Test_R2
0,Linear Regression,0.05,1854.23,0.03,0.05,1920.51,0.05
1,Random Forest,0.03,622.78,0.67,0.04,970.29,0.37
2,XGBoost,0.03,707.05,0.6,0.04,1007.08,0.32



Evaluating models for City: Multi_city, Target: other
Plot for best model of Multi_city - other saved as ../models/energy_consumption/Multi_city/other/best_model_plot_other.png
Best model for Multi_city - other saved as ../models/energy_consumption/Multi_city/other/best_model_other.joblib


Unnamed: 0,Model,Train_MAE,Train_MAPE (%),Train_R2,Test_MAE,Test_MAPE (%),Test_R2
0,Linear Regression,0.02,32.43,0.1,0.02,33.4,0.11
1,Random Forest,0.01,15.54,0.77,0.02,22.23,0.55
2,XGBoost,0.01,13.44,0.8,0.01,21.77,0.53



Best Models Summary for City: Multi_city



Unnamed: 0,DataFrame_Name,Target,Best_Model,Test_R2
0,Multi_city,grid,XGBRegressor,0.89
1,Multi_city,bathroom,RandomForestRegressor,0.69
2,Multi_city,bedroom,RandomForestRegressor,0.46
3,Multi_city,car,RandomForestRegressor,0.36
4,Multi_city,diningroom,XGBRegressor,-0.01
5,Multi_city,livingroom,XGBRegressor,0.7
6,Multi_city,office,RandomForestRegressor,0.42
7,Multi_city,utilityroom,RandomForestRegressor,0.5
8,Multi_city,waterheater,RandomForestRegressor,0.13
9,Multi_city,aircomp,RandomForestRegressor,0.76



Combined Best Models Summary for All Cities



Unnamed: 0,DataFrame_Name,Target,Best_Model,Test_R2
0,Austin,grid,XGBRegressor,0.92
1,Austin,bathroom,RandomForestRegressor,0.47
2,Austin,bedroom,XGBRegressor,0.79
3,Austin,car,XGBRegressor,0.6
4,Austin,diningroom,RandomForestRegressor,0.39
5,Austin,livingroom,XGBRegressor,0.75
6,Austin,office,XGBRegressor,0.77
7,Austin,utilityroom,XGBRegressor,0.85
8,Austin,waterheater,XGBRegressor,0.0
9,Austin,aircomp,RandomForestRegressor,0.92
