In [1]:
import os

import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the building-energy workbook into a DataFrame.
# The location defaults to the original workstation path, but it can be
# overridden through the BUILDING_ENERGY_DATA environment variable so the
# notebook can be re-run on another machine without editing this cell.
DATA_PATH = os.environ.get(
    'BUILDING_ENERGY_DATA',
    'D:/Kongsburg/Data/building_energy_data.xlsx',
)
data = pd.read_excel(DATA_PATH)



In [2]:
# Inspect the dtype pandas inferred for every column of the loaded sheet
column_dtypes = data.dtypes
print(column_dtypes)


Date (YYYY-MM-DD)                           datetime64[ns]
Time (HH:MM)                                        object
Outdoor Temperature (°C)                             int64
Corporate                                           object
Building Size (m²)                                   int64
Humidity (%)                                         int64
HVAC Energy Consumption (kWh)                        int64
Lighting Energy Consumption (kWh)                    int64
Equipment Energy Consumption (kWh)                   int64
Total Energy Consumption (kWh)                       int64
Corporate Total Energy Consumption (kWh)             int64
HVAC Mode                                           object
Solar Radiation (W/m²)                               int64
Wind Speed (m/s)                                     int64
Precipitation (mm)                                   int64
Occupancy Level (%)                                  int64
Thermostat Settings (°C)                             int

In [3]:
# One-hot encode the categorical columns: each listed column is replaced by
# one indicator column per category (named "<column>_<category>").
categorical_columns = [
    'Corporate',
    'HVAC Mode',
    'Lighting Schedule',
    'Equipment Usage',
    'Insulation Quality',
    'Window Type',
    'Public Holidays',
    'Special Events',
]
data_encoded = pd.get_dummies(data, columns=categorical_columns)

# Preview the first rows of the encoded frame
encoded_preview = data_encoded.head()
print(encoded_preview)


  Date (YYYY-MM-DD) Time (HH:MM)  Outdoor Temperature (°C)  \
0        2021-08-25        12:38                        20   
1        2021-08-26        12:27                        22   
2        2021-08-26        07:12                        26   
3        2021-08-26        09:34                        28   
4        2021-08-27        09:21                        23   

   Building Size (m²)  Humidity (%)  HVAC Energy Consumption (kWh)  \
0               10000            50                            556   
1               20000            79                            587   
2               20000            79                            862   
3               20000            45                            645   
4               30000            51                           1107   

   Lighting Energy Consumption (kWh)  Equipment Energy Consumption (kWh)  \
0                                183                                 271   
1                                247                  

In [4]:
from sklearn.model_selection import train_test_split

# Column to predict
target = 'Thermostat Settings (°C)'

# Predictor columns fed to the regression.
# NOTE(review): if average/good/poor are the only insulation categories, the
# three indicator columns always sum to 1 and are collinear with the model
# intercept — confirm against the raw data before interpreting coefficients.
features = [
    'Outdoor Temperature (°C)',
    'Solar Radiation (W/m²)',
    'Insulation Quality_average',
    'Insulation Quality_good',
    'Insulation Quality_poor',
    'Total Energy Consumption (kWh)',
]

# Pull the design matrix and the response out of the encoded frame
X, y = data_encoded[features], data_encoded[target]

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Report the shape of each partition
for split_name, split_part in (
    ('X_train', X_train),
    ('X_test', X_test),
    ('y_train', y_train),
    ('y_test', y_test),
):
    print(f'{split_name} shape: {split_part.shape}')


X_train shape: (2400, 6)
X_test shape: (600, 6)
y_train shape: (2400,)
y_test shape: (600,)


In [5]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Fit an ordinary least-squares regression on the training split
# (fit() returns the estimator itself, so it can be chained)
model = LinearRegression().fit(X_train, y_train)

# Predict on the held-out rows
y_pred = model.predict(X_test)

# Score the predictions against the true test targets
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)
print(f'Mean Squared Error: {mse}')
print(f'R-squared: {r2}')

# Report the fitted parameters; coefficients follow the column order of X_train
coefficients, intercept = model.coef_, model.intercept_
print(f'Coefficients: {coefficients}')
print(f'Intercept: {intercept}')


Mean Squared Error: 0.7782020200501517
R-squared: 0.7104756594010996
Coefficients: [-1.67840329e-01 -1.03512036e-04  9.49115624e-03  7.55631599e-03
 -1.70474722e-02  1.58923117e-06]
Intercept: 25.533345975448356


In [6]:
import joblib

# Persist the fitted regressor to disk so it can be reloaded without retraining.
# joblib.dump returns the list of files written, which the cell displays.
model_file = 'thermostat_model.pkl'
joblib.dump(model, model_file)


['thermostat_model.pkl']

In [7]:
import joblib
import pandas as pd

# Restore the regressor persisted earlier.
# joblib files are pickle-based: only load files from a trusted source.
model = joblib.load('thermostat_model.pkl')

# One hypothetical observation, keyed by the same column names (and in the
# same order) as the feature list the model was trained on.
sample_row = {
    'Outdoor Temperature (°C)': 22,
    'Solar Radiation (W/m²)': 150,
    'Insulation Quality_average': 1,
    'Insulation Quality_good': 0,
    'Insulation Quality_poor': 0,
    'Total Energy Consumption (kWh)': 1200,
}
new_data = pd.DataFrame([sample_row])

# Predict the thermostat setting for that observation and show it to 2 decimals
predicted_temperature = model.predict(new_data)
print(f"Predicted Thermostat Setting: {predicted_temperature[0]:.2f}°C")


Predicted Thermostat Setting: 21.84°C
