In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error


# Read both workbooks from the working directory.
table_data = pd.read_excel('table_data.xlsx')
sensor_data = pd.read_excel('sensor_data.xlsx')

# Parse 'DateTime' as real timestamps in both frames. Sensor timestamps are
# additionally rounded down to the 10-minute mark.
table_data = table_data.assign(
    DateTime=lambda frame: pd.to_datetime(frame['DateTime'])
)
sensor_data = sensor_data.assign(
    DateTime=lambda frame: pd.to_datetime(frame['DateTime']).dt.floor('10min')
)


In [None]:

# Inner join: keep only rows whose 'DateTime' value occurs in both frames.
merged_data = table_data.merge(sensor_data, on='DateTime', how='inner')

# Split the timestamp into separate calendar/clock columns.
stamp = merged_data['DateTime'].dt
merged_data = merged_data.assign(
    Year=stamp.year,
    Month=stamp.month,
    Day=stamp.day,
    Hour=stamp.hour,
    Minute=stamp.minute,
)




In [None]:
def scale_features(data, target_col=None):
    """Return a copy of ``data`` with its numeric columns min-max scaled.

    Only columns of dtype ``float64`` or ``int64`` are scaled; every other
    column (including other numeric widths such as ``int32``) is passed
    through unchanged. Each call fits a fresh ``MinMaxScaler`` on the data it
    is given.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to scale. It is not modified; a scaled copy is returned.
    target_col : label or list of labels, optional
        Column(s) to leave unscaled. Labels that are not present or not
        numeric are ignored. With the default ``None`` every
        ``float64``/``int64`` column is scaled.

    Returns
    -------
    pandas.DataFrame
        Copy of ``data`` with the selected columns scaled.
    """
    # Work on a copy so the caller's frame (possibly a slice of another
    # frame) is never written to.
    scaled = data.copy()

    numerical_cols = scaled.select_dtypes(include=['float64', 'int64']).columns
    if target_col is not None:
        numerical_cols = numerical_cols.drop(target_col, errors='ignore')

    # Nothing to fit on: MinMaxScaler rejects zero columns / zero rows, so
    # return the untouched copy instead of raising.
    if len(numerical_cols) == 0 or scaled.empty:
        return scaled

    scaler = MinMaxScaler()
    scaled[numerical_cols] = scaler.fit_transform(scaled[numerical_cols])
    return scaled

# Replace missing numeric values with their column mean. numeric_only=True
# keeps non-numeric columns out of the mean computation, which would
# otherwise raise on recent pandas versions.
merged_data = merged_data.fillna(merged_data.mean(numeric_only=True))

# Build the scaled frame used by the modelling cells. A copy is passed so
# merged_data keeps its raw values: predict_ch_load reads the raw 'CH Load'
# minimum/maximum from merged_data to undo the target scaling.
scaled_data = scale_features(merged_data.copy())

print(scaled_data.head())


In [None]:

from sklearn.metrics import r2_score

# Predictors: the timestamp parts plus the sensor readings.
time_features = ['Year', 'Month', 'Day', 'Hour', 'Minute']
sensor_features = ['RH [%]', 'WBT_C', 'GPM', 'kW_Tot', 'DeltaCHW']
selected_features = time_features + sensor_features

# .copy() so the column assignment below writes to X itself rather than to a
# slice of scaled_data.
X = scaled_data[selected_features].copy()
y = scaled_data['CH Load']

# Columns must be selected with a list; a bare comma-separated key is treated
# as a single tuple label and raises KeyError.
# NOTE(review): scaling happens before the train/test split, so the test rows
# contribute to the min/max used for scaling — confirm this is acceptable.
X[sensor_features] = scale_features(X[sensor_features].copy())

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate on the held-out 20%.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

r_squared = r2_score(y_test, y_pred)
print(f'R-squared Score: {r_squared}')


In [None]:
import matplotlib.pyplot as plt

# Horizontal bar chart: one bar per column of X, sized by the fitted model's
# feature importance for that column.
features = X.columns
feature_importances = model.feature_importances_

fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(features, feature_importances)
ax.set_xlabel('Importance')
ax.set_title('Feature Importance')
plt.show()


In [None]:
def predict_ch_load(model, scaler, X):
    """Prompt for one observation on stdin and print the predicted CH Load.

    One value is requested for every column of ``X``, in column order, so the
    row handed to ``model`` has exactly the features it was trained on.

    Parameters
    ----------
    model : fitted estimator
        Object whose ``predict`` accepts a one-row frame with ``X``'s columns.
    scaler : fitted transformer
        Maps raw inputs onto the scale used in training. It must have been
        fitted on *raw* (unscaled) values. If it exposes
        ``feature_names_in_`` only those columns are transformed; otherwise
        every column of ``X`` is.
    X : pandas.DataFrame
        Training feature frame; only its column names and order are used.

    Raises
    ------
    ValueError
        If an entered value cannot be parsed as a number.

    Notes
    -----
    The prediction is mapped back to original units with the minimum and
    maximum of the module-level ``merged_data['CH Load']``.
    NOTE(review): this assumes the model's target was min-max scaled from
    those same raw values — confirm.
    """
    # Prompt text and parser for the features this notebook uses. Any other
    # column present in X falls back to a generic float prompt.
    prompts = {
        'Year': ("Enter the year:", int),
        'Month': ("Enter the month:", int),
        'Day': ("Enter the day:", int),
        'Hour': ("Enter the hour:", int),
        'Minute': ("Enter the minute:", int),
        'RH [%]': ("Relative Humidity [%]: ", float),
        'WBT_C': ("WBT [°C]: ", float),
        'GPM': ("Flowrate GPM: ", float),
        'kW_Tot': ("kW_Tot: ", float),
        'DeltaCHW': ("Delta CHW [°C]: ", float),
    }

    print("Please enter the following details for CH Load prediction:")

    feature_names = list(X.columns)
    values = []
    for name in feature_names:
        text, parse = prompts.get(name, (f"{name}: ", float))
        values.append(parse(input(text)))

    # Float dtype throughout so scaled values can be written back into the
    # integer-valued time columns.
    input_data = pd.DataFrame([values], columns=feature_names, dtype=float)

    # Scale only the columns the scaler was fitted on.
    scaled_cols = list(getattr(scaler, 'feature_names_in_', feature_names))
    input_data[scaled_cols] = scaler.transform(input_data[scaled_cols])

    predicted_ch_load_scaled = model.predict(input_data)

    # Undo the min-max scaling of the target.
    min_CH_load = merged_data['CH Load'].min()
    max_CH_load = merged_data['CH Load'].max()
    predicted_ch_load = (predicted_ch_load_scaled * (max_CH_load - min_CH_load)) + min_CH_load

    print(f"Predicted CH Load: {predicted_ch_load[0]:.2f}")


# Fit the input scaler on the RAW feature values. Fitting on X would learn the
# range of already-scaled data and leave user inputs effectively unscaled.
# The dtype filter mirrors the one in scale_features, so exactly the columns
# scaled for training are scaled here.
# NOTE(review): assumes merged_data still holds unscaled values — confirm.
raw_features = merged_data[list(X.columns)]
columns_scaled_in_training = raw_features.select_dtypes(include=['float64', 'int64']).columns
input_scaler = MinMaxScaler().fit(raw_features[columns_scaled_in_training])

predict_ch_load(model, input_scaler, X)


In [None]:
import matplotlib.pyplot as plt

# Held-out data: predicted vs. actual, with the y = x line as the reference
# for a perfect prediction.
plt.figure(figsize=(10, 6))
plt.scatter(y_test, y_pred, color='red', label='Predicted', marker='x')
plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], color='blue', lw=2, label='Perfect Fit Line')
plt.xlabel("Actual CH Load")
plt.ylabel("Predicted CH Load")
plt.title("Actual vs Predicted CH Load")
plt.legend()
plt.show()

# Predictions on the rows the model was fitted on; required by the second
# figure and not computed anywhere earlier in the notebook.
train_predictions = model.predict(X_train)

# Training data: the blue points (actual vs. actual) lie on the diagonal and
# serve as the reference for the red predictions.
plt.figure(figsize=(10, 6))
plt.scatter(y_train, y_train, color='blue', label='Actual (Train)', marker='o')
plt.scatter(y_train, train_predictions, color='red', label='Predicted (Train)', marker='x')
plt.xlabel("Actual CH Load (Train)")
plt.ylabel("Predicted CH Load (Train)")
plt.title("Actual vs Predicted CH Load (Training Data)")
plt.legend()
plt.show()

