In [None]:
# This script fits a linear regression to the data in each CSV file of a folder to estimate the parameters R and C.

In [None]:
import os
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Accumulator: one result tuple is appended per successfully processed CSV file
results = []

# Directory that is scanned for the input CSV files
folder_path = "DATA"

# Function to process each CSV file
def process_file(file_path):
    """Estimate R and C for one CSV file from its night-time samples.

    The file is read, consecutive groups of five rows are averaged, and only
    averaged samples stamped between 21:00 and 05:00 are kept. A zero-intercept
    least-squares fit of

        dT_in/dt = beta_1 * (T_out - T_in) + beta_2 * Q

    is then performed, from which ``C = 1 / beta_2`` and
    ``R = beta_2 / beta_1`` are derived.

    Args:
        file_path: Path to a CSV file containing the columns ``Timestamp``,
            ``External_Air_Temperature``, ``Internal_Air_Temperature`` and
            ``Heat_Pump_Energy_Output``.

    Returns:
        Tuple ``(file_path, R, C, R * C, rmse)`` where ``rmse`` is the
        root-mean-square error of the fit on the data it was fitted to.

    Raises:
        KeyError: If one of the required columns is missing.
        ValueError: If the file has fewer than five rows, or fewer than two
            usable night-time samples remain for the two-coefficient fit.
    """
    required_columns = [
        'Timestamp',
        'External_Air_Temperature',
        'Internal_Air_Temperature',
        'Heat_Pump_Energy_Output',
    ]
    rows_per_group = 5

    data = pd.read_csv(file_path)
    data['Timestamp'] = pd.to_datetime(data['Timestamp'])
    # Explicit copy: new columns are added below, and assigning onto a bare
    # column subset triggers pandas' SettingWithCopyWarning.
    data = data[required_columns].copy()
    data['Time_Diff'] = data['Timestamp'].diff().dt.total_seconds()

    # Row-to-row power; labelled kW by the original author, which presumes
    # Heat_Pump_Energy_Output is a cumulative energy reading in kWh -- TODO confirm.
    data['Heat_Pump_Energy_Incremental'] = data['Heat_Pump_Energy_Output'].diff()
    data['Heat_Pump_Energy_Incremental_kW'] = (
        data['Heat_Pump_Energy_Incremental'] / (data['Time_Diff'] / 3600)
    )

    # Average complete groups of five consecutive rows (10 minutes if the raw
    # data is sampled every 2 minutes); a trailing partial group is discarded.
    n_groups = len(data) // rows_per_group
    if n_groups == 0:
        raise ValueError(
            f"need at least {rows_per_group} rows to aggregate, got {len(data)}"
        )
    complete = data.iloc[:n_groups * rows_per_group]
    group_ids = np.arange(len(complete)) // rows_per_group
    aggregated_data = complete.groupby(group_ids).mean(numeric_only=True)
    # Each averaged sample is stamped with the time of the last row in its group.
    aggregated_data['Timestamp'] = (
        complete['Timestamp'].iloc[rows_per_group - 1::rows_per_group].array
    )

    # Keep night-time samples only (21:00 up to, but not including, 05:00)
    hours = aggregated_data['Timestamp'].dt.hour
    night_data = aggregated_data[(hours >= 21) | (hours < 5)].copy()

    # Drop rows with missing values in relevant columns
    night_data = night_data.dropna(
        subset=[
            'External_Air_Temperature',
            'Internal_Air_Temperature',
            'Heat_Pump_Energy_Incremental_kW',
        ]
    )

    # Rates of change between consecutive night samples. Only gaps in
    # (0, 1200] seconds are used: larger gaps span a break in the data, and
    # zero/negative gaps would produce infinite or sign-flipped rates.
    gap_seconds = night_data['Timestamp'].diff().dt.total_seconds()
    usable_gap = (gap_seconds > 0) & (gap_seconds <= 1200)
    gap_hours = gap_seconds.where(usable_gap) / 3600
    night_data['dT_in/dt'] = night_data['Internal_Air_Temperature'].diff() / gap_hours
    night_data['Heat_Pump_Energy_Incremental_kW_2'] = (
        night_data['Heat_Pump_Energy_Output'].diff() / gap_hours
    )

    # Keep only rows whose heat pump power is strictly below 20 kW; rows with
    # NaN power fail the comparison and are removed here as well.
    night_data = night_data[night_data['Heat_Pump_Energy_Incremental_kW_2'] < 20]
    night_data = night_data.dropna(
        subset=['dT_in/dt', 'Heat_Pump_Energy_Incremental_kW_2']
    )

    # Two coefficients are fitted, so fewer than two samples cannot determine them.
    if len(night_data) < 2:
        raise ValueError(
            f"only {len(night_data)} usable night-time sample(s); "
            "at least 2 are required for the regression"
        )

    # Define variables for regression
    T_in = night_data['Internal_Air_Temperature']
    T_out = night_data['External_Air_Temperature']
    Q = night_data['Heat_Pump_Energy_Incremental_kW_2']

    X = np.column_stack((T_out - T_in, Q))
    y = night_data['dT_in/dt'].to_numpy()

    # Linear regression with the intercept forced to zero
    initial_model = LinearRegression(fit_intercept=False)
    initial_model.fit(X, y)
    beta_1, beta_2 = initial_model.coef_

    # beta_1 = 1 / (R * C) and beta_2 = 1 / C, hence:
    C_initial = 1 / beta_2
    R_initial = beta_2 / beta_1

    rmse = np.sqrt(mean_squared_error(y, initial_model.predict(X)))

    return file_path, R_initial, C_initial, R_initial * C_initial, rmse

# Process every CSV file in the folder. os.listdir() returns entries in
# arbitrary order, so the listing is sorted to make the row order of the
# output spreadsheet reproducible between runs and platforms.
for filename in sorted(os.listdir(folder_path)):
    if not filename.endswith(".csv"):
        continue
    file_path = os.path.join(folder_path, filename)
    try:
        result = process_file(file_path)
    except Exception as e:
        # Top-level boundary: report the failing file and carry on with the rest.
        print(f"Error processing {file_path}: {e}")
    else:
        results.append(result)

# Save results to an Excel file (one row per successfully processed CSV)
results_df = pd.DataFrame(results, columns=['File Name', 'R', 'C', 'TTC', 'RMSE'])
results_df.to_excel("results2.xlsx", index=False)

print("Processing completed. Results saved to results2.xlsx.")
