In [23]:
import pandas as pd
import os

**This notebook processes and merges HVAC (Heating, Ventilation, and Air Conditioning)
data. It first loads the building and weather metadata CSV files. It then combines the
building data files and the weather data files separately, skipping the units row in each
file and dropping rows with duplicate timestamps. After converting the timestamps to datetimes,
it joins each building record to the nearest weather record (within 15 minutes) and saves the merged data.**

In [24]:
# 1. Load the metadata tables (building description and weather-station description).
# NOTE(review): neither frame is referenced by the cells visible below — confirm they are still needed.
building_info, weather_info = (
    pd.read_csv(metadata_path)
    for metadata_path in (
        '../../HVAC/Building_Information.csv',
        '../../HVAC/Weather_Information.csv',
    )
)

In [25]:
# 2. Combine the building data files (the units row of each file is skipped)
def load_and_combine_building_data(folder_path):
    """Load every building data CSV in ``folder_path`` into one de-duplicated frame.

    A file is picked up when its name starts with ``Building_``, ends with
    ``.csv`` and does not contain ``Information`` (which excludes the
    metadata file). Each file's second line is skipped and a ``source_file``
    column records which file every row came from.

    Parameters
    ----------
    folder_path : str
        Directory that contains the ``Building_*.csv`` files.

    Returns
    -------
    pandas.DataFrame
        All rows sorted by ``TIMESTAMP`` with one row per distinct timestamp.
        When several rows share a timestamp, the row from the alphabetically
        first file (and the earliest row within that file) is kept. The
        original concatenation index is retained.

    Raises
    ------
    ValueError
        If no matching building data file is found in ``folder_path``.
    """
    # os.listdir returns names in arbitrary order; sorting makes the result
    # (in particular which duplicate row survives) reproducible.
    building_files = sorted(
        f for f in os.listdir(folder_path)
        if f.startswith('Building_') and f.endswith('.csv')
        and 'Information' not in f
    )
    if not building_files:
        # pd.concat would otherwise fail with an opaque "No objects to concatenate"
        raise ValueError(
            f"No building data files (Building_*.csv) found in {folder_path!r}")

    dfs = []
    for file in building_files:
        # skiprows=[1] drops the second line of the file (the units row)
        # while keeping the header on the first line
        df = pd.read_csv(os.path.join(folder_path, file), skiprows=[1])
        # Keep the filename so every row can be traced back to its source
        df['source_file'] = file
        dfs.append(df)

    # Stack all files into a single frame
    building_df = pd.concat(dfs, ignore_index=True)

    # TIMESTAMP is still whatever read_csv produced (no parse_dates here).
    # A stable sort preserves concatenation order among equal timestamps, so
    # drop_duplicates (keep='first') deterministically keeps the first file's row.
    building_df = (
        building_df
        .sort_values('TIMESTAMP', kind='mergesort')
        .drop_duplicates('TIMESTAMP')
    )

    return building_df

# Combine every building data file found in the shared HVAC data folder
building_data = load_and_combine_building_data(folder_path='../../HVAC')

In [26]:
# 3. Combine the weather data files (the units row of each file is skipped)
def load_and_combine_weather_data(folder_path):
    """Load every weather data CSV in ``folder_path`` into one de-duplicated frame.

    A file is picked up when its name starts with ``Weather_``, ends with
    ``.csv`` and does not contain ``Information`` (which excludes the
    metadata file). Each file's second line is skipped and a ``source_file``
    column records which file every row came from.

    Parameters
    ----------
    folder_path : str
        Directory that contains the ``Weather_*.csv`` files.

    Returns
    -------
    pandas.DataFrame
        All rows sorted by ``TIMESTAMP`` with one row per distinct timestamp.
        When several rows share a timestamp, the row from the alphabetically
        first file (and the earliest row within that file) is kept. The
        original concatenation index is retained.

    Raises
    ------
    ValueError
        If no matching weather data file is found in ``folder_path``.
    """
    # os.listdir returns names in arbitrary order; sorting makes the result
    # (in particular which duplicate row survives) reproducible.
    weather_files = sorted(
        f for f in os.listdir(folder_path)
        if f.startswith('Weather_') and f.endswith('.csv')
        and 'Information' not in f
    )
    if not weather_files:
        # pd.concat would otherwise fail with an opaque "No objects to concatenate"
        raise ValueError(
            f"No weather data files (Weather_*.csv) found in {folder_path!r}")

    dfs = []
    for file in weather_files:
        # skiprows=[1] drops the second line of the file (the units row)
        # while keeping the header on the first line
        df = pd.read_csv(os.path.join(folder_path, file), skiprows=[1])
        # Keep the filename so every row can be traced back to its source
        df['source_file'] = file
        dfs.append(df)

    # Stack all files into a single frame
    weather_df = pd.concat(dfs, ignore_index=True)

    # TIMESTAMP is still whatever read_csv produced (no parse_dates here).
    # A stable sort preserves concatenation order among equal timestamps, so
    # drop_duplicates (keep='first') deterministically keeps the first file's row.
    weather_df = (
        weather_df
        .sort_values('TIMESTAMP', kind='mergesort')
        .drop_duplicates('TIMESTAMP')
    )

    return weather_df

# Combine every weather data file found in the shared HVAC data folder
weather_data = load_and_combine_weather_data(folder_path='../../HVAC')

In [27]:
# 4. Standardize the timestamp format
def standardize_timestamps(df, time_column='TIMESTAMP'):
    """Return a copy of ``df`` whose time column is parsed to datetimes.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the column named by ``time_column``.
    time_column : str, default 'TIMESTAMP'
        Name of the column to convert with ``pd.to_datetime``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same rows, columns and index as ``df`` but with
        ``time_column`` converted. The frame passed in is not modified.

    Raises
    ------
    KeyError
        If ``time_column`` is not a column of ``df``.
    """
    # Convert on a copy so the caller's frame is left untouched; this keeps
    # the cell safe to re-run and avoids writing into a possible view.
    converted = df.copy()
    converted[time_column] = pd.to_datetime(converted[time_column])
    return converted

# Parse the TIMESTAMP column of both datasets so they can be joined on time
building_data = standardize_timestamps(df=building_data)
weather_data = standardize_timestamps(df=weather_data)



In [28]:
# 5. Merge building and weather data
def merge_building_weather(building_df, weather_df, tolerance='15min', direction='nearest'):
    """Attach to each building row the weather row closest to it in time.

    Both frames are sorted by ``TIMESTAMP`` (required by ``pd.merge_asof``)
    and joined as a left as-of merge: every building row is kept, and its
    weather columns are left missing when no weather row lies within
    ``tolerance``. Columns present in both frames (other than ``TIMESTAMP``)
    receive pandas' default ``_x`` (building) / ``_y`` (weather) suffixes.

    Parameters
    ----------
    building_df : pandas.DataFrame
        Building data with a datetime ``TIMESTAMP`` column.
    weather_df : pandas.DataFrame
        Weather data with a datetime ``TIMESTAMP`` column.
    tolerance : str or pandas.Timedelta, default '15min'
        Maximum time distance allowed between matched rows.
    direction : {'nearest', 'backward', 'forward'}, default 'nearest'
        Which weather row to search for relative to each building timestamp.

    Returns
    -------
    pandas.DataFrame
        One row per building row, ordered by ``TIMESTAMP``.
    """
    # merge_asof requires both inputs to be sorted on the key
    merged_df = pd.merge_asof(
        building_df.sort_values('TIMESTAMP'),
        weather_df.sort_values('TIMESTAMP'),
        on='TIMESTAMP',
        direction=direction,
        tolerance=pd.Timedelta(tolerance))
    return merged_df

# Join each building record to its nearest weather record
final_data = merge_building_weather(building_df=building_data, weather_df=weather_data)

In [29]:
# 6. Save the merged data
output_path = '../../outputs/HVAC_merged_data.csv'
# to_csv does not create missing folders, so make sure the target directory
# exists; otherwise a fresh checkout fails at the very last step.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
# index=False: the positional index carries no information, keep it out of the file
final_data.to_csv(output_path, index=False)

print("Data merge complete, shape.", final_data.shape)
print("The merged data has been saved to.", output_path)
print("\nPreview of the first 5 rows of data.")
display(final_data.head())

Data merge complete, shape. (64798, 86)
The merged data has been saved to. ../../outputs/HVAC_merged_data.csv

Preview of the first 5 rows of data.


Unnamed: 0,TIMESTAMP,T_Stair_101,T_Room_102,T_Room_103,T_Room_104,T_Room_105,T_Room_106,T_Stair_201,T_Room_202,T_Room_203,...,source_file_x,T_out,RH_out,BP,Dir_Solar,Dif_Solar,Glo_Solar,WS,WD,source_file_y
0,2021-02-23 00:00:00,10.788889,15.583333,20.422222,16.405556,19.25,17.844444,12.988889,18.905556,16.388889,...,Building_FF_Heating.csv,1.25,89.8,98377.8156,0.106,0.092,-1.358,0.0,0.0,Weather_FF_Heating.csv
1,2021-02-23 00:01:00,10.783333,15.583333,20.388889,16.405556,19.233333,17.816667,12.983333,18.933333,16.388889,...,Building_FF_Heating.csv,1.266667,89.5,98377.8156,0.053,0.092,-1.409,0.0,0.0,Weather_FF_Heating.csv
2,2021-02-23 00:02:00,10.783333,15.561111,20.422222,16.388889,19.244444,17.833333,12.972222,18.911111,16.411111,...,Building_FF_Heating.csv,1.3,89.1,98377.8156,0.106,0.092,-1.358,0.0,0.0,Weather_FF_Heating.csv
3,2021-02-23 00:03:00,10.783333,15.577778,20.383333,16.394444,19.244444,17.805556,12.972222,18.933333,16.361111,...,Building_FF_Heating.csv,1.361111,88.5,98377.8156,0.106,0.138,-1.409,0.0,0.0,Weather_FF_Heating.csv
4,2021-02-23 00:04:00,10.783333,15.544444,20.411111,16.388889,19.233333,17.827778,12.961111,18.933333,16.394444,...,Building_FF_Heating.csv,1.35,88.3,98377.8156,0.053,0.092,-1.409,0.0,0.0,Weather_FF_Heating.csv
