In [3]:
import pandas as pd

# Load the CSV files
weather2_df = pd.read_csv('weather2.csv', delimiter=';')
probe_temp_429_df = pd.read_csv('sensordata/Probe temp 429.csv', delimiter=';')
temp_bale_innl_6480_df = pd.read_csv('sensordata/Temp BALE innL 6480.csv', delimiter=';')

# Clean and parse the weather2_df
weather2_df.columns = ['Navn', 'Stasjon', 'Tid', 'Nedbør_12t', 'Lufttemperatur', 'Middelvind', 'Vindretning']
weather2_df['Tid'] = pd.to_datetime(weather2_df['Tid'], format='%d.%m.%Y %H:%M')
weather2_df.set_index('Tid', inplace=True)

# Clean and parse the probe_temp_429_df
probe_temp_429_df.columns = ['sn', 'epoch_time', 'unit', 'value', 'timestamp']
probe_temp_429_df['timestamp'] = pd.to_datetime(probe_temp_429_df['timestamp'], format='%a, %d %b %Y %H:%M:%S %Z')
probe_temp_429_df.set_index('timestamp', inplace=True)
probe_temp_429_df.drop(columns=['sn', 'epoch_time', 'unit'], inplace=True)

# Clean and parse the temp_bale_innl_6480_df
temp_bale_innl_6480_df.columns = ['sn', 'epoch_time', 'unit', 'value', 'timestamp']
temp_bale_innl_6480_df['timestamp'] = pd.to_datetime(temp_bale_innl_6480_df['epoch_time'], unit='ms')
temp_bale_innl_6480_df.set_index('timestamp', inplace=True)
temp_bale_innl_6480_df.drop(columns=['sn', 'epoch_time', 'unit'], inplace=True)

# Ensure all datetime indices are timezone-naive
weather2_df.index = weather2_df.index.tz_localize(None)
probe_temp_429_df.index = probe_temp_429_df.index.tz_localize(None)
temp_bale_innl_6480_df.index = temp_bale_innl_6480_df.index.tz_localize(None)

# Remove duplicates
probe_temp_429_df = probe_temp_429_df[~probe_temp_429_df.index.duplicated(keep='first')]
temp_bale_innl_6480_df = temp_bale_innl_6480_df[~temp_bale_innl_6480_df.index.duplicated(keep='first')]

# Resample the sensor dataframes to hourly frequency
probe_temp_429_df_resampled = probe_temp_429_df.resample('1H').nearest()
probe_temp_429_df_resampled.rename(columns={'value': 'Probe temp 429'}, inplace=True)

temp_bale_innl_6480_df_resampled = temp_bale_innl_6480_df.resample('1H').nearest()
temp_bale_innl_6480_df_resampled.rename(columns={'value': 'Temp BALE innL 6480'}, inplace=True)

# Join the dataframes
combined_df = weather2_df.join(probe_temp_429_df_resampled, how='outer').join(temp_bale_innl_6480_df_resampled, how='outer')

# Select and rearrange columns
combined_df = combined_df[['Nedbør_12t', 'Lufttemperatur', 'Middelvind', 'Vindretning', 'Probe temp 429', 'Temp BALE innL 6480']]

# Display the first few rows of the combined dataframe
print(combined_df.head(200))


                    Nedbør_12t Lufttemperatur Middelvind Vindretning  \
Tid                                                                    
NaT                        NaN            NaN        NaN         NaN   
2023-05-01 01:00:00          -              0          -           -   
2023-05-01 01:00:00          -            2,5        4,1         279   
2023-05-01 02:00:00          -           -0,1          -           -   
2023-05-01 02:00:00          -            2,2        2,7         283   
...                        ...            ...        ...         ...   
2023-05-05 02:00:00          -            0,1          -           -   
2023-05-05 02:00:00          -            0,5        3,8          51   
2023-05-05 03:00:00          -            0,1          -           -   
2023-05-05 03:00:00          -           -0,4        3,9          32   
2023-05-05 04:00:00          -           -1,3          -           -   

                     Probe temp 429  Temp BALE innL 6480  
Tid 