In [7]:
import pandas as pd
import os

folder = "C:/Users/harja/Downloads/Datasets for temperature and energy demand modelling-20250729/Demand Data/ALL_DEMAND_DATA"

dfs = []
for file in os.listdir(folder):
    if file.endswith(".csv"):  # adjust if .txt
        path = os.path.join(folder, file)
        df = pd.read_csv(path)
        dfs.append(df)

all_demand = pd.concat(dfs, ignore_index=True)
all_demand.head()


Unnamed: 0,REGION,SETTLEMENTDATE,TOTALDEMAND,RRP,PERIODTYPE
0,NSW1,2000/01/01 00:30,6763.57,15.64,TRADE
1,NSW1,2000/01/01 01:00,6386.10167,14.06,TRADE
2,NSW1,2000/01/01 01:30,5990.795,14.3,TRADE
3,NSW1,2000/01/01 02:00,5655.97667,14.28,TRADE
4,NSW1,2000/01/01 02:30,5283.83667,14.17,TRADE


In [11]:
import pandas as pd
import glob

# get all QLD demand files
demand_qld = glob.glob("C:/Users/harja/Downloads/Datasets for temperature and energy demand modelling-20250729/Demand Data/ALL_DEMAND_DATA/*_QLD*.csv")
demand_qld.sort()

# read and combine
df_list = [pd.read_csv(f) for f in demand_qld]
if df_list:
    demand_df = pd.concat(df_list, ignore_index=True)
    print("Demand data loaded:")
    print(demand_df.head())
else:
    print("No QLD CSV files found.")

# rename and drop unnecessary columns
demand_df = demand_df.rename(columns={
    "SETTLEMENTDATE": "Datetime",
    "TOTALDEMAND": "Total Demand"
})
demand_df = demand_df.drop(columns=["REGION", "PERIODTYPE"], errors='ignore')
print(demand_df.head())




Demand data loaded:
  REGION    SETTLEMENTDATE  TOTALDEMAND    RRP PERIODTYPE
0   QLD1  2000/01/01 00:30   3905.56833  39.40      TRADE
1   QLD1  2000/01/01 01:00   3855.67500  34.18      TRADE
2   QLD1  2000/01/01 01:30   3814.44667  35.20      TRADE
3   QLD1  2000/01/01 02:00   3705.36500  25.53      TRADE
4   QLD1  2000/01/01 02:30   3615.71333  18.55      TRADE
           Datetime  Total Demand    RRP
0  2000/01/01 00:30    3905.56833  39.40
1  2000/01/01 01:00    3855.67500  34.18
2  2000/01/01 01:30    3814.44667  35.20
3  2000/01/01 02:00    3705.36500  25.53
4  2000/01/01 02:30    3615.71333  18.55


In [23]:
import pandas as pd

# path to QLD weather file
weather_file = r"Temperature Data\HM01X_Data_040913_999999999743964.txt"

# read weather data
weather_df = pd.read_csv(weather_file)

# combine datetime into one variable
weather_df['Datetime'] = pd.to_datetime({
    'year': weather_df['Year Month Day Hour Minutes in YYYY'],
    'month': weather_df['MM'],
    'day': weather_df['DD'],
    'hour': weather_df['HH24'],
    'minute': weather_df['MI format in Local time']
})
weather_df['Datetime'] = weather_df['Datetime'].dt.strftime('%Y/%m/%d %H:%M')

# rename columns
weather_df = weather_df.rename(columns={
    "Precipitation since 9am local time in mm": "Precipitation",
    "Air Temperature in degrees C": "Air Temp",
    "Relative humidity in percentage %": "Humidity",
    "Wind speed in km/h": "Wind Speed"
})

# convert numeric
cols_to_convert = ["Precipitation", "Air Temp", "Humidity", "Wind Speed"]
weather_df[cols_to_convert] = weather_df[cols_to_convert].apply(pd.to_numeric, errors='coerce')

print("Weather data loaded successfully:")
print(weather_df.head())


  weather_df = pd.read_csv(weather_file)


Weather data loaded successfully:
   hm  Station Number  Year Month Day Hour Minutes in YYYY  MM  DD  HH24  \
0  hm           40913                                 2000   1   1     0   
1  hm           40913                                 2000   1   1     0   
2  hm           40913                                 2000   1   1     1   
3  hm           40913                                 2000   1   1     1   
4  hm           40913                                 2000   1   1     2   

   MI format in Local time  Year Month Day Hour Minutes in YYYY.1  MM.1  DD.1  \
0                        0                                   2000     1     1   
1                       30                                   2000     1     1   
2                        0                                   2000     1     1   
3                       30                                   2000     1     1   
4                        0                                   2000     1     1   

   ...  Wind direction

In [24]:
# merge demand and weather data
merged_qld_df = pd.merge(
    demand_df,
    weather_df[["Datetime", "Precipitation", "Air Temp", "Humidity", "Wind Speed"]],
    on='Datetime',
    how='inner'
)

# fill precipitation not recorded as 'NR'
merged_qld_df["Precipitation"] = merged_qld_df["Precipitation"].fillna("NR")

# drop duplicates and NaNs
merged_qld_df = merged_qld_df.drop_duplicates()
merged_qld_df = merged_qld_df.dropna()

print("Merged dataset preview:")
print(merged_qld_df.head())

# save cleaned data
merged_qld_df.to_csv("QLD_clean.csv", index=False)




Merged dataset preview:
           Datetime  Total Demand    RRP Precipitation  Air Temp  Humidity  \
0  2000/01/01 00:30    3905.56833  39.40           0.0      22.0      75.0   
1  2000/01/01 01:00    3855.67500  34.18           0.0      21.7      78.0   
2  2000/01/01 01:30    3814.44667  35.20           0.0      21.4      82.0   
3  2000/01/01 02:00    3705.36500  25.53           0.0      21.5      83.0   
4  2000/01/01 02:30    3615.71333  18.55           0.0      21.3      82.0   

   Wind Speed  
0        13.0  
1        11.2  
2        11.2  
3         9.4  
4        11.2  


In [25]:
# rename columns to match the example format
merged_qld_df_formatted = merged_qld_df.rename(columns={
    "Total Demand": "Total Demand",
    "RRP": "RRP",
    "Precipitation": "Precipitation (mm)",
    "Air Temp": "Air Temp (C)",
    "Humidity": "Humidity (%)",
    "Wind Speed": "Wind Speed (km/h)"
})

# save to Excel
merged_qld_df_formatted.to_excel("QLD_clean_formatted.xlsx", index=False)

print("QLD dataset exported to Excel as QLD_clean_formatted.xlsx")


QLD dataset exported to Excel as QLD_clean_formatted.xlsx
