# Step 1: Read data

In [5]:
import pandas as pd

## 1.1 Historical daily maximum and minimum temperature, 2013–2023

In [8]:
# Load the daily maximum and minimum temperature files. The first 10 lines of
# each file are skipped (presumably a metadata preamble - confirm against the files).
df_max, df_min = (
    pd.read_csv(csv_name, skiprows=10)
    for csv_name in ('1323max.csv', '1323min.csv')
)

In [10]:
# Preview the first rows of the daily maximum-temperature frame.
df_max.head()

Unnamed: 0,Date,Maximum air temperature at 1.5m (°C)
0,2013-01-01,6.60862
1,2013-01-02,10.57874
2,2013-01-03,10.40448
3,2013-01-04,9.36426
4,2013-01-05,10.02218


In [12]:
# Check row count, null counts and dtypes of the maximum-temperature frame.
df_max.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4017 entries, 0 to 4016
Data columns (total 2 columns):
 #   Column                                Non-Null Count  Dtype  
---  ------                                --------------  -----  
 0   Date                                  4017 non-null   object 
 1   Maximum air temperature at 1.5m (°C)  4017 non-null   float64
dtypes: float64(1), object(1)
memory usage: 62.9+ KB


In [14]:
# Preview the first rows of the daily minimum-temperature frame.
df_min.head()

Unnamed: 0,Date,Minimum air temperature at 1.5m (°C)
0,2013-01-01,3.08765
1,2013-01-02,3.74957
2,2013-01-03,5.67923
3,2013-01-04,7.49624
4,2013-01-05,6.67734


In [16]:
# Check row count, null counts and dtypes of the minimum-temperature frame.
df_min.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4017 entries, 0 to 4016
Data columns (total 2 columns):
 #   Column                                Non-Null Count  Dtype  
---  ------                                --------------  -----  
 0   Date                                  4017 non-null   object 
 1   Minimum air temperature at 1.5m (°C)  4017 non-null   float64
dtypes: float64(1), object(1)
memory usage: 62.9+ KB


In [18]:
# Rename the columns to Date / Tmax / Tmin, then merge the two frames on Date

In [20]:
# Replace the long source headers with the short names used downstream
# (first column -> 'Date', second column -> 'Tmax' / 'Tmin').
# rename() returns a new frame; rebinding the name avoids inplace=True mutation.
df_max = df_max.rename(columns={df_max.columns[0]: 'Date', df_max.columns[1]: 'Tmax'})
df_min = df_min.rename(columns={df_min.columns[0]: 'Date', df_min.columns[1]: 'Tmin'})

In [22]:
# Parse the Date column of both frames into pandas datetimes
# (required for the .dt accessor used when aggregating by year).
for temp_frame in (df_max, df_min):
    temp_frame['Date'] = pd.to_datetime(temp_frame['Date'])

In [24]:
# merge max and min
# Inner join on Date. validate='one_to_one' makes pandas raise MergeError if
# either frame contains duplicate dates, instead of silently multiplying rows.
df_hist = pd.merge(df_max, df_min, on='Date', how='inner', validate='one_to_one')

In [26]:
# Daily mean temperature, taken as the midpoint of the daily max and min.
df_hist['Tavg'] = df_hist['Tmax'].add(df_hist['Tmin']).div(2)

In [28]:
# UK degree-day base temperatures (°C): heating is counted below base_hdd,
# cooling above base_cdd.
base_hdd, base_cdd = 15.5, 22.0

In [30]:
# Daily degree days: shortfall of the mean temperature below the heating base (HDD)
# and excess above the cooling base (CDD); negative values are floored at zero.
daily_mean = df_hist['Tavg']
df_hist['HDD'] = daily_mean.rsub(base_hdd).clip(lower=0)
df_hist['CDD'] = daily_mean.sub(base_cdd).clip(lower=0)

In [32]:
# Sum the daily HDD/CDD values per calendar year. The grouping key is named
# 'Year' up front so the reset index column already carries the final name.
year_key = df_hist['Date'].dt.year.rename('Year')
df_hdd_cdd_by_year = df_hist.groupby(year_key)[['HDD', 'CDD']].sum().reset_index()
df_hdd_cdd_by_year

Unnamed: 0,Year,HDD,CDD
0,2013,2499.04641,0.0
1,2014,2011.261485,0.0
2,2015,2271.19166,0.9643
3,2016,2263.322705,0.0
4,2017,2146.488055,0.0
5,2018,2247.62087,0.0
6,2019,2252.83076,1.1743
7,2020,2178.07503,0.46897
8,2021,2279.942655,0.0
9,2022,2050.09335,7.20901


## 1.2 Future data (2024–2035)

In [35]:
# Select the member files whose data cover 2024–2035

In [37]:
file_names = [
    '01.csv', '05.csv', '06.csv', '07.csv', '08.csv', '10.csv',
    '11.csv', '12.csv', '13.csv', '15.csv', '25.csv'
]

# baseline
base_hdd = 15.5
base_cdd = 22.0


def yearly_degree_days(csv_path, hdd_base, cdd_base,
                       first_year=2024, last_year=2035, header_rows=13):
    """Return yearly HDD/CDD totals for one member file.

    Parameters
    ----------
    csv_path : str or path-like
        Member CSV. After ``header_rows`` skipped lines, the first column is
        read as the date and the second as the daily mean temperature.
    hdd_base, cdd_base : float
        Base temperatures for heating / cooling degree days.
    first_year, last_year : int
        Inclusive range of calendar years to keep.
    header_rows : int
        Number of leading lines skipped before the CSV header row.

    Returns
    -------
    pandas.DataFrame
        Columns ``Year``, ``HDD``, ``CDD``; one row per year present in range.

    Notes
    -----
    Rows whose date or temperature cannot be parsed are dropped.
    NOTE(review): errors='coerce' drops unparseable dates silently; if the
    member files use a non-Gregorian calendar, some days would be lost here -
    confirm against the source files.
    """
    raw = pd.read_csv(csv_path, skiprows=header_rows)
    raw = raw.rename(columns={raw.columns[0]: 'Date', raw.columns[1]: 'Tavg'})

    # type converting; invalid entries become NaT/NaN and are removed
    daily = raw.assign(
        Date=pd.to_datetime(raw['Date'], errors='coerce'),
        Tavg=pd.to_numeric(raw['Tavg'], errors='coerce'),
    ).dropna(subset=['Date', 'Tavg'])

    # filter target time period by calendar year, so timestamps on the last
    # day that carry a time-of-day component are not cut off
    daily = daily.assign(Year=daily['Date'].dt.year)
    daily = daily[daily['Year'].between(first_year, last_year)]

    # calculate HDD/CDD; assign() builds a new frame, so nothing is written
    # onto a filtered slice (avoids SettingWithCopyWarning)
    daily = daily.assign(
        HDD=(hdd_base - daily['Tavg']).clip(lower=0),
        CDD=(daily['Tavg'] - cdd_base).clip(lower=0),
    )

    # aggregate by years
    return daily.groupby('Year')[['HDD', 'CDD']].sum().reset_index()


member_hdd_cdd_list = []

for file in file_names:
    try:
        df_year = yearly_degree_days(file, base_hdd, base_cdd)
    except Exception as e:
        # Best-effort: report the failing file and continue with the others.
        # The message must use the loop variable `file`; the previous `{name}`
        # was undefined and raised NameError inside this handler.
        print(f'file {file} error: {e}')
        continue
    df_year['Member'] = file.replace('.csv', '')
    member_hdd_cdd_list.append(df_year)

In [39]:
# merge all members
# Stack the per-member yearly tables into one long frame with a fresh 0..n-1 index,
# then print its first five rows.
df_future_hdd_cdd_all = pd.concat(objs=member_hdd_cdd_list, ignore_index=True)

print(df_future_hdd_cdd_all.head(5))

   Year          HDD        CDD Member
0  2024  2193.201448   0.000000     01
1  2025  2111.237928  10.813436     01
2  2026  1787.842559   1.212643     01
3  2027  1957.265880   0.000000     01
4  2028  2250.289332   2.921132     01


In [41]:
# Check row count, null counts and dtypes of the stacked per-member table.
df_future_hdd_cdd_all.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 132 entries, 0 to 131
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Year    132 non-null    int32  
 1   HDD     132 non-null    float64
 2   CDD     132 non-null    float64
 3   Member  132 non-null    object 
dtypes: float64(2), int32(1), object(1)
memory usage: 3.7+ KB


In [43]:
# Average HDD and CDD across all members for each year; as_index=False keeps
# Year as an ordinary column.
df_future_avg = df_future_hdd_cdd_all.groupby('Year', as_index=False)[['HDD', 'CDD']].mean()

In [45]:
# Write the member-averaged future table and the historical yearly table to CSV,
# omitting the index column.
for yearly_table, out_name in (
    (df_future_avg, 'future_HDD_CDD_2024_2035.csv'),
    (df_hdd_cdd_by_year, 'history_HDD_CDD_2013_2023.csv'),
):
    yearly_table.to_csv(out_name, index=False)

In [47]:
# Append the future yearly averages below the historical yearly totals,
# renumbering the index from zero.
yearly_parts = (df_hdd_cdd_by_year, df_future_avg)
df_combined = pd.concat(yearly_parts, axis=0, ignore_index=True)

In [49]:
# Display the combined historical + future yearly HDD/CDD table.
df_combined

Unnamed: 0,Year,HDD,CDD
0,2013,2499.04641,0.0
1,2014,2011.261485,0.0
2,2015,2271.19166,0.9643
3,2016,2263.322705,0.0
4,2017,2146.488055,0.0
5,2018,2247.62087,0.0
6,2019,2252.83076,1.1743
7,2020,2178.07503,0.46897
8,2021,2279.942655,0.0
9,2022,2050.09335,7.20901


In [51]:
# Save the combined yearly table to CSV without the index column.
df_combined.to_csv(path_or_buf='HDD_CDD_2013_2035.csv', index=False)