In [1]:
%run stdPackages.ipynb # this imports a lot of useful packages

# NOTE(review): `pd` and `pyDbs` are used further down but never imported explicitly in this
# notebook; presumably they are provided by stdPackages.ipynb — confirm.

In [2]:
import base
import os 

def read_csv_with_path(directory_path, file_name, **read_csv_kwargs):
    """Load a CSV file that lives in a given directory into a pandas DataFrame.

    Parameters
    ----------
    directory_path : str or os.PathLike
        Directory that contains the file.
    file_name : str
        Name of the CSV file inside ``directory_path``.
    **read_csv_kwargs
        Optional keyword arguments forwarded unchanged to ``pd.read_csv``
        (for example ``na_values=['n/e']``). ``header`` defaults to 0, i.e. the
        first row of the file is used as column names, unless given explicitly.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the file.
    """
    file_path = os.path.join(directory_path, file_name)

    # Keep the historical default (first row = header) while still letting a caller override it.
    read_csv_kwargs.setdefault('header', 0)

    return pd.read_csv(file_path, **read_csv_kwargs)

**Parameters and database**

In [110]:
# Database that collects every parameter defined in this notebook
db = pyDbs.SimpleDB()

# --- MWP: one value, indexed by 'c' ---
MarginalWillingness = pd.Series([50], index=pd.Index(['c1'], name='c'), name='MWP')
db['MWP'] = MarginalWillingness

# --- Electricity storage capacity: one value, indexed by 'id' ---
# NOTE(review): the Series is named 'scap' while it is stored under the key 'sCap' — confirm
# whether the differing case is intended.
storage = pd.Series([0], index=pd.Index(['Storage'], name='id'), name='scap')
db['sCap'] = storage

# --- Charge/discharge efficiency: one value, indexed by 'id' ---
# TODO: MUST BE UPDATED ACCORDING TO ENERGIKATALOG
# NOTE(review): same name/key case mismatch as above ('effs' vs 'effS') — confirm.
efficiency = pd.Series([0.9], index=pd.Index(['Storage'], name='id'), name='effs')
db['effS'] = efficiency

# Display one stored entry as a quick check
db['MWP']

c
c1    50
Name: MWP, dtype: int64

**Load variation and actual load**

In [102]:
# Working directory of the notebook; the CSV input files are read from here
curr = os.getcwd()

# Read the raw load file and keep only the actual-load column
load_column = 'Actual Total Load [MW] - BZN|DK2'
HourlyLoad_raw = read_csv_with_path(curr, "Total_load.csv")

# Remove hours without an observed load (in this data: the blank hour from the switch to
# summer time). Missing values are judged on the load column only, so a gap in any other
# column of the file cannot discard an hour that does have a load observation.
HourlyLoad_df = HourlyLoad_raw[load_column].dropna()

# Index for the hours of a year, numbered 1..8760
index_hours_year = range(1, 8761)
if len(HourlyLoad_df) != len(index_hours_year):
    # Fail with an explicit message instead of an opaque index length-mismatch error.
    raise ValueError(
        f"Expected {len(index_hours_year)} hourly load observations after dropping "
        f"missing values, got {len(HourlyLoad_df)}"
    )
HourlyLoad_df.index = pd.MultiIndex.from_product([['c1'], index_hours_year], names=['c', 'h'])

# Note that we have the actual data, so nothing needs to be calculated in the model,
# i.e. no need for Load x Load Variation.
HourlyLoad_df


c   h   
c1  1       1387.0
    2       1340.0
    3       1284.0
    4       1244.0
    5       1230.0
             ...  
    8756    1584.0
    8757    1466.0
    8758    1408.0
    8759    1350.0
    8760    1325.0
Name: Actual Total Load [MW] - BZN|DK2, Length: 8760, dtype: float64

In [103]:
# Add the hourly load series to the database and display the stored entry
db['Max hourly load'] = HourlyLoad_df
db['Max hourly load']

c   h   
c1  1       1387.0
    2       1340.0
    3       1284.0
    4       1244.0
    5       1230.0
             ...  
    8756    1584.0
    8757    1466.0
    8758    1408.0
    8759    1350.0
    8760    1325.0
Name: Actual Total Load [MW] - BZN|DK2, Length: 8760, dtype: float64

**Generating Capacity**

In [36]:
capacity_column = '2022 [MW]'
InstalledCap_raw = read_csv_with_path(curr, "Installed_capacity.csv")

# Keep only generators that report a capacity: drop rows that are empty, 'n/e' or 0.
# A boolean mask is used instead of `df[col].replace(..., inplace=True)`, because an in-place
# edit on a selected column is a chained assignment that does not reliably write back to the
# frame (and is ignored under pandas Copy-on-Write).
reported = InstalledCap_raw[capacity_column]
has_capacity = ~(reported.isna() | reported.isin(['n/e', 0]))
# .copy() makes the filtered frame independent, so the column assignment below is unambiguous.
InstalledCap_df = InstalledCap_raw.loc[has_capacity].copy()

# Convert the capacity column to numeric (unparseable entries become NaN) and round to 2 decimals
InstalledCap_df[capacity_column] = pd.to_numeric(InstalledCap_df[capacity_column], errors='coerce').round(2)

# Keep strictly positive capacities (this also removes NaN) and drop the total row
is_positive = InstalledCap_df[capacity_column] > 0
is_not_total = InstalledCap_df['Production Type'] != 'Total Grand capacity'
InstalledCap_df = InstalledCap_df.loc[is_positive & is_not_total]

# Use the production type as index under the name 'id' and give the capacity column its model name
InstalledCap_df = (
    InstalledCap_df
    .rename(columns={'Production Type': 'id', capacity_column: 'GeneratingCapacity'})
    .set_index('id')
)

# Finally we add it to the db
db['GeneratingCapacity'] = InstalledCap_df
db['GeneratingCapacity']

Unnamed: 0_level_0,GeneratingCapacity
id,Unnamed: 1_level_1
Biomass,1179
Fossil Gas,603
Fossil Hard coal,1222
Fossil Oil,801
Other renewable,24
Solar,450
Waste,173
Wind Offshore,1028
Wind Onshore,756


**Capacity variation**

In [89]:
ActualGen_raw = read_csv_with_path(curr, "Actual_generation.csv")

# Treat 'n/e' and 0 as missing observations
ActualGen_df = ActualGen_raw.replace(['n/e', 0], pd.NA)

# Drop the empty hour from the switch to summer time.
# .copy() makes the filtered frame independent, so adding a column below does not act on a slice.
dst_gap_mtu = '27.03.2022 02:00 - 27.03.2022 03:00 (CET/CEST)'
ActualGen_df = ActualGen_df[ActualGen_df['MTU'] != dst_gap_mtu].copy()

# Hour-of-day label (HH:MM), cut out of the start time in the MTU string
ActualGen_df['h'] = ActualGen_df['MTU'].str[11:16]

# Hourly capacity variation = actual generation / installed capacity.
# The installed capacity is looked up in InstalledCap_df instead of being typed in by hand,
# so the ratios stay consistent with the capacity data loaded above.
generation_columns = {
    'Wind Offshore': 'Wind Offshore  - Actual Aggregated [MW]',
    'Wind Onshore': 'Wind Onshore  - Actual Aggregated [MW]',
    'Solar': 'Solar  - Actual Aggregated [MW]',
}
CapVariation_df = pd.DataFrame({
    plant: pd.to_numeric(ActualGen_df[column], errors='coerce')
    / InstalledCap_df.loc[plant, 'GeneratingCapacity']
    for plant, column in generation_columns.items()
})
# Column for all non-intermittent generators, whose capacity does not vary over the hours
CapVariation_df['Standard'] = 1
# Hours without an observation count as zero generation
CapVariation_df = CapVariation_df.fillna(0)

# Index the rows by hour of the year, numbered 1..8760
index_year_hours = range(1, 8761)
if len(CapVariation_df) != len(index_year_hours):
    # Fail with an explicit message instead of an opaque index length-mismatch error.
    raise ValueError(
        f"Expected {len(index_year_hours)} hourly generation observations, "
        f"got {len(CapVariation_df)}"
    )
CapVariation_df.index = index_year_hours
CapVariation_df.index.name = 'h'

CapVariation_df.head()


Unnamed: 0_level_0,Wind Offshore,Wind Onshore,Solar,Standard
h,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,0.788911,0.415344,0.0,1
2,0.830739,0.451058,0.0,1
3,0.855058,0.436508,0.0,1
4,0.777237,0.37037,0.0,1
5,0.626459,0.35582,0.0,1


In [97]:
# Reshape the wide hour-by-technology table into the long format used by the model from class:
# one row per (h, hvt) pair with a single 'CapVariation' column, ordered by hour and then type.
melted_df = (
    CapVariation_df
    .reset_index()
    .melt(id_vars=['h'], var_name='hvt', value_name='CapVariation')
    .set_index(['h', 'hvt'])
    .sort_values(by=['h', 'hvt'])
)

# Store the long-format table in the database and print what was stored
db['CapVariation'] = melted_df
print(db['CapVariation'])

                    CapVariation
h    hvt                        
1    Solar              0.000000
     Standard           1.000000
     Wind Offshore      0.788911
     Wind Onshore       0.415344
2    Solar              0.000000
...                          ...
8759 Wind Onshore       0.318783
8760 Solar              0.000000
     Standard           1.000000
     Wind Offshore      0.864786
     Wind Onshore       0.362434

[35040 rows x 1 columns]
