In [1]:
import pandas as pd
import matplotlib.pyplot as ply


Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
ds1 = pd.read_csv("production_per_group_mba_hour-all-no-0000-00-00.csv", delimiter=";")

#ds1.head()

dtype = ds1['VOLUM_KWH'].dtype
if dtype == 'float64':
    print("Column is a float")
elif dtype == 'object':
    print("Column is a string")
else:
    print("Column is neither a string nor a float")


Column is a string


In [3]:
ds1.tail()

Unnamed: 0,STARTTID,SLUTTID,PRISOMRÅDE,PRODUKSJONSTYPE,VOLUM_KWH
719036,2023-12-31T23:00:00+01:00,2024-01-01T00:00:00+01:00,NO5,Vann uspesifisert,1988294302
719037,2023-12-31T23:00:00+01:00,2024-01-01T00:00:00+01:00,NO5,Annen uspesifisert,0
719038,2023-12-31T23:00:00+01:00,2024-01-01T00:00:00+01:00,NO5,Sol uspesifisert,53894
719039,2023-12-31T23:00:00+01:00,2024-01-01T00:00:00+01:00,NO5,Termisk uspesifisert,15734000
719040,2023-12-31T23:00:00+01:00,2024-01-01T00:00:00+01:00,NO5,Vind uspesifisert,0


In [4]:

# Convert STARTTID to DateTime, including timezone
ds1['STARTTID'] = pd.to_datetime(ds1['STARTTID'], utc=True)

# Convert STARTTID to just date, ignoring time and timezone
ds1['DATE'] = ds1['STARTTID'].dt.date

# Replace commas with dots and convert VOLUM_KWH and ANTALL_MÅLEPUNKT to float
ds1['VOLUM_KWH'] = ds1['VOLUM_KWH'].str.replace(',', '.').astype(float)

# Define the range of PRISOMRÅDE values to filter
prisområde_values = ['NO1', 'NO2', 'NO3', 'NO4', 'NO5']

# Filter for PRISOMRÅDE values in the defined range
filtered_ds1 = ds1[ds1['PRISOMRÅDE'].isin(prisområde_values)]

# Group by DATE and PRISOMRÅDE, and summarize VOLUM_KWH and ANTALL_MÅLEPUNKT
summary = filtered_ds1.groupby(['DATE', 'PRISOMRÅDE']).agg({
    'VOLUM_KWH': 'sum',
}).reset_index()

print(summary)


            DATE PRISOMRÅDE     VOLUM_KWH
0     2020-09-08        NO1  2.168865e+06
1     2020-09-08        NO2  9.753790e+06
2     2020-09-08        NO3  5.612005e+06
3     2020-09-08        NO4  4.917535e+06
4     2020-09-08        NO5  7.789809e+06
...          ...        ...           ...
6045  2023-12-31        NO1  3.292689e+07
6046  2023-12-31        NO2  1.002392e+08
6047  2023-12-31        NO3  6.385201e+07
6048  2023-12-31        NO4  8.590868e+07
6049  2023-12-31        NO5  4.967725e+07

[6050 rows x 3 columns]


In [5]:
summary.head()


Unnamed: 0,DATE,PRISOMRÅDE,VOLUM_KWH
0,2020-09-08,NO1,2168865.186
1,2020-09-08,NO2,9753790.419
2,2020-09-08,NO3,5612004.6
3,2020-09-08,NO4,4917534.632
4,2020-09-08,NO5,7789808.636


In [6]:
summary.tail()

Unnamed: 0,DATE,PRISOMRÅDE,VOLUM_KWH
6045,2023-12-31,NO1,32926890.0
6046,2023-12-31,NO2,100239200.0
6047,2023-12-31,NO3,63852010.0
6048,2023-12-31,NO4,85908680.0
6049,2023-12-31,NO5,49677250.0


In [7]:
summary.to_csv("Summert_produksjon_sone.csv")