In [1]:
!pip install darts -qq

In [2]:
import pandas as pd
from darts import TimeSeries
import glob

# Collect every CSV export in DATA/. glob returns paths in arbitrary,
# filesystem-dependent order, so sort them to make the row order reproducible.
csv_files = sorted(glob.glob('DATA/*.csv'))

# Read each file once and concatenate in a single call; calling pd.concat
# inside the loop re-copies the accumulated frame on every iteration.
frames = [pd.read_csv(file) for file in csv_files]

# ignore_index=True already yields a fresh 0..n-1 index, so no separate
# reset_index is needed. With no matching files, fall back to an empty frame
# (pd.concat raises ValueError on an empty list).
df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()


In [3]:
# Inspect column dtypes and non-null counts of the combined frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26307 entries, 0 to 26306
Data columns (total 4 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   MTU (CET/CEST)             26307 non-null  object 
 1   Day-ahead Price [EUR/MWh]  26304 non-null  float64
 2   Currency                   26304 non-null  object 
 3   BZN|DK1                    0 non-null      float64
dtypes: float64(2), object(2)
memory usage: 822.2+ KB


In [4]:
# Drop 'BZN|DK1': df.info() reports 0 non-null values for it.
# Rebinding instead of inplace=True, together with errors='ignore', keeps this
# cell safe to re-run (the in-place version raises KeyError the second time).
df = df.drop(columns='BZN|DK1', errors='ignore')

In [5]:
# Give the time-interval column a shorter name.
df = df.rename({'MTU (CET/CEST)': 'date&time'}, axis='columns')

In [6]:
# Confirm the dropped column is gone and the rename took effect.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26307 entries, 0 to 26306
Data columns (total 3 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   date&time                  26307 non-null  object 
 1   Day-ahead Price [EUR/MWh]  26304 non-null  float64
 2   Currency                   26304 non-null  object 
dtypes: float64(1), object(2)
memory usage: 616.7+ KB


In [7]:
# Preview the first rows to see the raw 'start - end' interval format.
df.head()

Unnamed: 0,date&time,Day-ahead Price [EUR/MWh],Currency
0,01.01.2020 00:00 - 01.01.2020 01:00,33.42,EUR
1,01.01.2020 01:00 - 01.01.2020 02:00,31.77,EUR
2,01.01.2020 02:00 - 01.01.2020 03:00,31.57,EUR
3,01.01.2020 03:00 - 01.01.2020 04:00,31.28,EUR
4,01.01.2020 04:00 - 01.01.2020 05:00,30.85,EUR


In [8]:
# Drop the 'Currency' column.
# Rebinding instead of inplace=True, together with errors='ignore', keeps this
# cell safe to re-run (the in-place version raises KeyError the second time).
df = df.drop(columns='Currency', errors='ignore')

In [9]:
# Split 'start - end' into its two halves. n=1 caps the split at the first
# hyphen so the result always has at most two columns; an unbounded split
# would produce extra columns (and fail the two-column assignment) if any
# value contained another '-'.
# Splitting on the bare '-' leaves a trailing space on start_time and a
# leading space on end_time.
df[['start_time', 'end_time']] = df['date&time'].str.split("-", n=1, expand=True)



In [10]:
# Preview the new start_time / end_time columns.
df.head()

Unnamed: 0,date&time,Day-ahead Price [EUR/MWh],start_time,end_time
0,01.01.2020 00:00 - 01.01.2020 01:00,33.42,01.01.2020 00:00,01.01.2020 01:00
1,01.01.2020 01:00 - 01.01.2020 02:00,31.77,01.01.2020 01:00,01.01.2020 02:00
2,01.01.2020 02:00 - 01.01.2020 03:00,31.57,01.01.2020 02:00,01.01.2020 03:00
3,01.01.2020 03:00 - 01.01.2020 04:00,31.28,01.01.2020 03:00,01.01.2020 04:00
4,01.01.2020 04:00 - 01.01.2020 05:00,30.85,01.01.2020 04:00,01.01.2020 05:00


In [11]:
# Keep only the interval start as the timestamp; the combined interval string
# and the interval end are dropped.
# Rebinding instead of inplace=True, together with errors='ignore', keeps this
# cell safe to re-run (the in-place version raises KeyError the second time).
df = df.drop(columns=['date&time', 'end_time'], errors='ignore')

In [12]:
# Parse the day.month.year hour:minute strings into datetimes.
# The trailing space in the format is intentional: start_time still carries
# the space that preceded the '-' separator in the original interval string.
parsed_start = pd.to_datetime(df['start_time'], format='%d.%m.%Y %H:%M ')
df = df.assign(start_time=parsed_start)

In [13]:
# Verify start_time now has a datetime64 dtype.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26307 entries, 0 to 26306
Data columns (total 2 columns):
 #   Column                     Non-Null Count  Dtype         
---  ------                     --------------  -----         
 0   Day-ahead Price [EUR/MWh]  26304 non-null  float64       
 1   start_time                 26307 non-null  datetime64[ns]
dtypes: datetime64[ns](1), float64(1)
memory usage: 411.2 KB


In [14]:
# Boolean mask of repeated index labels (first occurrence is not flagged).
# The frame has a RangeIndex, so no label repeats and the mask is all False.
dup_idx = df.index.duplicated(keep='first')

In [15]:
# Rows whose index label is not a repeat; the result is displayed, not stored.
df[~dup_idx]

Unnamed: 0,Day-ahead Price [EUR/MWh],start_time
0,33.42,2020-01-01 00:00:00
1,31.77,2020-01-01 01:00:00
2,31.57,2020-01-01 02:00:00
3,31.28,2020-01-01 03:00:00
4,30.85,2020-01-01 04:00:00
...,...,...
26302,67.01,2022-12-31 19:00:00
26303,40.50,2022-12-31 20:00:00
26304,14.89,2022-12-31 21:00:00
26305,9.94,2022-12-31 22:00:00


In [24]:
# Shorten the verbose price column name to 'Price'.
df = df.rename({'Day-ahead Price [EUR/MWh]': 'Price'}, axis='columns')

In [25]:
# Confirm the price column is now called 'Price'.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26307 entries, 0 to 26306
Data columns (total 2 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   Price       26304 non-null  float64       
 1   start_time  26307 non-null  datetime64[ns]
dtypes: datetime64[ns](1), float64(1)
memory usage: 411.2 KB


In [29]:
# Show rows whose index label repeats an earlier one.
repeated_index_mask = df.index.duplicated()
df.loc[repeated_index_mask]

Unnamed: 0,Price,start_time


In [31]:
# Count rows whose start_time repeats an earlier row's start_time.
df.duplicated(subset='start_time').sum()

3

In [32]:
# Show the later occurrence of each repeated start_time.
df.loc[df.duplicated(subset='start_time')]

Unnamed: 0,Price,start_time
7155,0.09,2020-10-25 02:00:00
16060,64.49,2021-10-31 02:00:00
24797,99.92,2022-10-30 02:00:00


In [33]:
# Show every row involved in a repeated start_time (keep=False flags both
# the first and the later occurrences) to compare their prices.
df.loc[df.duplicated(subset='start_time', keep=False)]


Unnamed: 0,Price,start_time
7154,0.15,2020-10-25 02:00:00
7155,0.09,2020-10-25 02:00:00
16059,69.03,2021-10-31 02:00:00
16060,64.49,2021-10-31 02:00:00
24796,100.2,2022-10-30 02:00:00
24797,99.92,2022-10-30 02:00:00


In [37]:
# Keep only the first row for each start_time.
# NOTE(review): the repeated timestamps all fall at 02:00 on a late-October
# date, which looks like the DST fall-back hour; keeping the first row
# discards the second price of each pair. Confirm this is acceptable.
df_no_duplicates = df.drop_duplicates(subset=['start_time'], keep='first')

In [38]:
# Sanity check: this should display an empty frame if no start_time repeats.
remaining_dupes = df_no_duplicates.duplicated(subset='start_time', keep=False)
df_no_duplicates.loc[remaining_dupes]

Unnamed: 0,Price,start_time


In [39]:
# Build a Darts TimeSeries of 'Price' indexed by 'start_time'.
# fill_missing_dates=True with freq=None is passed through unchanged.
time_series = TimeSeries.from_dataframe(
    df_no_duplicates,
    time_col='start_time',
    value_cols='Price',
    fill_missing_dates=True,
    freq=None,
)

In [41]:
# Display the resulting TimeSeries.
time_series

