In [1]:
# import packages
import pandas as pd
import numpy as np

# Load the tab-separated measurements; the first line of the file is the header row
df = pd.read_csv("all_data_02.csv", sep="\t")
df.head()

Unnamed: 0,time,unit,value,date
0,15,min,26.6,06.08.2019
1,15,min,21.4,15.07.1985
2,15,min,20.4,30.06.2001
3,15,min,19.6,25.07.2006
4,15,min,19.2,15.08.1955


In [2]:
# pandas truncates a printed frame/series once it exceeds display.max_rows
# (library default: 60); setting the option to None would print every row.
# Cap it at 20 here to keep the cell outputs short.
pd.options.display.max_rows = 20

In [3]:
# Dates are written as DD.MM.YYYY (e.g. "06.08.2019"). Parse them with an
# explicit format: `dayfirst=True` is only a parsing preference, not a strict
# rule, whereas a fixed format raises on any entry that does not match.
df["date"] = pd.to_datetime(df["date"], format="%d.%m.%Y")
df.dtypes

time              int64
unit             object
value           float64
date     datetime64[ns]
dtype: object

In [4]:
# Chronological view of the event dates (ascending is the default order)
df["date"].sort_values()

158   1923-07-16
25    1928-07-04
357   1928-07-22
184   1928-07-22
458   1928-07-22
         ...    
8     2022-07-25
134   2022-07-25
386   2022-07-25
291   2022-07-25
572   2022-07-26
Name: date, Length: 575, dtype: datetime64[ns]

In [5]:
# Reshape from long to wide: one row per date, one column per `time` value,
# cells holding the measured `value`; combinations absent from the data become NaN.
# NOTE(review): `unit` is not part of the pivot, so this assumes no `time`
# number is shared by two different units — confirm against the data.
df = df.pivot(index="date", columns="time", values="value")
df

time,1,3,6,12,15,24,30,45
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1923-07-16,,,,,,,,14.8
1928-07-04,,,,,13.0,,,
1928-07-22,32.0,36.6,38.4,38.6,,58.0,,
1936-04-16,,,,28.8,,33.0,,
1936-05-29,15.4,25.0,25.0,,,,,
...,...,...,...,...,...,...,...,...
2021-07-26,,,,,10.9,,,
2021-08-05,18.0,31.3,42.9,48.6,,,,
2021-10-05,,,,,,53.1,,
2022-07-25,25.6,28.8,30.3,30.3,16.4,,22.9,24.9


In [7]:
# The pivot yields a flat column index (named `time`) whose labels are the
# integer `time` values, so a single column is selected by its integer label.
df[15]

date
1923-07-16     NaN
1928-07-04    13.0
1928-07-22     NaN
1936-04-16     NaN
1936-05-29     NaN
              ... 
2021-07-26    10.9
2021-08-05     NaN
2021-10-05     NaN
2022-07-25    16.4
2022-07-26     NaN
Name: 15, Length: 241, dtype: float64

In [8]:
# Give every duration column a readable label that carries its unit.
# Reassign the result instead of using `inplace=True`, which offers no
# benefit and prevents method chaining.
df = df.rename(
    columns={
        1: "1h",
        3: "3h",
        6: "6h",
        12: "12h",
        24: "24h",
        15: "15min",
        30: "30min",
        45: "45min",
    }
)

In [9]:
# Show the table in chronological order; the result is only displayed,
# `df` itself is not reassigned.
df.sort_index()

time,1h,3h,6h,12h,15min,24h,30min,45min
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1923-07-16,,,,,,,,14.8
1928-07-04,,,,,13.0,,,
1928-07-22,32.0,36.6,38.4,38.6,,58.0,,
1936-04-16,,,,28.8,,33.0,,
1936-05-29,15.4,25.0,25.0,,,,,
...,...,...,...,...,...,...,...,...
2021-07-26,,,,,10.9,,,
2021-08-05,18.0,31.3,42.9,48.6,,,,
2021-10-05,,,,,,53.1,,
2022-07-25,25.6,28.8,30.3,30.3,16.4,,22.9,24.9


In [10]:
# Move `date` out of the index into an ordinary column (the index becomes 0..n-1)
df = df.reset_index(drop=False)

In [11]:
# Inspect the flattened table
df

time,date,1h,3h,6h,12h,15min,24h,30min,45min
0,1923-07-16,,,,,,,,14.8
1,1928-07-04,,,,,13.0,,,
2,1928-07-22,32.0,36.6,38.4,38.6,,58.0,,
3,1936-04-16,,,,28.8,,33.0,,
4,1936-05-29,15.4,25.0,25.0,,,,,
...,...,...,...,...,...,...,...,...,...
236,2021-07-26,,,,,10.9,,,
237,2021-08-05,18.0,31.3,42.9,48.6,,,,
238,2021-10-05,,,,,,53.1,,
239,2022-07-25,25.6,28.8,30.3,30.3,16.4,,22.9,24.9


In [19]:
# Duration columns: every column except `date`. Selecting by name rather than
# by position keeps this correct even if the column order changes.
column_names = [name for name in df.columns if name != "date"]

# One entry per duration column, all initialised to the placeholder -999.
# NOTE(review): presumably these are overwritten later with fitted Gumbel
# location/scale parameters — confirm in the cells that follow.
loc_gumbel = dict.fromkeys(column_names, -999)
scale_gumbel = dict.fromkeys(column_names, -999)

# Only the last expression of a cell is displayed, so show one dict here.
scale_gumbel

{'1h': -999,
 '3h': -999,
 '6h': -999,
 '12h': -999,
 '15min': -999,
 '24h': -999,
 '30min': -999,
 '45min': -999}

In [12]:
# Keep only the rows that have a 1h value; `dropna()` replaces the
# `pd.isna(...) == False` comparison and returns the same Series.
df["1h"].dropna()

2      32.0
4      15.4
5      30.2
8      21.0
10      9.8
       ... 
227    25.1
230    26.9
232    28.0
237    18.0
239    25.6
Name: 1h, Length: 79, dtype: float64

In [13]:
# Same selection with a boolean mask: rows where the 1h value is present
df.loc[df["1h"].notna(), "1h"]

2      32.0
4      15.4
5      30.2
8      21.0
10      9.8
       ... 
227    25.1
230    26.9
232    28.0
237    18.0
239    25.6
Name: 1h, Length: 79, dtype: float64

In [17]:
# Walk over the columns and print each one as a Series
for column_name, column_values in df.items():
    print(column_values)

0     1923-07-16
1     1928-07-04
2     1928-07-22
3     1936-04-16
4     1936-05-29
         ...    
236   2021-07-26
237   2021-08-05
238   2021-10-05
239   2022-07-25
240   2022-07-26
Name: date, Length: 241, dtype: datetime64[ns]
0       NaN
1       NaN
2      32.0
3       NaN
4      15.4
       ... 
236     NaN
237    18.0
238     NaN
239    25.6
240     NaN
Name: 1h, Length: 241, dtype: float64
0       NaN
1       NaN
2      36.6
3       NaN
4      25.0
       ... 
236     NaN
237    31.3
238     NaN
239    28.8
240     NaN
Name: 3h, Length: 241, dtype: float64
0       NaN
1       NaN
2      38.4
3       NaN
4      25.0
       ... 
236     NaN
237    42.9
238     NaN
239    30.3
240     NaN
Name: 6h, Length: 241, dtype: float64
0       NaN
1       NaN
2      38.6
3      28.8
4       NaN
       ... 
236     NaN
237    48.6
238     NaN
239    30.3
240     NaN
Name: 12h, Length: 241, dtype: float64
0       NaN
1      13.0
2       NaN
3       NaN
4       NaN
       ... 
236    10.9
2