In [19]:
import pandas as pd
import numpy as np

df: pd.DataFrame = pd.read_csv('omsz_data/Győr-Moson-Sopron_Mosonmagyaróvár.csv',
                               skiprows=4, # skip metadata of csv
                               sep=';', # separator
                               skipinitialspace=True, # remove trailing whitespace
                               na_values=['EOR', -999], # End Of Record is irrelevant, -999 means missing value
                               low_memory=False, # warning about mixed types
                               )
df.columns = df.columns.str.strip() # remove trailing whitespaces
df['Time'] = pd.to_datetime(df['Time'], format='%Y%m%d%H%M') # convert to datetime
df.index = df['Time'] # set index to datetime
df.drop('Time', axis=1, inplace=True) # remove unnecessary column
df.dropna(how='all', axis=1, inplace=True) # remove columns with all NaN values
# print(df.dtypes)
# print(df.describe())

df

StationNumber      int64
r                float64
t                float64
ta               float64
tn               float64
tx               float64
v                 object
p                float64
u                  int64
sg               float64
sr               float64
fs               float64
fsd              float64
fx               float64
fxd                int64
fxdat             object
we               float64
p0               float64
f                float64
fd                 int64
et5              float64
et10             float64
et20             float64
et50             float64
et100            float64
tsn              float64
dtype: object
       StationNumber              r              t             ta  \
count       184080.0  184080.000000  184080.000000  184080.000000   
mean         23201.0       0.063639      11.200814      11.205796   
std              0.0       0.488835       9.132266       9.127287   
min          23201.0       0.000000     -17.800000     -17.8

Unnamed: 0_level_0,StationNumber,r,t,ta,tn,tx,v,p,u,sg,...,we,p0,f,fd,et5,et10,et20,et50,et100,tsn
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2002-01-01 00:00:00,23201,0.0,-4.3,-4.4,-4.8,-4.3,,1013.3,73,99.33,...,,1029.0,8.8,330,,,,,,-5.1
2002-01-01 01:00:00,23201,0.0,-4.7,-4.6,-4.8,-4.4,,1014.5,64,99.56,...,,1030.3,9.0,345,,,,,,-5.2
2002-01-01 02:00:00,23201,0.0,-5.2,-5.2,-5.4,-4.8,,1016.0,64,98.61,...,,1031.8,5.6,332,,,,,,-5.9
2002-01-01 03:00:00,23201,0.0,-6.0,-5.8,-6.0,-5.5,,1016.3,64,97.76,...,,1032.2,7.4,330,,,,,,-6.5
2002-01-01 04:00:00,23201,0.0,-6.0,-5.9,-6.1,-5.7,,1016.9,63,100.95,...,,1032.8,6.3,344,,,,,,-6.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-31 19:00:00,23201,0.0,7.4,8.5,7.2,10.1,,1009.8,93,92.97,...,12.0,1024.8,1.1,179,4.5,4.3,4.4,5.5,7.6,5.1
2022-12-31 20:00:00,23201,0.0,9.6,9.6,7.4,10.4,,1009.6,82,95.37,...,5.0,1024.5,2.0,192,4.5,4.3,4.4,5.5,7.6,4.8
2022-12-31 21:00:00,23201,0.0,9.7,9.2,8.4,10.0,,1009.6,83,93.66,...,5.0,1024.5,1.6,188,4.5,4.4,4.4,5.5,7.5,4.4
2022-12-31 22:00:00,23201,0.0,8.2,9.4,8.2,10.1,,1009.7,87,92.92,...,5.0,1024.7,1.6,148,4.5,4.4,4.5,5.5,7.5,5.3


In [3]:
import warnings

# suppress default style warning
with warnings.catch_warnings(record=True):
    warnings.simplefilter("always")
    df: pd.DataFrame = pd.read_excel('veradatok.xlsx', engine='openpyxl')

df.columns = df.columns.str.strip()
df['Time'] = pd.to_datetime(df['Időpont'], utc=True).dt.tz_localize(None)
df.drop('Időpont', axis=1, inplace=True)
df.index = df['Time']

df

Unnamed: 0_level_0,Nettó terv rendszerterhelés,Nettó terhelés,MAVIR becslés,Nettó terv rendszertermelés,Nettó rendszerterhelés tény - üzemirányítási,Bruttó hitelesített rendszerterhelés tény,Bruttó terv rendszerterhelés,Nettó tény rendszerterhelés - net.ker.elsz.meres,Bruttó tény rendszerterhelés,Nettó MAVIR rendszerterhelés becslés,Time
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2018-01-01 00:00:00,3926.540,3986.028,4276.50,3911.154,3986.027,4199.25,4258.438,3851.035,4193.318,3858.101,2018-01-01 00:00:00
2018-01-01 01:00:00,3790.682,3866.534,4157.00,3775.331,3866.534,4069.75,4120.233,3731.949,4075.735,3757.103,2018-01-01 01:00:00
2018-01-01 02:00:00,3513.431,3607.349,3898.25,3501.505,3607.349,3814.75,3863.475,3475.011,3816.608,3559.871,2018-01-01 02:00:00
2018-01-01 03:00:00,3335.715,3418.238,3697.00,3413.855,3418.237,3624.75,3793.290,3289.397,3625.328,3323.871,2018-01-01 03:00:00
2018-01-01 04:00:00,3257.596,3360.744,3627.75,3469.264,3360.744,3562.25,3870.480,3229.467,3567.273,3238.276,2018-01-01 04:00:00
...,...,...,...,...,...,...,...,...,...,...,...
2023-09-14 19:00:00,5343.331,,5615.50,5361.806,,,5614.418,,,5309.000,2023-09-14 19:00:00
2023-09-14 20:00:00,4999.134,,5249.25,5169.298,,,5398.140,,,4940.750,2023-09-14 20:00:00
2023-09-14 21:00:00,4732.110,,4992.75,4817.198,,,5036.590,,,4684.500,2023-09-14 21:00:00
2023-09-14 22:00:00,4530.384,,4774.00,4709.204,,,4944.151,,,4471.750,2023-09-14 22:00:00


In [6]:
df = pd.read_csv('station_meta_auto.csv', sep=';', skipinitialspace=True, index_col='StationNumber', na_values='EOR')
df.dropna(how='all', axis=1, inplace=True)

df

Unnamed: 0_level_0,StartDate,EndDate,Latitude,Longitude,Elevation,StationName,RegioName
StationNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
13704,19950422,20040317,47.6783,16.6022,232.8,Sopron Kuruc-domb,Győr-Moson-Sopron
13704,20050727,20230914,47.6783,16.6022,232.8,Sopron Kuruc-domb,Győr-Moson-Sopron
13711,20031107,20230914,47.7147,16.6658,116.8,Fertőrákos,Győr-Moson-Sopron
14707,20140519,20230914,47.4806,16.7292,198.7,Sopronhorpács,Győr-Moson-Sopron
15310,20020227,20230914,47.1983,16.6478,200.1,Szombathely,Vas
...,...,...,...,...,...,...,...
72805,19981231,20230914,48.1042,22.7694,114.7,Milota,Szabolcs-Szatmár-Bereg
73110,20210906,20230914,47.8636,22.2222,150.0,Nyírkáta Herczeghtanya,Szabolcs-Szatmár-Bereg
73313,20140605,20230914,47.8456,22.6631,117.6,Csenger,Szabolcs-Szatmár-Bereg
73505,19980311,20230914,47.6961,22.0569,155.7,Nyírlugos,Szabolcs-Szatmár-Bereg
