In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
def parse_df(name, period=0.5):
    """Scrape one station's tabular readings from the IOC sea level monitoring site.

    Parameters
    ----------
    name : str
        Station code inserted into the ``code`` query parameter (e.g. ``'posi'``).
    period : float, default 0.5
        Value inserted into the ``period`` query parameter
        (presumably a time window in days -- confirm against the service).

    Returns
    -------
    pandas.DataFrame
        Columns ``Date`` and ``Time`` (strings, obtained by splitting the first
        scraped column on its first space) and ``Height`` (numeric; cells that
        cannot be parsed as numbers become NaN). The index keeps the labels of
        the scraped table with its first row removed.
    """
    url = f"http://www.ioc-sealevelmonitoring.org/bgraph.php?code={name}&output=tab&period={period}"
    raw = pd.read_html(url)[0]
    # The first scraped row is discarded (presumably the table's caption row).
    raw = raw.iloc[1:, :]

    # Split once and reuse; `n` must be passed by keyword because newer pandas
    # releases reject it as a positional argument.
    stamp_parts = raw[0].str.split(' ', n=1)

    return pd.DataFrame({'Date': stamp_parts.str[0],
                         'Time': stamp_parts.str[1],
                         # Scraped cells arrive as text; convert so the column
                         # can be aggregated and plotted as numbers.
                         'Height': pd.to_numeric(raw[1], errors='coerce')})

In [3]:
# Fetch the readings for station 'posi' and preview the first rows.
posi_df = parse_df(name='posi', period=0.5)
posi_df.head()

Unnamed: 0,Date,Time,Height
1,2021-09-09,20:06:00,4.45
2,2021-09-09,20:07:00,4.45
3,2021-09-09,20:08:00,4.45
4,2021-09-09,20:09:00,4.45
5,2021-09-09,20:10:00,4.45


In [4]:
# Fetch station 'vlad' and line it up with 'posi' on the shared timestamp
# columns; the outer join keeps timestamps that appear in only one station.
vlad_df = parse_df(name='vlad', period=0.5)
posi_vlad_df = pd.merge(vlad_df, posi_df, how='outer', on=['Date', 'Time'])
posi_vlad_df.head()

Unnamed: 0,Date,Time,Height_x,Height_y
0,2021-09-09,20:06:00,4.7,4.45
1,2021-09-09,20:07:00,4.72,4.45
2,2021-09-09,20:08:00,4.72,4.45
3,2021-09-09,20:09:00,4.72,4.45
4,2021-09-09,20:10:00,4.74,4.45


In [5]:
# Alternative layout: stack the two stations' rows on top of each other
# (row-wise concatenation). Note that this rebinds `posi_vlad_df`.
posi_vlad_df = pd.concat([posi_df, vlad_df])
posi_vlad_df.head()

Unnamed: 0,Date,Time,Height
1,2021-09-09,20:06:00,4.45
2,2021-09-09,20:07:00,4.45
3,2021-09-09,20:08:00,4.45
4,2021-09-09,20:09:00,4.45
5,2021-09-09,20:10:00,4.45


In [6]:
def parse_dataset(stations_names, time_period=0.5):
    """Build a long-format table of readings for several stations.

    Each station is fetched with ``parse_df`` and tagged with a ``Station``
    column holding its code; the per-station tables are then stacked row-wise.

    Parameters
    ----------
    stations_names : iterable of str
        Station codes to fetch, in the order they should appear in the result.
    time_period : float, default 0.5
        Forwarded to ``parse_df`` as its ``period`` argument.

    Returns
    -------
    pandas.DataFrame
        The columns returned by ``parse_df`` followed by ``Station``. Every
        station keeps its own row labels, so index values repeat across
        stations. An empty input yields an empty frame with the columns
        ``Date``, ``Time``, ``Height`` and ``Station``.
    """
    # Collect all per-station frames first and concatenate once, instead of
    # re-copying the accumulated frame on every loop iteration.
    frames = [
        parse_df(name, time_period).assign(Station=name)
        for name in stations_names
    ]
    if not frames:
        # pd.concat refuses an empty list; return a well-formed empty table.
        return pd.DataFrame(columns=['Date', 'Time', 'Height', 'Station'])

    return pd.concat(frames, axis=0)

# Station codes to download for the long-format dataset.
stations_codes = [
    'posi', 'vlad', 'naho', 'preo',
    'rudn', 'sosu', 'sove', 'kril',
    'waka', 'hako', 'fuka', 'sado',
    'noto', 'toya', 'saig', 'hmda',
]

# One row per (station, timestamp); preview up to the first 1000 rows.
jap_sea_df = parse_dataset(stations_names=stations_codes)
jap_sea_df.head(1000)
        

Unnamed: 0,Date,Time,Height,Station
1,2021-09-09,20:06:00,4.45,posi
2,2021-09-09,20:07:00,4.45,posi
3,2021-09-09,20:08:00,4.45,posi
4,2021-09-09,20:09:00,4.45,posi
5,2021-09-09,20:10:00,4.45,posi
...,...,...,...,...
297,2021-09-10,01:02:00,4.6,vlad
298,2021-09-10,01:03:00,4.6,vlad
299,2021-09-10,01:04:00,4.6,vlad
300,2021-09-10,01:05:00,4.6,vlad


In [7]:
def parse_dataset_alt(stations_names, time_period=0.5):
    """Build a wide-format table with one ``<code>_Height`` column per station.

    Each station is fetched with ``parse_df`` and indexed by ``(Date, Time)``.
    The first station defines the rows of the result; every later station is
    aligned to that index on assignment, so timestamps missing from a later
    station become NaN and timestamps absent from the first station are
    dropped.

    Parameters
    ----------
    stations_names : iterable of str
        Station codes to fetch; the first one determines the result's index.
    time_period : float, default 0.5
        Forwarded to ``parse_df`` as its ``period`` argument.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``(Date, Time)`` with columns named ``<code>_Height`` in
        input order. An empty input yields an empty frame with that
        two-level index and no columns.
    """
    res_df = None
    for name in stations_names:
        column = name + '_Height'
        heights = parse_df(name, time_period).set_index(['Date', 'Time'])['Height']
        if res_df is None:
            # First station: its timestamps become the row index of the result.
            res_df = heights.to_frame(column)
        else:
            # Column assignment aligns `heights` to the existing index.
            res_df[column] = heights

    if res_df is None:
        # No stations requested: return an empty table of the same shape
        # family instead of failing on a missing first element.
        empty_index = pd.MultiIndex.from_arrays([[], []], names=['Date', 'Time'])
        return pd.DataFrame(index=empty_index)

    return res_df

# Station codes to download for the wide-format dataset.
stations_codes = [
    'posi', 'vlad', 'naho', 'preo',
    'rudn', 'sosu', 'sove', 'kril',
    'waka', 'hako', 'fuka', 'sado',
    'noto', 'toya', 'saig', 'hmda',
]

# One row per timestamp, one column per station. Note that this rebinds
# `jap_sea_df`; preview up to the first 1000 rows.
jap_sea_df = parse_dataset_alt(stations_names=stations_codes)
jap_sea_df.head(1000)

Unnamed: 0_level_0,Unnamed: 1_level_0,posi_Height,vlad_Height,naho_Height,preo_Height,rudn_Height,sosu_Height,sove_Height,kril_Height,waka_Height,hako_Height,fuka_Height,sado_Height,noto_Height,toya_Height,saig_Height,hmda_Height
Date,Time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2021-09-09,20:06:00,4.45,4.7,4.64,2.12,3.68,2.45,1.41,3.32,2.2555,2.1885,2.3896,2.0879,2.4445,2.5329,2.222,2.3531
2021-09-09,20:07:00,4.45,4.72,4.64,2.16,3.68,2.45,1.41,3.32,2.2555,2.1976,2.3957,2.0848,2.4475,2.5329,2.222,2.3439
2021-09-09,20:08:00,4.45,4.72,4.64,2.16,3.66,2.44,1.41,3.32,2.2586,2.1946,2.3988,2.0787,2.4414,2.5329,2.2128,2.3348
2021-09-09,20:09:00,4.45,4.72,4.64,2.16,3.64,2.44,1.41,3.32,2.2494,2.2007,2.4049,2.0726,2.4414,2.5329,2.2128,2.3287
2021-09-09,20:10:00,4.45,4.74,4.64,2.16,3.62,2.44,1.42,3.32,2.2464,2.2037,2.4079,2.0726,2.4293,2.5359,2.2098,2.3256
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-09-10,07:40:00,4.43,4.85,4.68,2.19,3.66,2.45,1.5,3.08,2.2738,2.1519,2.4384,2.1184,2.4933,2.539,2.3134,2.4689
2021-09-10,07:41:00,4.43,4.87,4.68,2.22,3.66,2.45,1.5,3.08,2.2708,2.1488,2.4414,2.1184,2.5024,2.542,2.3226,2.4658
2021-09-10,07:42:00,4.45,4.87,4.68,2.21,3.67,2.45,1.5,3.09,2.2647,2.1549,2.4384,2.1184,2.5024,2.5359,2.3226,2.4597
2021-09-10,07:43:00,4.45,4.87,4.68,2.21,3.67,2.44,1.5,3.09,2.2616,2.1549,2.4384,2.1153,2.4994,2.5359,2.3226,2.4628


In [8]:
# Count the missing readings in each station column.
jap_sea_df.isna().sum()

posi_Height    0
vlad_Height    0
naho_Height    0
preo_Height    0
rudn_Height    0
sosu_Height    1
sove_Height    0
kril_Height    4
waka_Height    0
hako_Height    0
fuka_Height    0
sado_Height    0
noto_Height    0
toya_Height    0
saig_Height    0
hmda_Height    0
dtype: int64