In [3]:
import os
import pandas as pd
import urllib.request
from datetime import datetime

In [4]:
def get_data(i):
    url = f"https://www.star.nesdis.noaa.gov/smcd/emb/vci/VH/get_TS_admin.php?country=UKR&provinceID={i}&year1=1981&year2=2024&type=Mean"
    vhi_url = urllib.request.urlopen(url)

    directory = 'csv_files'
    os.makedirs(directory, exist_ok=True)

    now = datetime.now()
    date_time = now.strftime("%Y-%m-%d_%H-%M-%S")
    filename = f'vhi_id_{i}_{date_time}.csv'

    filepath = os.path.join(directory, filename)
    with open(filepath, 'wb') as out:
        out.write(vhi_url.read())

    print(f"VHI for province {i} is downloaded and saved as {filename} in {directory} directory.")


for i in range(1, 28):
    get_data(i)

VHI for province 1 is downloaded and saved as vhi_id_1_2024-02-29_18-02-11.csv in csv_files directory.
VHI for province 2 is downloaded and saved as vhi_id_2_2024-02-29_18-02-15.csv in csv_files directory.
VHI for province 3 is downloaded and saved as vhi_id_3_2024-02-29_18-02-16.csv in csv_files directory.
VHI for province 4 is downloaded and saved as vhi_id_4_2024-02-29_18-02-18.csv in csv_files directory.
VHI for province 5 is downloaded and saved as vhi_id_5_2024-02-29_18-02-19.csv in csv_files directory.
VHI for province 6 is downloaded and saved as vhi_id_6_2024-02-29_18-02-20.csv in csv_files directory.
VHI for province 7 is downloaded and saved as vhi_id_7_2024-02-29_18-02-21.csv in csv_files directory.
VHI for province 8 is downloaded and saved as vhi_id_8_2024-02-29_18-02-22.csv in csv_files directory.
VHI for province 9 is downloaded and saved as vhi_id_9_2024-02-29_18-02-22.csv in csv_files directory.
VHI for province 10 is downloaded and saved as vhi_id_10_2024-02-29_18-02

In [75]:
def read(directory):
    headers = ['Year', 'Week', 'SMN', 'SMT', 'VCI', 'TCI', 'VHI', 'empty']
    files = os.listdir(directory)
    result_df = pd.DataFrame()
    for i in range(len(files)):
        full_path = os.path.join(directory, files[i])
        df = pd.read_csv(full_path, header=1, names=headers)
        df = df.drop(df.loc[df['VHI'] == -1].index)
        df['area'] = i + 1
        df['Year'] = df['Year'].str.replace('<tt><pre>', '')
        df = df[~df['Year'].str.contains('</pre></tt>')]
        result_df = pd.concat([result_df, df])
    return result_df

df = read(directory)
print(df)


      Year  Week    SMN     SMT    VCI    TCI    VHI  empty  area
0     1982   1.0  0.059  258.24  51.11  48.78  49.95    NaN     1
1     1982   2.0  0.063  261.53  55.89  38.20  47.04    NaN     1
2     1982   3.0  0.063  263.45  57.30  32.69  44.99    NaN     1
3     1982   4.0  0.061  265.10  53.96  28.62  41.29    NaN     1
4     1982   5.0  0.058  266.42  46.87  28.57  37.72    NaN     1
...    ...   ...    ...     ...    ...    ...    ...    ...   ...
2187  2024   4.0  0.094  273.52  55.09  11.98  33.53    NaN    27
2188  2024   5.0  0.097  274.20  56.24  11.92  34.08    NaN    27
2189  2024   6.0  0.104  275.01  57.22  14.09  35.66    NaN    27
2190  2024   7.0  0.110  275.79  55.76  18.29  37.02    NaN    27
2191  2024   8.0  0.118  276.64  53.80  23.10  38.45    NaN    27

[57834 rows x 9 columns]


In [87]:
def changeID(df):
    cities =  {1: "Cherkasy", 2: "Chernihiv", 3: "Chernivtsi", 4: "Crimea", 5: "Dnipropetrovs'k", 6: "Donets'k", 7: "Ivano-Frankivs'k", 8: "Kharkiv", 9: "Kherson", 10: "Khmel'nyts'kyy", 11: "Kiev", 12: "Kiev City", 13: "Kirovohrad", 14: "Luhans'k", 15: "L'viv", 16: "Mykolayiv", 17: "Odessa", 18: "Poltava", 19: "Rivne", 20: "Sevastopol'", 21: "Sumy", 22: "Ternopil'", 23: "Transcarpathia", 24: "Vinnytsya", 25: "Volyn", 26: "Zaporizhzhya", 27: "Zhytomyr"}
    df['area'] = df['area'].replace(cities)
    
    return df

In [88]:
df = changeID(read(directory))
print(df)

      Year  Week    SMN     SMT    VCI    TCI    VHI  empty      area
0     1982   1.0  0.059  258.24  51.11  48.78  49.95    NaN  Cherkasy
1     1982   2.0  0.063  261.53  55.89  38.20  47.04    NaN  Cherkasy
2     1982   3.0  0.063  263.45  57.30  32.69  44.99    NaN  Cherkasy
3     1982   4.0  0.061  265.10  53.96  28.62  41.29    NaN  Cherkasy
4     1982   5.0  0.058  266.42  46.87  28.57  37.72    NaN  Cherkasy
...    ...   ...    ...     ...    ...    ...    ...    ...       ...
2187  2024   4.0  0.094  273.52  55.09  11.98  33.53    NaN  Zhytomyr
2188  2024   5.0  0.097  274.20  56.24  11.92  34.08    NaN  Zhytomyr
2189  2024   6.0  0.104  275.01  57.22  14.09  35.66    NaN  Zhytomyr
2190  2024   7.0  0.110  275.79  55.76  18.29  37.02    NaN  Zhytomyr
2191  2024   8.0  0.118  276.64  53.80  23.10  38.45    NaN  Zhytomyr

[57834 rows x 9 columns]


In [161]:
def max_extr(df, area, year):
    max_vhi = df[(df['area'] == area) & (df['Year'] == year)]['VHI'].max()
    
    return max_vhi

max_vhi = max_extr(df, "Kiev", '2022')
print('max:', max_vhi)

def min_extr(df, area, year):
    min_vhi = df[(df['area'] == area) & (df['Year'] == year)]['VHI'].min()
    
    return min_vhi

min_vhi = min_extr(df, "Kiev", '2022')
print('min:', min_vhi)

max: 72.32
min: 36.12


In [158]:
def dry(df, area, percent):
    df_area = df[df["area"] == area]
    df_drought = df[(df.VHI <= percent)]["Year"].unique()
    df_drought.sort()
    return df_drought


print(dry(df, "Kiev", 15))

['1984' '1986' '1993' '1994' '1999' '2000' '2003' '2007' '2012']


In [157]:
def dry(df, area, percent):
    df_area = df[df["area"] == area]
    df_drought = df[(df.VHI <= percent)]["Year"].unique()
    df_drought.sort()
    return df_drought


print(dry(df, "Kiev", 35))

['1982' '1983' '1984' '1985' '1986' '1987' '1988' '1989' '1990' '1991'
 '1992' '1993' '1994' '1995' '1996' '1997' '1998' '1999' '2000' '2001'
 '2002' '2003' '2004' '2005' '2006' '2007' '2008' '2009' '2010' '2011'
 '2012' '2013' '2014' '2015' '2016' '2017' '2018' '2019' '2020' '2021'
 '2022' '2023' '2024']
