In [27]:
import pandas as pd
import datetime
import os
from urllib.request import urlopen 



In [30]:
def download_data(i, timeout=60):
    """Download the VHI time series for one province and save it to disk.

    The request goes to the NOAA STAR ``get_TS_admin.php`` endpoint with
    ``country=UKR``, ``year1=1981``, ``year2=2024`` and ``type=Mean``. The raw
    response bytes are written unchanged to ``obl_{i}_{timestamp}.csv`` in the
    current working directory, where the timestamp is the local time of the
    download formatted as ``%d%m%Y%H%M%S``.

    Args:
        i: Province ID substituted into the ``provinceID`` query parameter.
        timeout: Seconds to wait on a blocking network operation before
            giving up. Without a timeout a stalled connection would hang
            the notebook kernel indefinitely.

    Returns:
        None. The only effect is the file written to disk.

    Raises:
        Whatever ``urlopen`` raises on network failure or timeout, and
        ``OSError`` if the output file cannot be written.
    """
    url = (
        'https://www.star.nesdis.noaa.gov/smcd/emb/vci/VH/get_TS_admin.php'
        f'?country=UKR&provinceID={i}&year1=1981&year2=2024&type=Mean'
    )
    with urlopen(url, timeout=timeout) as response:
        info = response.read()

    # The timestamp keeps repeated downloads of the same province from
    # overwriting each other.
    timen = datetime.datetime.now().strftime("%d%m%Y%H%M%S")

    with open(f'obl_{i}_{timen}.csv', 'wb') as fl:
        fl.write(info)

# Download one file per province ID, 1 through 27 inclusive.
for i in range(1,28):
    download_data(i)

In [None]:
def normalizer_data(file):
    """Read one downloaded province file into a tidy DataFrame.

    The second line of the file is treated as the header row and replaced by
    the fixed column names below. The last parsed row is dropped and the
    literal ``<tt><pre>`` markup is stripped from the year column.

    Args:
        file: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        DataFrame with columns РІК, ТИЖНІ, SMN, SMT, VCI, TCI, VHI, empty.
        The year column is left as text.
    """
    column_names = ['РІК', 'ТИЖНІ', 'SMN', 'SMT', 'VCI', 'TCI', 'VHI', 'empty']
    raw = pd.read_csv(file, header=1, names=column_names)

    # The final row is removed by its positional label on the default index.
    last_label = len(raw) - 1
    trimmed = raw.drop(index=last_label)

    # Strip the HTML markup from the year column.
    trimmed['РІК'] = trimmed['РІК'].str.replace('<tt><pre>', '')
    return trimmed



def create_dataframe(directory='.'):
    """Combine every downloaded province file into one DataFrame.

    Only files named like the ones ``download_data`` writes
    (``obl_<id>_<timestamp>.csv``) are read. Matching on ``'.csv'`` alone
    would also pick up unrelated files in the same directory, such as a CSV
    exported from the combined frame itself.

    For each file, rows whose VHI equals -1 are removed and an ``area``
    column is set to the ``<id>`` part of the file name (kept as text).
    Exact duplicate rows are dropped after concatenation, so several
    downloads of the same province collapse into one copy.

    Args:
        directory: Directory to scan. Defaults to the current working
            directory.

    Returns:
        DataFrame with columns РІК, ТИЖНІ, SMN, SMT, VCI, TCI, VHI, area
        and a fresh 0..n-1 index.

    Raises:
        ValueError: If the directory holds no matching province file.
    """
    frames = []
    # os.listdir returns names in arbitrary order; sorting makes the row
    # order of the result reproducible between runs.
    for name in sorted(os.listdir(directory)):
        stem, ext = os.path.splitext(name)
        parts = stem.split('_')
        is_province_file = (
            ext == '.csv'
            and len(parts) >= 3
            and parts[0] == 'obl'
            and parts[1].isdigit()
        )
        if not is_province_file:
            continue

        df = normalizer_data(os.path.join(directory, name))
        # Discard the rows where VHI is -1; .copy() so the column assignment
        # below acts on an independent frame.
        df = df[df['VHI'] != -1].copy()
        df['area'] = parts[1]
        frames.append(df)

    if not frames:
        raise ValueError(
            f"No province files matching 'obl_<id>_<timestamp>.csv' found in {directory!r}"
        )

    return (
        pd.concat(frames)
        .drop_duplicates()
        .reset_index(drop=True)
        .drop(columns=['empty'])
    )

# Build the combined frame from the downloaded province files.
result = create_dataframe()

# Bare expression so the notebook renders the frame as the cell output.
result

Unnamed: 0,РІК,ТИЖНІ,SMN,SMT,VCI,TCI,VHI,area
0,1982,1.0,0.059,258.24,51.11,48.78,49.95,10
1,1982,2.0,0.063,261.53,55.89,38.20,47.04,10
2,1982,3.0,0.063,263.45,57.30,32.69,44.99,10
3,1982,4.0,0.061,265.10,53.96,28.62,41.29,10
4,1982,5.0,0.058,266.42,46.87,28.57,37.72,10
...,...,...,...,...,...,...,...,...
58180,2024,17.0,0.305,297.97,70.10,16.63,43.40,9
58181,2024,18.0,0.321,298.97,68.55,24.34,46.47,9
58182,2024,19.0,0.333,299.93,65.69,30.41,48.08,9
58183,2024,20.0,0.339,300.65,62.39,34.66,48.56,9


In [None]:
def change_index(df):
    """Remap the ``area`` column from source province IDs to new IDs, in place.

    Position ``k`` (1-based) of the table below holds the new ID for source
    ID ``k``. Keys are strings, so values that are already integers are left
    untouched and calling the function a second time is a no-op. The new IDs
    9 and 25 each appear twice in the table, so two source provinces share
    each of them.
    NOTE(review): presumably this merges city-level entries into their
    surrounding region — confirm against the intended numbering.

    The mapping and the updated frame are printed.

    Args:
        df: DataFrame with an ``area`` column. It is modified in place.

    Returns:
        None.
    """
    chang_index = [22,24,23,25,3,4,8,19,20,21,9,9,10,11,12,13,14,15,16,25,17,18,6,1,2,7,5]

    arr = {str(source_id): new_id for source_id, new_id in enumerate(chang_index, start=1)}
    print(arr)
    # An explicit map avoids Series.replace's deprecated silent downcasting
    # from object to int (it raises a FutureWarning on recent pandas).
    # Values without an entry in the table are passed through unchanged.
    df['area'] = df['area'].map(lambda value: arr.get(value, value))
    print(df)


# Remap the area IDs of `result` in place; the call prints the mapping and the frame.
change_index(result)

{'1': 22, '2': 24, '3': 23, '4': 25, '5': 3, '6': 4, '7': 8, '8': 19, '9': 20, '10': 21, '11': 9, '12': 9, '13': 10, '14': 11, '15': 12, '16': 13, '17': 14, '18': 15, '19': 16, '20': 25, '21': 17, '22': 18, '23': 6, '24': 1, '25': 2, '26': 7, '27': 5}
        РІК  ТИЖНІ    SMN     SMT    VCI    TCI    VHI  area
0      1982    1.0  0.059  258.24  51.11  48.78  49.95    21
1      1982    2.0  0.063  261.53  55.89  38.20  47.04    21
2      1982    3.0  0.063  263.45  57.30  32.69  44.99    21
3      1982    4.0  0.061  265.10  53.96  28.62  41.29    21
4      1982    5.0  0.058  266.42  46.87  28.57  37.72    21
...     ...    ...    ...     ...    ...    ...    ...   ...
58180  2024   17.0  0.305  297.97  70.10  16.63  43.40    20
58181  2024   18.0  0.321  298.97  68.55  24.34  46.47    20
58182  2024   19.0  0.333  299.93  65.69  30.41  48.08    20
58183  2024   20.0  0.339  300.65  62.39  34.66  48.56    20
58184  2024   21.0  0.342  301.42  60.67  34.88  47.79    20

[58185 rows x 8

  df['area'] = df['area'].replace(arr)


In [None]:
# Persist the combined frame to the working directory (the row index is written as the first column).
result.to_csv('VHI_index.csv')

In [None]:
def max_series_vhi(df, area, year):
    """Return the largest VHI value for one area in one year.

    Args:
        df: Frame with ``area``, ``РІК`` and ``VHI`` columns.
        area: Value compared for equality against the ``area`` column.
        year: Value compared for equality against the ``РІК`` column.

    Returns:
        The maximum of the selected VHI values (NaN when no row matches).
    """
    in_area = df["area"] == area
    in_year = df["РІК"] == year
    return df.loc[in_area & in_year, 'VHI'].max()

def min_series_vhi(df, area, year):
    """Return the smallest VHI value for one area in one year.

    Args:
        df: Frame with ``area``, ``РІК`` and ``VHI`` columns.
        area: Value compared for equality against the ``area`` column.
        year: Value compared for equality against the ``РІК`` column.

    Returns:
        The minimum of the selected VHI values (NaN when no row matches).
    """
    matching_rows = df[(df["РІК"] == year) & (df["area"] == area)]
    return matching_rows['VHI'].min()


# Spot-check the VHI extremes for area 21 in 1982; the year is passed as a string.
print('Max vhi for 1982',max_series_vhi(result, 21, '1982'))

print('Min vhi for 1982',min_series_vhi(result, 21, '1982'))

Max vhi for 1982 51.83
Min vhi for 1982 22.82


In [None]:
def extr_vhi_year(df, year, obl, proc, threshold=15):
    """Find the years in which extreme drought hit more than ``proc`` % of areas.

    An area counts as affected in a year when at least one of its rows for
    that year has ``VHI < threshold``. The percentage is taken over the
    number of distinct areas present in the selection (rows whose year is in
    ``year`` and whose area is in ``obl``).

    Args:
        df: Frame with ``РІК``, ``area`` and ``VHI`` columns.
        year: Iterable of year values, in the same type as the ``РІК`` column.
        obl: Iterable of area IDs to consider.
        proc: Percentage that must be strictly exceeded.
        threshold: VHI value below which a row counts as extreme drought.

    Returns:
        List of the qualifying years, in the order given by ``year``. Empty
        when no row matches the requested years and areas.
    """
    selected = df[df['РІК'].isin(year) & df['area'].isin(obl)]

    # The denominator is the same for every year, so compute it once.
    total_area = len(selected['area'].unique())
    if total_area == 0:
        # Nothing selected: no year can qualify, and the percentage below
        # would divide by zero.
        return []

    extreme_rows = selected[selected['VHI'] < threshold]
    years = []
    for i in year:
        affected_areas = len(extreme_rows.loc[extreme_rows['РІК'] == i, 'area'].unique())
        if affected_areas * 100 / total_area > proc:
            years.append(i)

    return years
# Years 1982 through 2019 as strings, to match how the calls in this notebook pass years.
years = [str(i) for i in range(1982,2020)]
# Years in which more than 4% of areas 1-25 were hit by extreme drought.
extr_vhi_year(result, years, list(range(1,26)), 4)

['1986', '1993', '2000', '2007']

In [None]:
def mild_vhi_year(df, year, obl, proc, low=15, high=35):
    """Find the years in which mild drought hit more than ``proc`` % of areas.

    An area counts as affected in a year when at least one of its rows for
    that year has ``low <= VHI < high``. The lower bound is inclusive so the
    band starts exactly where the extreme band (``VHI < 15``) ends; with a
    strict lower bound a reading of exactly 15 would fall into neither class.

    The percentage is taken over the number of distinct areas present in the
    selection (rows whose year is in ``year`` and whose area is in ``obl``).

    Args:
        df: Frame with ``РІК``, ``area`` and ``VHI`` columns.
        year: Iterable of year values, in the same type as the ``РІК`` column.
        obl: Iterable of area IDs to consider.
        proc: Percentage that must be strictly exceeded.
        low: Inclusive lower VHI bound of the mild band.
        high: Exclusive upper VHI bound of the mild band.

    Returns:
        List of the qualifying years, in the order given by ``year``. Empty
        when no row matches the requested years and areas.
    """
    selected = df[df['РІК'].isin(year) & df['area'].isin(obl)]

    # The denominator is the same for every year, so compute it once.
    total_area = len(selected['area'].unique())
    if total_area == 0:
        # Nothing selected: no year can qualify, and the percentage below
        # would divide by zero.
        return []

    mild_rows = selected[(selected['VHI'] >= low) & (selected['VHI'] < high)]
    years = []
    for i in year:
        affected_areas = len(mild_rows.loc[mild_rows['РІК'] == i, 'area'].unique())
        if affected_areas * 100 / total_area > proc:
            years.append(i)
    return years

In [None]:
# Years in which more than 90% of areas 1-25 had a VHI reading in the mild-drought band.
mild_vhi_year(result, years, list(range(1,26)), 90)

['1982',
 '1983',
 '1984',
 '1985',
 '1986',
 '1989',
 '1990',
 '1992',
 '1993',
 '1995',
 '1996',
 '2000',
 '2015',
 '2019']