In [71]:
import urllib.request
import pandas as pd
from datetime import datetime
import os
import glob
from bs4 import BeautifulSoup

In [72]:
def finde_files(pattern):
    """Return the list of filesystem paths that match the glob ``pattern``.

    The list is empty when nothing matches; its order is whatever
    ``glob.glob`` yields and is not sorted here.
    """
    return glob.glob(pattern)

def delete_files(pattern):
    """Remove every file whose path matches the glob ``pattern``.

    Does nothing when no path matches.
    """
    for matched_path in finde_files(pattern):
        os.remove(matched_path)

In [105]:
# NOAA ``provinceID`` -> the notebook's own area id (a key of _AREA_NAMES).
_NOAA_ID_TO_AREA_ID = {
    1: 24, 2: 26, 3: 25, 4: 27, 5: 3, 6: 4, 7: 8, 8: 21, 9: 22,
    10: 23, 11: 10, 12: 9, 13: 11, 14: 12, 15: 13, 16: 14, 17: 15, 18: 16,
    19: 17, 20: 18, 21: 19, 22: 20, 23: 6, 24: 1, 25: 2, 26: 7, 27: 5,
}

# Area id -> region name stored in the ``area`` column of the output files.
_AREA_NAMES = {
    1: 'Vinnytsya', 2: 'Volyn', 3: "Dnipropetrovs'k", 4: "Donets'k", 5: 'Zhytomyr', 6: 'Transcarpathia', 7: 'Zaporizhzhya', 8: "Ivano-Frankivs'k", 9: 'Kiev City',
    10: 'Kiev', 11: 'Kirovohrad', 12: "Luhans'k", 13: "L'viv", 14: 'Mykolayiv', 15: 'Odessa', 16: 'Poltava', 17: 'Rivne', 18: "Sevastopol'",
    19: 'Sumy', 20: "Ternopil'", 21: 'Kharkiv', 22: 'Kherson', 23: "Khmel'nyts'kyy", 24: 'Cherkasy', 25: 'Chernivtsi', 26: 'Chernihiv', 27: 'Crimea',
}

# Per-socket-operation timeout so a stalled server cannot hang the notebook forever.
_DOWNLOAD_TIMEOUT_SECONDS = 60


def _download_province(path, province_id):
    """Download the raw response for one province into ``path``.

    Returns ``(stamp, raw_path)``: the timestamp string embedded in the file
    name and the path of the file that was written.
    """
    url = f"https://www.star.nesdis.noaa.gov/smcd/emb/vci/VH/get_TS_admin.php?country=UKR&provinceID={province_id}&year1=1981&year2=2024&type=Mean"
    # Context manager guarantees the HTTP response is closed even on error.
    with urllib.request.urlopen(url, timeout=_DOWNLOAD_TIMEOUT_SECONDS) as response:
        payload = response.read()

    # Remove the previous snapshot only once the new payload has arrived,
    # so a failed download does not leave the directory without data.
    delete_files(f"{path}/vhi_id_{province_id}_*")

    stamp = datetime.now().strftime("%d_%m_%Y_%H;%M;%S")
    raw_path = f'{path}/vhi_id_{province_id}_downloaded_time{stamp}.csv'
    with open(raw_path, 'wb') as raw_file:
        raw_file.write(payload)
    return stamp, raw_path


def _strip_markup_in_place(file_path):
    """Rewrite ``file_path`` without HTML markup and without trailing commas."""
    with open(file_path, 'r', encoding='utf-8') as raw_file:
        text = BeautifulSoup(raw_file.read(), 'html.parser').get_text()

    rows = text.split('\n')
    # A final newline produces one empty trailing element; drop it so the
    # rewritten file does not gain an extra line.
    if rows[-1] == '':
        rows.pop()

    with open(file_path, 'w', encoding='utf-8') as clean_file:
        clean_file.write('\n'.join(row.rstrip(',') for row in rows))


def _load_province_frame(file_path, area_name):
    """Read a cleaned province file and return it labelled with ``area_name``.

    The first line of the file is skipped (presumably a title line above the
    column header -- confirm against a downloaded file). Rows whose ``VHI``
    equals -1 are removed.
    """
    frame = pd.read_csv(file_path, lineterminator='\n', skiprows=1)
    # Header cells may carry stray carriage returns and spaces.
    frame.columns = [col.replace('\r', '').replace(' ', '') for col in frame.columns]
    frame['area'] = area_name
    return frame[frame['VHI'] != -1]


def install_files(path):
    """Download VHI data for all 27 provinces and build ``vhi_end.csv``.

    For every NOAA province id the function:

    1. downloads the raw response to ``{path}/vhi_id_{id}_downloaded_time{stamp}.csv``
       (replacing any earlier snapshot for that id),
    2. strips markup from that file in place,
    3. writes a cleaned copy, with an ``area`` column holding the region
       name, to ``{path}/mod_vhi_id_{area_id}_downloaded_time{stamp}.csv``.

    Finally all cleaned copies are concatenated in area-id order into
    ``{path}/vhi_end.csv``.

    Parameters
    ----------
    path : str
        Directory for all downloaded and generated files. It is created
        if it does not exist.

    Raises
    ------
    urllib.error.URLError
        If a download fails or times out.
    IndexError
        If a cleaned per-area file is missing when the final file is built.
    """
    os.makedirs(path, exist_ok=True)

    for province_id in sorted(_NOAA_ID_TO_AREA_ID):
        area_id = _NOAA_ID_TO_AREA_ID[province_id]

        stamp, raw_path = _download_province(path, province_id)
        print(f"VHI {province_id} is downloaded...")

        _strip_markup_in_place(raw_path)
        frame = _load_province_frame(raw_path, _AREA_NAMES[area_id])

        delete_files(f"{path}/mod_vhi_id_{area_id}_*")
        frame.to_csv(f'{path}/mod_vhi_id_{area_id}_downloaded_time{stamp}.csv', index=False, encoding='utf-8')
        print(f'VHI {province_id} is modified')

    frames = [
        pd.read_csv(finde_files(f"{path}/mod_vhi_id_{area_id}_*")[0])
        for area_id in sorted(_AREA_NAMES)
    ]
    # to_csv opens the target in write mode, which replaces an existing file.
    pd.concat(frames).to_csv(f'{path}/vhi_end.csv', index=False, encoding='utf-8')

# Refresh every province file and rebuild vhi_end.csv inside ./Data.
install_files(path="./Data")


VHI 1 is downloaded...
VHI 1 is modified
VHI 2 is downloaded...
VHI 2 is modified
VHI 3 is downloaded...
VHI 3 is modified
VHI 4 is downloaded...
VHI 4 is modified
VHI 5 is downloaded...
VHI 5 is modified
VHI 6 is downloaded...
VHI 6 is modified
VHI 7 is downloaded...
VHI 7 is modified
VHI 8 is downloaded...
VHI 8 is modified
VHI 9 is downloaded...
VHI 9 is modified
VHI 10 is downloaded...
VHI 10 is modified
VHI 11 is downloaded...
VHI 11 is modified
VHI 12 is downloaded...
VHI 12 is modified
VHI 13 is downloaded...
VHI 13 is modified
VHI 14 is downloaded...
VHI 14 is modified
VHI 15 is downloaded...
VHI 15 is modified
VHI 16 is downloaded...
VHI 16 is modified
VHI 17 is downloaded...
VHI 17 is modified
VHI 18 is downloaded...
VHI 18 is modified
VHI 19 is downloaded...
VHI 19 is modified
VHI 20 is downloaded...
VHI 20 is modified
VHI 21 is downloaded...
VHI 21 is modified
VHI 22 is downloaded...
VHI 22 is modified
VHI 23 is downloaded...
VHI 23 is modified
VHI 24 is downloaded...
VHI 2

In [107]:
def vhi(region, year, path):
    """Return the VHI values of one region in one year and save them as CSV.

    Parameters
    ----------
    region : str
        Value to match in the ``area`` column of ``{path}/vhi_end.csv``.
    year : int
        Value to match in the ``year`` column.
    path : str
        Directory that contains ``vhi_end.csv``; the result is written to
        ``{path}/vhi_{region}_{year}.csv``.

    Returns
    -------
    pandas.Series
        The ``VHI`` column of the matching rows (original row index kept).
    """
    data = pd.read_csv(f'{path}/vhi_end.csv')
    matches = (data["area"] == region) & (data["year"] == year)
    selected = data.loc[matches, 'VHI']
    # No glob-based delete beforehand: to_csv replaces an existing file, and
    # treating the file name as a glob pattern would let metacharacters in
    # `region` remove unrelated files.
    selected.to_csv(f'{path}/vhi_{region}_{year}.csv', index=False, encoding='utf-8')
    return selected

# VHI values recorded for Ternopil' during 2017.
data = vhi(region="Ternopil'", year=2017, path='./Data')
print(data)

43304    44.88
43305    46.97
43306    49.07
43307    48.21
43308    45.25
43309    41.92
43310    39.24
43311    39.97
43312    41.74
43313    42.32
43314    41.01
43315    42.47
43316    43.16
43317    42.09
43318    41.62
43319    43.11
43320    48.01
43321    52.06
43322    54.65
43323    56.82
43324    58.47
43325    61.03
43326    62.01
43327    61.80
43328    63.25
43329    63.54
43330    61.62
43331    61.87
43332    62.15
43333    61.61
43334    61.06
43335    60.50
43336    60.75
43337    62.09
43338    62.17
43339    61.14
43340    60.83
43341    59.52
43342    57.40
43343    57.05
43344    56.00
43345    53.39
43346    50.79
43347    47.95
43348    48.01
43349    45.75
43350    45.48
43351    48.07
43352    52.02
43353    51.70
43354    51.04
43355    52.03
Name: VHI, dtype: float64


In [106]:
def data_filter(regions, years, path):
    """Return the rows of ``vhi_end.csv`` for the given regions and years.

    The selection is also written to ``{path}/vhi_{regions}_{years}.csv``,
    where both placeholders are the ``str()`` of the arguments as passed.

    Parameters
    ----------
    regions : list of str
        Accepted values of the ``area`` column.
    years : list of int
        Accepted values of the ``year`` column.
    path : str
        Directory that contains ``vhi_end.csv`` and receives the output file.

    Returns
    -------
    pandas.DataFrame
        All columns of the matching rows (original row index kept).
    """
    data = pd.read_csv(f'{path}/vhi_end.csv')
    keep = data['area'].isin(regions) & data['year'].isin(years)
    filtered_data = data[keep]
    # No glob-based delete beforehand: the list reprs in the file name contain
    # "[...]", which glob reads as character classes, so the old delete never
    # matched this file and could remove unrelated ones. to_csv replaces an
    # existing file on its own.
    filtered_data.to_csv(f'{path}/vhi_{regions}_{years}.csv', index=False, encoding='utf-8')
    return filtered_data

def minmax(regions, years, path):
    """Summarise VHI for the given regions and years.

    Delegates the row selection to ``data_filter`` (which also writes its
    CSV) and returns ``(min, max, mean rounded to 2 decimals, median)`` of
    the ``VHI`` column. The median is returned unrounded.
    """
    vhi_column = data_filter(regions, years, path)['VHI']
    lowest = vhi_column.min()
    highest = vhi_column.max()
    mean_rounded = round(vhi_column.mean(), 2)
    return lowest, highest, mean_rounded, vhi_column.median()

# VHI summary statistics for Poltava and Kiev over 2014-2015.
minimum, maximum, avrg, mediana = minmax(
    regions=['Poltava', 'Kiev'],
    years=[2014, 2015],
    path='./Data',
)
print(f"Мінімальне VHI: {minimum}")
print(f"Максимальне VHI: {maximum}")
print(f"Середнє VHI: {avrg}")
print(f"Медіана VHI: {mediana}")

Мінімальне VHI: 30.43
Максимальне VHI: 70.41
Середнє VHI: 45.79
Медіана VHI: 43.555


In [108]:
def vhi_years(fromy, toy, regions, path):
    """Return the VHI column for ``regions`` over the years ``fromy``..``toy`` inclusive.

    The selection itself (and the CSV it writes) is handled by ``data_filter``.
    """
    # Kept as a real list: data_filter() puts the text form of this
    # argument into the name of the CSV file it writes.
    selected_years = [*range(fromy, toy + 1)]
    return data_filter(regions, selected_years, path)['VHI']

# The result is deliberately not stored in a variable called `vhi`: that name
# belongs to the vhi() function defined in an earlier cell, and rebinding it
# to a Series would make that cell fail when it is run again.
vhi_values = vhi_years(2011, 2014, ['Poltava', 'Kiev'], './Data')
print(vhi_values)

21132    42.93
21133    39.83
21134    38.09
21135    38.22
21136    38.62
         ...  
34451    39.78
34452    42.71
34453    42.94
34454    43.81
34455    45.99
Name: VHI, Length: 416, dtype: float64


In [120]:
def finde_extreme_drought(percent, path, threshold=15):
    """Report the years in which drought hit more than ``percent`` % of all areas.

    An area counts as drought-affected in a year when at least one of its
    rows in ``{path}/vhi_end.csv`` has ``VHI <= threshold``. For every
    qualifying year, the first such row of each affected area is printed
    (columns ``year``, ``area``, ``VHI``) and all columns of those rows are
    written to ``{path}/vhi_extreme_drought.csv``.

    Parameters
    ----------
    percent : int or float
        Share of areas (in percent of the distinct ``area`` values in the
        file) that must be exceeded for a year to be reported.
    path : str
        Directory that contains ``vhi_end.csv`` and receives the output file.
    threshold : int or float, optional
        Highest VHI value that still counts as drought. Defaults to 15.

    Returns
    -------
    None
    """
    data = pd.read_csv(f'{path}/vhi_end.csv')
    total_of_area = data['area'].nunique()

    # One row per (year, area): the first row at or below the threshold.
    drought_rows = data[data['VHI'] <= threshold].drop_duplicates(subset=['year', 'area'])

    areas_per_year = drought_rows.groupby('year')['area'].nunique()
    extreme_years = areas_per_year[areas_per_year > (total_of_area / 100 * percent)].index

    extreme_drought_data = drought_rows[drought_rows['year'].isin(extreme_years)]
    extreme_drought_data = extreme_drought_data.sort_values(by='year')
    print(extreme_drought_data[['year', 'area', 'VHI']])
    # to_csv replaces an existing file, so no separate delete step is needed.
    extreme_drought_data.to_csv(f'{path}/vhi_extreme_drought.csv', index=False, encoding='utf-8')

# Years in which more than 20 % of the areas had a drought-level VHI reading.
finde_extreme_drought(percent=20, path='./Data')

       year         area    VHI
950    2000    Vinnytsya  12.26
18435  2000    Kiev City  14.89
20623  2000         Kiev  12.51
38112  2000  Sevastopol'  13.14
44671  2000      Kharkiv  14.61
51227  2000     Cherkasy  14.64
