In [None]:
import os
import urllib.request
import datetime
import pandas as pd
import glob


In [None]:
def create_folder(folder_name):
    """Make sure ``folder_name`` exists, then report it on stdout.

    Nothing is created when the path is already present. Missing parent
    directories are created along with the target.
    """
    already_present = os.path.exists(folder_name)
    if not already_present:
        os.makedirs(folder_name, exist_ok=True)
    print(f"Folder '{folder_name}' was successfully created or already exists.")

def download_vhi_data(region_id, start_year=1981, end_year=2024, output_dir='data'):
    """Download the mean VHI time series for one province and store it as a CSV file.

    The response body is written unchanged to
    ``<output_dir>/vhi_<region_id>_<timestamp>.csv``; the timestamp has minute
    resolution, so two runs within the same minute overwrite each other.

    Args:
        region_id: Value sent as ``provinceID`` in the request.
        start_year: Value sent as ``year1``.
        end_year: Value sent as ``year2``.
        output_dir: Existing directory that receives the file.

    Returns:
        None. Success or failure is reported with ``print``; a failed request
        does not raise, so a loop over many regions keeps going.
    """
    # Imported locally so the block does not depend on an extra file-level import.
    from urllib.error import URLError

    url = f"https://www.star.nesdis.noaa.gov/smcd/emb/vci/VH/get_TS_admin.php?country=UKR&provinceID={region_id}&year1={start_year}&year2={end_year}&type=Mean"

    try:
        # The context manager closes the connection even if reading fails.
        with urllib.request.urlopen(url) as response:
            if response.status != 200:
                print(f"Failed to download data for region {region_id}.")
                return
            # Read everything before touching the disk so a failed request
            # never leaves an empty file behind.
            payload = response.read()
    except URLError:
        # urlopen raises HTTPError (a URLError subclass) for error statuses and
        # URLError for connection problems instead of returning a response.
        print(f"Failed to download data for region {region_id}.")
        return

    timestamp = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M')
    filename = f'{output_dir}/vhi_{region_id}_{timestamp}.csv'
    with open(filename, 'wb') as file:
        file.write(payload)
    print(f"Data for region {region_id} saved to {filename}.")
        

In [None]:
# The downloader writes into 'data', so that folder has to exist first.
create_folder('data')

# One request per provinceID, 1 through 27.
for region in range(1, 28):
    download_vhi_data(region_id=region)

In [None]:
# Maps the provinceID used when downloading (1..27) to the alphabetical index
# used as key in region_names_ukr (1..25). The mapping must be one-to-one:
# duplicate targets would merge different provinces under one id and make the
# reverse lookup lossy.
# NOTE(review): the source-side province names in the trailing comments follow
# the province list published by NOAA STAR for Ukraine; they are not derivable
# from this notebook, so verify them against the upstream list.
# The two city entries (12 and 20) have no name in region_names_ukr; they get the
# spare ids 26 and 27 so every downloaded id still maps to a unique integer.
region_id_map = {
    1: 22,   # Cherkasy
    2: 24,   # Chernihiv
    3: 23,   # Chernivtsi
    4: 25,   # Crimea
    5: 3,    # Dnipropetrovsk
    6: 4,    # Donetsk
    7: 8,    # Ivano-Frankivsk
    8: 19,   # Kharkiv
    9: 20,   # Kherson
    10: 21,  # Khmelnytskyi
    11: 9,   # Kyiv (oblast)
    12: 26,  # Kyiv City
    13: 10,  # Kirovohrad
    14: 11,  # Luhansk
    15: 12,  # Lviv
    16: 13,  # Mykolaiv
    17: 14,  # Odesa
    18: 15,  # Poltava
    19: 16,  # Rivne
    20: 27,  # Sevastopol
    21: 17,  # Sumy
    22: 18,  # Ternopil
    23: 6,   # Zakarpattia (Transcarpathia)
    24: 1,   # Vinnytsia
    25: 2,   # Volyn
    26: 7,   # Zaporizhzhia
    27: 5,   # Zhytomyr
}

# Ukrainian display names, numbered from 1 in the order listed below.
region_names_ukr = dict(enumerate(
    (
        "Вінницька",
        "Волинська",
        "Дніпропетровська",
        "Донецька",
        "Житомирська",
        "Закарпатська",
        "Запорізька",
        "Івано-Франківська",
        "Київська",
        "Кіровоградська",
        "Луганська",
        "Львівська",
        "Миколаївська",
        "Одеська",
        "Полтавська",
        "Рівненська",
        "Сумська",
        "Тернопільська",
        "Харківська",
        "Херсонська",
        "Хмельницька",
        "Черкаська",
        "Чернівецька",
        "Чернігівська",
        "Республіка Крим",
    ),
    start=1,
))

In [None]:
# Inverse lookup of region_id_map (mapped id -> original id). When a mapped id
# occurs more than once, the last original id wins.
reverse_region_id_map = dict(zip(region_id_map.values(), region_id_map.keys()))

def rename_regions(df):
    """Translate the ``region_id`` column through ``region_id_map``.

    The column is overwritten on the frame that was passed in, and that same
    frame is returned. Ids missing from the map become NaN.
    """
    translated_ids = df['region_id'].map(region_id_map)
    df['region_id'] = translated_ids
    return df

In [None]:
def _read_region_file(path, headers):
    """Load one downloaded CSV and return its cleaned rows with a ``region_id`` column."""
    # Parse the id from the file name only, so underscores in directory names
    # cannot shift the position of the id.
    region_id = int(os.path.basename(path).split('_')[1])

    raw = pd.read_csv(path, header=1, names=headers)

    # Keep only the first run of digits in 'Year'. This strips any markup glued
    # to the value and turns rows without a year into NaN, which are dropped
    # below. astype(str) keeps this working when the column was parsed as numbers.
    year = raw['Year'].astype(str).str.extract(r'(\d+)', expand=False)

    cleaned = (
        raw.assign(Year=year)
        .loc[lambda frame: frame['VHI'] != -1]  # -1 rows are discarded
        .drop(columns='empty')
        .dropna(subset=['Year'])
        .astype({'Year': int})
    )
    cleaned.insert(0, 'region_id', region_id, True)
    return cleaned


def create_data_frame(folder_path1):
    """Combine every ``*.csv`` in ``folder_path1`` into one cleaned DataFrame.

    Files are expected to be named ``vhi_<region_id>_<timestamp>.csv``. Rows
    with ``VHI == -1`` or without a parsable year are removed, exact duplicate
    rows (for example from repeated downloads of the same region) are
    collapsed, and the region ids are finally translated by ``rename_regions``.

    Args:
        folder_path1: Directory containing the downloaded CSV files.

    Returns:
        DataFrame with columns region_id, Year, Week, SMN, SMT, VCI, TCI, VHI.

    Raises:
        ValueError: If the folder contains no CSV files, or a file name does
            not carry an integer region id in its second ``_``-separated field.
    """
    # Sorted so the row order of the result is reproducible between runs.
    csv_files = sorted(glob.glob(os.path.join(folder_path1, "*.csv")))
    if not csv_files:
        raise ValueError(f"No CSV files found in '{folder_path1}'.")

    # The trailing comma of each data line yields an eighth, empty column.
    headers = ['Year', 'Week', 'SMN', 'SMT', 'VCI', 'TCI', 'VHI', 'empty']
    frames = [_read_region_file(path, headers) for path in csv_files]

    result = pd.concat(frames).drop_duplicates().reset_index(drop=True)
    return rename_regions(result)

In [None]:
# Assemble the combined table from everything stored in the 'data' folder.
data = create_data_frame(folder_path1='data')

# Print the frame followed by an extra newline.
print(data, '\n')

In [None]:
def analyze_region_years(df, region_id, year_range):
    """Print the minimum and maximum VHI of one region within a span of years.

    Args:
        df: Frame with at least the columns ``region_id``, ``Year`` and ``VHI``.
        region_id: Id to select, in the same numbering as ``df['region_id']``.
        year_range: ``(first_year, last_year)``, both inclusive.

    Returns:
        None. A line is printed only when matching rows exist. ``df`` itself
        is not modified.
    """
    first_year, last_year = year_range

    # Drop rows with NaN in the Year column before casting to int.
    with_year = df.dropna(subset=['Year'])
    # Work on a separate Series instead of writing the cast back into a frame
    # derived from the caller's data.
    years = with_year['Year'].astype(int)

    selected = (with_year['region_id'] == region_id) & years.between(first_year, last_year)
    region_data = with_year[selected]
    if region_data.empty:
        return

    min_vhi = region_data['VHI'].min()
    max_vhi = region_data['VHI'].max()
    # region_names_ukr is keyed by the same ids that are matched against
    # df['region_id'] above, so the name is looked up directly. Converting back
    # through the reverse map would select the name of a different region.
    region_name = region_names_ukr.get(region_id, "Невідомий регіон")
    print(f"Регіон: {region_name}, Min VHI: {min_vhi}, Max VHI: {max_vhi}")

In [None]:
# Report the VHI extremes of region ids 1 through 25 for the years 1985-2010.
for region in range(1, 26):
    analyze_region_years(data, region_id=region, year_range=(1985, 2010))

In [None]:
def drought_years_analysis(df, years_range=("1981", "2024"), selected_regions=None, threshold_extreme=15, threshold_moderate=(15, 35), percent_threshold=20):
    """Find the years in which drought affected a given share of regions.

    A region counts as affected in a year when at least one of its rows for
    that year falls into the drought band. A year is reported when the number
    of affected regions reaches ``percent_threshold`` percent of the regions
    under consideration.

    Args:
        df: Frame with the columns ``region_id``, ``Year`` and ``VHI``.
        years_range: ``(first, last)`` years, inclusive; each value is passed
            through ``int`` so strings and integers both work.
        selected_regions: Optional collection of region ids to restrict the
            analysis to. ``None`` or an empty collection keeps every region.
        threshold_extreme: VHI strictly below this value is extreme drought.
        threshold_moderate: ``(low, high)`` inclusive VHI band for moderate drought.
        percent_threshold: Required share of affected regions, in percent.

    Returns:
        Tuple ``(extreme_drought_years, moderate_drought_years)`` of year lists
        in ascending order.
    """
    if selected_regions:
        df = df[df['region_id'].isin(selected_regions)]

    total_regions = df['region_id'].nunique()
    min_regions_affected = (percent_threshold / 100) * total_regions

    first_year, last_year = int(years_range[0]), int(years_range[1])
    moderate_low, moderate_high = threshold_moderate

    extreme_drought_years = []
    moderate_drought_years = []

    for year in range(first_year, last_year + 1):
        yearly_data = df[df['Year'] == year]
        if yearly_data.empty:
            continue

        vhi = yearly_data['VHI']
        regions = yearly_data['region_id']

        # Count distinct regions, not rows: the threshold is expressed in
        # regions, and a single region contributes many weekly rows per year.
        extreme_region_count = regions[vhi < threshold_extreme].nunique()
        moderate_region_count = regions[vhi.between(moderate_low, moderate_high)].nunique()

        if extreme_region_count >= min_regions_affected:
            extreme_drought_years.append(year)
        if moderate_region_count >= min_regions_affected:
            moderate_drought_years.append(year)

    return extreme_drought_years, moderate_drought_years

In [None]:
# Drought check limited to regions 3, 5 and 12 for the years 1999-2001.
extreme_drought_years, moderate_drought_years = drought_years_analysis(
    data,
    years_range=("1999", "2001"),
    selected_regions=[3, 5, 12],
)
print("Екстремальні посухи для обраних регіонів:", extreme_drought_years)
print("Помірні посухи для обраних регіонів:", moderate_drought_years)