In [2]:
import os
import re
import glob
import datetime
import urllib.request
import pandas as pd

def clean_line(text):
    text = re.sub(r'<.*?>', '', text.decode('utf-8') if isinstance(text, bytes) else text).strip()
    text = re.sub(r'\s*,\s*', ',', text)
    return text

data_dir = "./vhi_data"
os.makedirs(data_dir, exist_ok=True)

base_url = "https://www.star.nesdis.noaa.gov/smcd/emb/vci/VH/get_TS_admin.php?country=UKR&provinceID={}&year1=1981&year2=2024&type=Mean"

for province_id in range(1, 28):
    pattern = os.path.join(data_dir, f"vhi_id_{province_id}_*.csv")
    existing_files = glob.glob(pattern)
    latest_file = max(existing_files, key=os.path.getmtime) if existing_files else None

    timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    new_filename = os.path.join(data_dir, f"vhi_id_{province_id}_{timestamp}.csv")

    current_file = latest_file if latest_file else new_filename
    url = base_url.format(province_id)

    try:
        response = urllib.request.urlopen(url)
        raw_new_data = response.read().splitlines()
        
        clean_new_data = [clean_line(line) for line in raw_new_data if clean_line(line)]
        
        if latest_file:
            with open(latest_file, 'r', encoding='utf-8') as f:
                raw_old_data = f.readlines()

            clean_old_data = [clean_line(line) for line in raw_old_data if clean_line(line)]
            
            if len(clean_new_data) > 1 and len(clean_old_data) > 1:
                if sorted(set(clean_new_data[1:])) == sorted(set(clean_old_data[1:])):
                    print(f"No new data for provinceID {province_id}. File not updated.")
                else:
                    with open(new_filename, 'w', encoding='utf-8') as f:
                        f.writelines("\n".join(clean_new_data))
                    print(f"Updated data found for provinceID {province_id}. New file saved as {new_filename}.")
                    
                    os.remove(latest_file)
                    print(f"Old file {latest_file} deleted.")
                    
                    current_file = new_filename
            else:
                print(f"Error: Not enough data to compare for provinceID {province_id}.")
        else:
            with open(new_filename, 'w', encoding='utf-8') as f:
                f.writelines("\n".join(clean_new_data))
            print(f"File {new_filename} successfully downloaded.")
            
            current_file = new_filename

    except Exception as e:
        print(f"Error downloading provinceID {province_id}: {e}")

    print(f"Current working file for provinceID {province_id}: {current_file}")


File ./vhi_data\vhi_id_1_2025-03-11_14-37-10.csv successfully downloaded.
Current working file for provinceID 1: ./vhi_data\vhi_id_1_2025-03-11_14-37-10.csv
File ./vhi_data\vhi_id_2_2025-03-11_14-37-12.csv successfully downloaded.
Current working file for provinceID 2: ./vhi_data\vhi_id_2_2025-03-11_14-37-12.csv
File ./vhi_data\vhi_id_3_2025-03-11_14-37-13.csv successfully downloaded.
Current working file for provinceID 3: ./vhi_data\vhi_id_3_2025-03-11_14-37-13.csv
File ./vhi_data\vhi_id_4_2025-03-11_14-37-14.csv successfully downloaded.
Current working file for provinceID 4: ./vhi_data\vhi_id_4_2025-03-11_14-37-14.csv
File ./vhi_data\vhi_id_5_2025-03-11_14-37-15.csv successfully downloaded.
Current working file for provinceID 5: ./vhi_data\vhi_id_5_2025-03-11_14-37-15.csv
File ./vhi_data\vhi_id_6_2025-03-11_14-37-16.csv successfully downloaded.
Current working file for provinceID 6: ./vhi_data\vhi_id_6_2025-03-11_14-37-16.csv
File ./vhi_data\vhi_id_7_2025-03-11_14-37-17.csv successfu

In [3]:
id_mapping = {
    1: 25, 2: 27, 3: 26, 4: 1, 5: 4, 6: 5, 7: 9, 8: 22, 9: 23, 10: 24,
    11: 12, 12: 10, 13: 11, 14: 13, 15: 14, 16: 15, 17: 17, 18: 18, 19: 19,
    20: 16, 21: 20, 22: 21, 23: 2, 24: 3, 25: 7, 26: 8, 27: 6
}

def process_files(folder):
    for filename in os.listdir(folder):
        if filename.endswith(".csv"):
            file_path = os.path.join(folder, filename)

            with open(file_path, "r", encoding="utf-8") as f:
                lines = f.readlines()
            
            header = lines[0]
            match = re.search(r"Province=\s*(\d+)", header)
            if match:
                province_id = int(match.group(1))
                new_id = id_mapping.get(province_id, province_id)
                header = re.sub(r"Province=\s*\d+", f"Province= {new_id}", header)
                lines[0] = header

                with open(file_path, "w", encoding="utf-8") as f:
                    f.writelines(lines)
            
            print(f"Updated Province in: {filename}")

process_files(data_dir)

Updated Province in: vhi_id_10_2025-03-11_14-37-19.csv
Updated Province in: vhi_id_11_2025-03-11_14-37-20.csv
Updated Province in: vhi_id_12_2025-03-11_14-37-21.csv
Updated Province in: vhi_id_13_2025-03-11_14-37-22.csv
Updated Province in: vhi_id_14_2025-03-11_14-37-22.csv
Updated Province in: vhi_id_15_2025-03-11_14-37-23.csv
Updated Province in: vhi_id_16_2025-03-11_14-37-24.csv
Updated Province in: vhi_id_17_2025-03-11_14-37-25.csv
Updated Province in: vhi_id_18_2025-03-11_14-37-26.csv
Updated Province in: vhi_id_19_2025-03-11_14-37-27.csv
Updated Province in: vhi_id_1_2025-03-11_14-37-10.csv
Updated Province in: vhi_id_20_2025-03-11_14-37-28.csv
Updated Province in: vhi_id_21_2025-03-11_14-37-29.csv
Updated Province in: vhi_id_22_2025-03-11_14-37-29.csv
Updated Province in: vhi_id_23_2025-03-11_14-37-30.csv
Updated Province in: vhi_id_24_2025-03-11_14-37-31.csv
Updated Province in: vhi_id_25_2025-03-11_14-37-32.csv
Updated Province in: vhi_id_26_2025-03-11_14-37-33.csv
Updated Pro

In [4]:
dfs = []

headers = ['Year', 'Week', 'SMN', 'SMT', 'VCI', 'TCI', 'VHI', 'empty']

for province_id in range(1, 28):
    pattern = rf'vhi_id_{province_id}_\d{{4}}-\d{{2}}-\d{{2}}_\d{{2}}-\d{{2}}-\d{{2}}\.csv'
    
    matched_files = [f for f in os.listdir(data_dir) if re.fullmatch(pattern, f)]
    
    if not matched_files:
        print(f"File for province_id {province_id} not found, skipping...")
        continue

    latest_file = os.path.join(data_dir, matched_files[0])
    
    try:
        df = pd.read_csv(latest_file, header=1, names=headers)
        df = df.drop(columns=["empty"], errors="ignore")
        df = df.set_index(["Year", "Week"])
        df.columns = pd.MultiIndex.from_product([[str(province_id)], df.columns])
        dfs.append(df)
        print(f"File {latest_file} processed successfully.")
    
    except Exception as e:
        print(f"Error processing {latest_file}: {e}")

if dfs:
    final_df = pd.concat(dfs, axis=1)
    final_df.to_csv("vhi_final.csv")
    print("File 'vhi_final.csv' saved successfully!")
    print(final_df.head())
else:
    print("No matching files found for merging.")


File ./vhi_data\vhi_id_1_2025-03-11_14-37-10.csv processed successfully.
File ./vhi_data\vhi_id_2_2025-03-11_14-37-12.csv processed successfully.
File ./vhi_data\vhi_id_3_2025-03-11_14-37-13.csv processed successfully.
File ./vhi_data\vhi_id_4_2025-03-11_14-37-14.csv processed successfully.
File ./vhi_data\vhi_id_5_2025-03-11_14-37-15.csv processed successfully.
File ./vhi_data\vhi_id_6_2025-03-11_14-37-16.csv processed successfully.
File ./vhi_data\vhi_id_7_2025-03-11_14-37-17.csv processed successfully.
File ./vhi_data\vhi_id_8_2025-03-11_14-37-17.csv processed successfully.
File ./vhi_data\vhi_id_9_2025-03-11_14-37-18.csv processed successfully.
File ./vhi_data\vhi_id_10_2025-03-11_14-37-19.csv processed successfully.
File ./vhi_data\vhi_id_11_2025-03-11_14-37-20.csv processed successfully.
File ./vhi_data\vhi_id_12_2025-03-11_14-37-21.csv processed successfully.
File ./vhi_data\vhi_id_13_2025-03-11_14-37-22.csv processed successfully.
File ./vhi_data\vhi_id_14_2025-03-11_14-37-22.c

In [5]:
file_path = "vhi_final.csv"

def load_data(file_path):
    """Виводить дані по VHI в залежності від області та року"""
    df = pd.read_csv(file_path, header=[0, 1], index_col=[0, 1])
    df = df.replace(-1, pd.NA).dropna()
    return df

def get_vhi_series(df, region_id, year):
    return df.loc[year, (str(region_id), "VHI")]

df = load_data(file_path)

print(get_vhi_series(df, 8, 2005))

Week
1     63.87
2     65.72
3     66.63
4     68.21
5     67.69
6     64.41
7     59.08
8     54.77
9     53.42
10    54.02
11    54.63
12    54.79
13    56.36
14     54.5
15    51.77
16    51.83
18    59.32
19    63.37
20    38.47
21    67.84
22     70.8
23    73.09
24    74.29
25    76.87
26    77.32
27    74.84
28    72.94
29     71.8
30     71.4
31    71.63
32    71.44
33    67.82
34     63.1
35    58.52
36    53.67
37    49.36
38    45.27
39    42.05
40    42.44
41     43.7
42    44.92
43    46.93
44    48.76
45    47.85
46    44.68
47    43.41
48    45.15
49    49.03
50    46.52
51    46.12
52    48.14
Name: (8, VHI), dtype: object


In [6]:
def get_extremes(df, region_ids, years):
    """Знаходить мінімальні, максимальні значення VHI, середнє та медіану для вказаних областей та років."""
    selected_data = df.loc[years, (list(map(str, region_ids)), "VHI")]
    selected_data = selected_data.dropna()
    return {
        "min": float(selected_data.min().min()),
        "max": float(selected_data.max().max()),
        "mean": float(selected_data.mean().mean()),
        "median": float(selected_data.median().median())
    }

print(get_extremes(df, [8, 10], range(2000, 2006)))

{'min': 9.36, 'max': 91.42, 'mean': 52.74688356164383, 'median': 53.0475}


In [7]:
def get_vhi_for_range(df, region_ids, start_year, end_year):
    """Повертає ряд VHI за вказаний діапазон років для вказаних областей."""
    return df.loc[start_year:end_year, (list(map(str, region_ids)), "VHI")].dropna()

pd.set_option("display.max_rows", None)
print(get_vhi_for_range(df, [3, 5], 1995, 1996))
pd.reset_option("display.max_rows")

               3      5
             VHI    VHI
Year Week              
1995 4     47.22  57.56
     5     46.49  51.35
     6     44.28  42.64
     7     41.76  34.23
     8     40.26  29.13
     9      40.3  26.64
     10    41.32  25.49
     11    41.83  25.71
     12    44.88  26.72
     13    47.86  28.79
     14     50.4  29.97
     15    52.28  31.51
     16    52.43  33.09
     17    53.37  38.15
     18    54.28  43.17
     19    54.43  46.68
     20     53.9  49.03
     21    53.29  50.29
     22    50.55  49.21
     23    49.39  47.87
     24    48.78  49.91
     25    46.58  56.72
     26    42.75  61.35
     27    38.85  62.43
     28    36.08  62.02
     29    34.41  62.29
     30    33.04  62.96
     31    31.01  63.84
     32    30.42  64.83
     33    30.46  65.45
     34     31.6  65.59
     35    35.36  65.31
     36    41.11  63.61
     37    45.81  60.76
     38    49.09  60.09
     39    51.05   58.9
     40    52.19  57.49
     41    49.85  55.01
     42    50.39

In [8]:
def detect_drought_years(df, threshold=20):
    """Виявляє роки, коли екстремальна посуха торкнулася більше threshold% областей."""
    total_regions = len(df.columns.get_level_values(0).unique())
    critical_regions = int((threshold / 100) * total_regions)
    
    drought_years = []
    for year in df.index.get_level_values(0).unique():
        yearly_data = df.loc[year, (slice(None), "VHI")]
        drought_areas = (yearly_data < 15).sum(axis=0)
        affected_regions = drought_areas[drought_areas > 0].index.get_level_values(0).tolist()
        
        if len(affected_regions) >= critical_regions:
            drought_years.append({
                "year": year,
                "regions": affected_regions,
                "vhi_values": yearly_data.loc[:, affected_regions].min().tolist()
            })
    
    return drought_years

print(detect_drought_years(df, threshold=20))

[{'year': 2000, 'regions': ['1', '8', '11', '12', '20', '24'], 'vhi_values': [10.68, 9.36, 10.6, 6.49, 8.14, 11.25]}, {'year': 2007, 'regions': ['4', '9', '16', '17', '26'], 'vhi_values': [13.28, 12.23, 5.94, 5.52, 10.88]}]
