Лабораторна робота №2 Шкуропінського Максима, студента групи ФБ-33

In [1]:
import os
import urllib.request
import pandas as pd
from datetime import datetime
import re

In [14]:
# Create the directory for the downloaded files (no error if it already exists)
DATA_DIR = "data"
os.makedirs(DATA_DIR, exist_ok=True)

def download_vhi_data(province_id, timeout=60):
    """Download the NOAA VHI time series of one province into ``DATA_DIR``.

    The file is saved as ``NOAA_ID<province_id>_<ddmmYYYYHHMMSS>.csv``, where
    the timestamp is the moment of the download. Failures are reported with a
    printed message rather than raised, so one failing province does not
    abort a loop over several provinces.

    Args:
        province_id: Value substituted into the ``provinceID`` query parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        None. The outcome is reported on standard output.
    """
    url = f"https://www.star.nesdis.noaa.gov/smcd/emb/vci/VH/get_TS_admin.php?country=UKR&provinceID={province_id}&year1=1981&year2=2024&type=Mean"
    now = datetime.now().strftime("%d%m%Y%H%M%S")
    filename = f"{DATA_DIR}/NOAA_ID{province_id}_{now}.csv"

    try:
        # The context manager closes the HTTP connection even when read() fails.
        with urllib.request.urlopen(url, timeout=timeout) as response:
            payload = response.read()
        # The file is created only after the whole body has been received,
        # so a failed transfer does not leave an empty or partial CSV behind.
        with open(filename, "wb") as out_file:
            out_file.write(payload)
        print(f"Downloaded: {filename}")
    except Exception as e:
        # Deliberately broad: this is a best-effort batch download.
        print(f"Error downloading data for province {province_id}: {e}")

# Download the data for every province ID from 1 through 27
for province in range(1, 28):
    download_vhi_data(province)

Downloaded: data/NOAA_ID1_13032025010726.csv
Downloaded: data/NOAA_ID2_13032025010727.csv
Downloaded: data/NOAA_ID3_13032025010728.csv
Downloaded: data/NOAA_ID4_13032025010730.csv
Downloaded: data/NOAA_ID5_13032025010730.csv
Downloaded: data/NOAA_ID6_13032025010731.csv
Downloaded: data/NOAA_ID7_13032025010732.csv
Downloaded: data/NOAA_ID8_13032025010733.csv
Downloaded: data/NOAA_ID9_13032025010734.csv
Downloaded: data/NOAA_ID10_13032025010735.csv
Downloaded: data/NOAA_ID11_13032025010736.csv
Downloaded: data/NOAA_ID12_13032025010737.csv
Downloaded: data/NOAA_ID13_13032025010738.csv
Downloaded: data/NOAA_ID14_13032025010739.csv
Downloaded: data/NOAA_ID15_13032025010740.csv
Downloaded: data/NOAA_ID16_13032025010741.csv
Downloaded: data/NOAA_ID17_13032025010742.csv
Downloaded: data/NOAA_ID18_13032025010743.csv
Downloaded: data/NOAA_ID19_13032025010745.csv
Downloaded: data/NOAA_ID20_13032025010746.csv
Downloaded: data/NOAA_ID21_13032025010747.csv
Downloaded: data/NOAA_ID22_13032025010748.csv

In [15]:
# Matches one HTML tag: the shortest run from '<' to the next '>' on the same line
_HTML_TAG = re.compile(r'<.*?>')


def remove_html(text):
    """Return ``text`` with every ``<...>`` tag removed."""
    return _HTML_TAG.sub('', text)

def read_clean_csv(filepath):
    """Read one downloaded NOAA VHI file and return it as a cleaned DataFrame.

    Cleaning steps: HTML tags are stripped from the ``Year`` column, the
    trailing ``empty`` column is dropped, rows containing missing values are
    removed, and rows whose ``VHI`` equals -1 are removed (presumably NOAA's
    "no data" marker; verify against the data description). A ``region_id``
    column is added with the number parsed from the ``NOAA_ID<number>_`` part
    of the file name (``None`` when the name does not match).

    Args:
        filepath: Path to the CSV file.

    Returns:
        The cleaned DataFrame, or ``None`` if pandas cannot parse the file or
        the file is empty.
    """
    headers = ['Year', 'Week', 'SMN', 'SMT', 'VCI', 'TCI', 'VHI', 'empty']
    try:
        df = pd.read_csv(
            filepath,
            header=1,
            names=headers,
            converters={'Year': remove_html},
            skipinitialspace=True,
        )
    except (pd.errors.ParserError, pd.errors.EmptyDataError) as e:
        # An empty file (e.g. left by an interrupted download) is skipped
        # just like a malformed one instead of aborting the whole load.
        print(f"Error reading {filepath}: {e}")
        return None

    df = df.drop(columns=['empty'], errors='ignore')
    df = df.dropna()
    # .copy() yields an independent frame, so adding a column below is not an
    # assignment on a slice of another frame.
    df = df[df['VHI'] != -1].copy()

    # Only the file name is inspected, so directory names cannot be mistaken
    # for the region identifier.
    match = re.search(r'NOAA_ID(\d+)_', os.path.basename(filepath))
    df['region_id'] = int(match.group(1)) if match else None
    return df

def load_data_from_directory(directory):
    """Load every ``.csv`` file in ``directory`` and stack them into one frame.

    Each file is read with ``read_clean_csv``; files for which it returns
    ``None`` are skipped. Files are processed in the order ``os.listdir``
    yields them.

    Args:
        directory: Folder containing the downloaded CSV files.

    Returns:
        One DataFrame with a fresh 0..n-1 index, or an empty DataFrame when
        no file could be loaded.
    """
    csv_paths = (
        os.path.join(directory, entry)
        for entry in os.listdir(directory)
        if entry.endswith('.csv')
    )
    loaded = (read_clean_csv(path) for path in csv_paths)
    frames = [frame for frame in loaded if frame is not None]

    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)

# Read every CSV file in DATA_DIR, clean it and combine the results
final_df = load_data_from_directory(DATA_DIR)

In [17]:
final_df.head()

Unnamed: 0,Year,Week,SMN,SMT,VCI,TCI,VHI,region_id
0,1982,1.0,0.059,258.24,51.11,48.78,49.95,10
1,1982,2.0,0.063,261.53,55.89,38.2,47.04,10
2,1982,3.0,0.063,263.45,57.3,32.69,44.99,10
3,1982,4.0,0.061,265.1,53.96,28.62,41.29,10
4,1982,5.0,0.058,266.42,46.87,28.57,37.72,10


In [18]:
final_df.tail()

Unnamed: 0,Year,Week,SMN,SMT,VCI,TCI,VHI,region_id
59017,2024,48.0,0.135,278.17,55.23,11.86,33.55,9
59018,2024,49.0,0.133,277.08,57.71,10.86,34.29,9
59019,2024,50.0,0.13,276.49,59.45,8.68,34.07,9
59020,2024,51.0,0.128,276.45,62.53,5.55,34.04,9
59021,2024,52.0,0.129,276.48,66.13,3.71,34.92,9


Змінюємо індекси областей відповідно до порядку української абетки:

In [20]:
# Maps the region ID parsed from the file name (key) to the region's position
# in Ukrainian alphabetical order (value).
replace_region = {
    1: 22, 2: 24, 3: 23, 4: 25, 5: 3, 6: 4, 7: 8, 8: 19, 9: 20, 10: 21,
    11: 9, 13: 10, 14: 11, 15: 12, 16: 13, 17: 14, 18: 15, 19: 16, 21: 17,
    22: 18, 23: 6, 24: 1, 25: 2, 26: 7, 27: 5,
}

# IDs 12 and 20 are downloaded but have no entry in the mapping (presumably
# the cities of Kyiv and Sevastopol in NOAA's numbering; verify). Left in
# place they would keep their old numbers and silently merge with the regions
# that are renumbered to 12 (from 15) and 20 (from 9), so their rows are
# dropped before renumbering.
final_df = final_df[final_df['region_id'].isin(replace_region)].copy()
final_df['region_id'] = final_df['region_id'].map(replace_region)

In [21]:
final_df.head()

Unnamed: 0,Year,Week,SMN,SMT,VCI,TCI,VHI,region_id
0,1982,1.0,0.059,258.24,51.11,48.78,49.95,21
1,1982,2.0,0.063,261.53,55.89,38.2,47.04,21
2,1982,3.0,0.063,263.45,57.3,32.69,44.99,21
3,1982,4.0,0.061,265.1,53.96,28.62,41.29,21
4,1982,5.0,0.058,266.42,46.87,28.57,37.72,21


Ряд VHI для області за вказаний рік

In [29]:
# 'Year' is loaded through the remove_html converter and is therefore text;
# convert it to integers so it can be compared with integer years.
final_df['Year'] = final_df['Year'].astype("int")
def get_vhi_by_year(df, region, year):
    """Return the VHI series of one region for one year.

    Args:
        df: Frame with ``region_id``, ``Year`` and ``VHI`` columns.
        region: Region identifier to select.
        year: Year to select.

    Returns:
        A single-column (``VHI``) DataFrame holding the matching rows with
        their original index.
    """
    in_region = df['region_id'] == region
    in_year = df['Year'] == year
    return df.loc[in_region & in_year, ['VHI']]

In [30]:
get_vhi_by_year(final_df, 1, 2006)

Unnamed: 0,VHI
33988,43.87
33989,44.83
33990,45.51
33991,45.05
33992,44.2
33993,42.88
33994,40.77
33995,39.87
33996,42.33
33997,44.24


Пошук екстремумів (min та max), середнього значення та медіани VHI
для вказаних областей та років

In [44]:
def get_extremes(df, region, year):
    """Summarise the VHI values of one region for one year.

    Args:
        df: Frame with ``region_id``, ``Year`` and ``VHI`` columns.
        region: Region identifier to select.
        year: Year to select.

    Returns:
        A string of the form ``"Min: ..., Max: ..., Mean: ..., Median: ..."``
        built from the selected VHI values.
    """
    selected = (df['region_id'] == region) & (df['Year'] == year)
    vhi = df.loc[selected, 'VHI']

    lowest = float(vhi.min())
    highest = float(vhi.max())
    average = float(vhi.mean())
    middle = float(vhi.median())
    return f"Min: {lowest}, Max: {highest}, Mean: {average}, Median: {middle}"

In [45]:
get_extremes(final_df, 5, 2018)

'Min: 39.32, Max: 60.82, Mean: 49.5075, Median: 49.1'

Ряд VHI за вказаний діапазон років для вказаних областей

In [49]:
def get_vhi_by_year_range(df, region, start_year, end_year):
    """Return the VHI series of one region for a span of years.

    Args:
        df: Frame with ``region_id``, ``Year``, ``Week`` and ``VHI`` columns.
        region: Region identifier to select.
        start_year: First year of the span (inclusive).
        end_year: Last year of the span (inclusive).

    Returns:
        A DataFrame with the ``Year``, ``Week`` and ``VHI`` columns of the
        matching rows, keeping their original index.
    """
    in_region = df['region_id'] == region
    in_span = (df['Year'] >= start_year) & (df['Year'] <= end_year)
    return df.loc[in_region & in_span, ['Year', 'Week', 'VHI']]

In [50]:
get_vhi_by_year_range(final_df, 18, 2000, 2005)

Unnamed: 0,Year,Week,VHI
29324,2000,1.0,34.34
29325,2000,2.0,33.99
29326,2000,3.0,35.27
29327,2000,4.0,36.71
29328,2000,5.0,37.17
...,...,...,...
29611,2005,48.0,46.67
29612,2005,49.0,49.46
29613,2005,50.0,49.87
29614,2005,51.0,48.88


Для всього набору даних виявити роки, протягом яких екстремальні 
посухи торкнулися більше вказаного відсотка областей по Україні (20% 
областей - 5 областей з 25). Повернути роки, назви областей з
екстремальними посухами та значення VHI

In [57]:
def find_drought_years(df, percentage=20, VHI=15):
    """Find the years in which drought affected a given share of regions.

    A region counts as affected in a year when at least one of its rows for
    that year has a VHI strictly below ``VHI``. A year is reported when the
    number of affected regions is greater than or equal to ``percentage``
    percent of the distinct regions present in ``df``.

    Args:
        df: Frame with ``Year``, ``region_id`` and ``VHI`` columns.
        percentage: Share of regions, in percent, that must be affected.
        VHI: Drought limit; rows with a lower VHI count as drought.

    Returns:
        A dict mapping each qualifying year (int) to the list of affected
        region IDs (ints), in the order they first appear in ``df``.
    """
    min_regions = (percentage / 100) * df['region_id'].nunique()
    below_limit = df['VHI'] < VHI

    result = {}
    for year in df['Year'].unique():
        rows_of_year = df['Year'] == year
        affected = df.loc[rows_of_year & below_limit, 'region_id'].unique()
        affected = [int(region) for region in affected]

        if len(affected) >= min_regions:
            result[int(year)] = affected

    return result


In [58]:
find_drought_years(final_df, percentage=20, VHI=15)

{2000: [9, 12, 22, 20, 1, 19], 2007: [13, 14, 7, 25, 20]}