# Preuzimanje meteoroloških podataka

## Učitavanje biblioteka

In [2]:
# Standardne biblioteke
import os
from datetime import datetime

# Numeričke i data analysis biblioteke
import pandas as pd
import numpy as np

# Vizualizacija
import matplotlib.pyplot as plt
import seaborn as sns

# API klijent za ERA5
import cdsapi
import xarray as xr

# Ostale korisne biblioteke
from transliterate import translit
from functools import reduce



## Učitavanje lokacija

In [None]:
# Read the station locations from the local Excel workbook
df_locations = pd.read_excel('polen-lokacije.xlsx')

# Alternative: load directly from data.gov.rs (commented out for now)
# df_locations = pd.read_excel(
#     'https://data.gov.rs/sr/datasets/r/2f9e795d-911b-4664-878f-cb58a02de30e'
# )

# Discard the columns that are not needed, then give the remaining three
# columns short names (assigned by position)
df_locations = df_locations.drop(columns=['Скраћено име', 'Недеља почиње у'])
df_locations.columns = ['location', 'latitude', 'longitude']


def _to_latin(value):
    """Transliterate a Cyrillic location name to Latin; return non-strings unchanged."""
    if not isinstance(value, str):
        return value
    return translit(value, 'sr', reversed=True)


# Convert the location names from Cyrillic to Latin script
df_locations['location'] = df_locations['location'].apply(_to_latin)

df_locations

## Preuzimanje i obrada ERA5 meteoroloških podataka po lokacijama

In [None]:
# Download period: start_year through the current calendar year and month.
start_year = 2016
# Read the clock once so that end_year and end_month always describe the same
# instant. Two separate datetime.now() calls could straddle a month or year
# rollover and produce an inconsistent (year, month) pair.
now = datetime.now()
end_year = now.year
end_month = now.month

# Folder that receives the temporary NetCDF downloads and the per-location CSVs
output_dir = "csv_output"
os.makedirs(output_dir, exist_ok=True)

# Create the CDS API client that the download loop uses for its retrieve() calls
client = cdsapi.Client()

def calculate_relative_humidity(temp_C, dewpoint_C):
    """
    Compute relative humidity from air temperature and dew-point temperature.

    Uses a Magnus-type approximation with coefficients 17.67 and 243.5: the
    result is 100 times exp(a*Td / (b + Td)) divided by exp(a*T / (b + T)).

    Args:
        temp_C: Air temperature in degrees Celsius (scalar or array-like that
            supports element-wise arithmetic).
        dewpoint_C: Dew-point temperature in degrees Celsius.

    Returns:
        Relative humidity in percent, with the same shape as the inputs.
    """
    def magnus_exponent(celsius):
        # Exponent of the Magnus saturation-vapour-pressure approximation
        return (17.67 * celsius) / (243.5 + celsius)

    vapour_ratio = np.exp(magnus_exponent(dewpoint_C)) / np.exp(magnus_exponent(temp_C))
    return 100 * vapour_ratio

# ERA5 variables to download, one tuple per variable:
#   (CDS request name, output column name, variable name inside the NetCDF file,
#    daily aggregation, conversion applied to the raw values before aggregating)
# Built once here because the list is the same for every location and month.
ERA5_VARIABLES = [
    ("2m_temperature", "temperature_C", "t2m", "mean", lambda x: x - 273.15),            # K -> °C
    ("2m_dewpoint_temperature", "dewpoint_C", "d2m", "mean", lambda x: x - 273.15),      # K -> °C
    ("10m_u_component_of_wind", "u_wind", "u10", "mean", lambda x: x),
    ("10m_v_component_of_wind", "v_wind", "v10", "mean", lambda x: x),
    ("total_precipitation", "precipitation_mm", "tp", "sum", lambda x: x * 1000),        # x1000, presumably m -> mm
]

# Process every location: download each variable month by month, aggregate to
# daily values and keep the location's CSV file up to date.
for _, row in df_locations.iterrows():
    # Location name and coordinates
    name = row['location']
    lat = float(row['latitude'])
    lon = float(row['longitude'])

    # Accumulates the daily records of all processed months for this location
    global_df = pd.DataFrame()

    print(f"Obrada lokacije: {name}...")

    # Walk through every year and month of the configured period
    for year in range(start_year, end_year + 1):
        for month in range(1, 13):
            # Months of the final year that lie after end_month are skipped
            if year == end_year and month > end_month:
                continue

            monthly_dfs = []

            for var in ERA5_VARIABLES:
                var_name, col_name1, col_name2, agg, transform = var
                filename = f"{output_dir}/{name}_{year}_{month:02d}_{var_name}.nc"

                try:
                    # Download the data from the CDS server
                    client.retrieve(
                        "reanalysis-era5-single-levels",
                        {
                            "product_type": "reanalysis",
                            "variable": var_name,
                            "year": str(year),
                            "month": [f"{month:02d}"],
                            # Days 1-31 are always requested; presumably the
                            # server ignores dates that do not exist in shorter
                            # months - TODO confirm
                            "day": [f"{d:02d}" for d in range(1, 32)],
                            # NOTE(review): only the 00:00 step is requested, so
                            # the daily "mean"/"sum" below aggregates a single
                            # timestamp per day - confirm this is intended
                            # rather than all 24 hours.
                            "time": ["00:00"],
                            "format": "netcdf",
                            # Box of +/-0.1 degrees around the station, ordered
                            # north, west, south, east
                            "area": [lat + 0.1, lon - 0.1, lat - 0.1, lon + 0.1],
                        },
                        filename
                    )
                    # Read the NetCDF file inside a context manager so the
                    # handle is released even if the conversion raises;
                    # otherwise the cleanup in `finally` may run against a
                    # file that is still open.
                    with xr.open_dataset(filename, engine="netcdf4") as ds:
                        df = ds.to_dataframe().reset_index()

                    df_final = df[["valid_time", col_name2]].copy()
                    df_final[col_name1] = transform(df_final[col_name2])
                    df_final["date"] = pd.to_datetime(df_final["valid_time"]).dt.date

                    # One row per calendar day for this variable
                    df_agg = df_final.groupby("date").agg({col_name1: agg}).reset_index()
                    monthly_dfs.append(df_agg)

                except Exception as e:
                    # Best effort: report the failure and carry on with the
                    # next variable instead of aborting the whole run
                    print(f"Greska za {name} {year}-{month:02d} {var_name}: {e}")
                finally:
                    # Remove the local NetCDF file once it has been processed
                    if os.path.exists(filename):
                        os.remove(filename)

            # Combine all variables of this month into a single DataFrame
            if monthly_dfs:
                monthly_df = reduce(lambda left, right: pd.merge(left, right, on='date', how='outer'), monthly_dfs)
                monthly_df = monthly_df.groupby("date", as_index=False).mean(numeric_only=True)

                # Relative humidity needs both air and dew-point temperature
                if 'temperature_C' in monthly_df and 'dewpoint_C' in monthly_df:
                    monthly_df['relative_humidity'] = calculate_relative_humidity(
                        monthly_df['temperature_C'], monthly_df['dewpoint_C']
                    )

                # Wind speed needs both the u and the v component
                if 'u_wind' in monthly_df and 'v_wind' in monthly_df:
                    monthly_df['wind_speed_mps'] = np.sqrt(monthly_df['u_wind']**2 + monthly_df['v_wind']**2)

                # Append the month and collapse any duplicate dates by averaging
                global_df = pd.concat([global_df, monthly_df], ignore_index=True)
                global_df = global_df.groupby('date', as_index=False).mean(numeric_only=True)

                # The location's CSV is rewritten after every month, so the
                # data gathered so far is on disk if a later step fails
                csv_path = f"{output_dir}/{name}.csv"
                global_df.to_csv(csv_path, index=False)

                print(f"Sačuvan fajl za {year}-{month:02d}: {csv_path}")

    if global_df.empty:
        print(f"Nema podataka za lokaciju: {name}")


### Formatiranje i priprema meteoroloških podataka za dalju analizu

In [None]:
# Folder with the raw per-location CSVs and folder for the formatted output
input_folder = 'csv_output'
output_folder = 'meteo'

os.makedirs(output_folder, exist_ok=True)

# All CSV files present in the input folder
csv_files = [entry for entry in os.listdir(input_folder) if entry.endswith('.csv')]

# Columns kept in the output file (in this order), mapped to their final names
output_columns = {
    'date': 'date',
    'temperature_C': 'temperature',        # air temperature, degrees Celsius
    'precipitation_mm': 'precipitation',   # precipitation, mm
    'relative_humidity': 'humidity',       # relative humidity, %
    'wind_speed_mps': 'wind',              # wind speed, m/s
    'wind_direction': 'wind_direction',
}

# Reformat every CSV file and write it to the output folder under the same name
for file in csv_files:
    file_path = os.path.join(input_folder, file)
    output_path = os.path.join(output_folder, file)

    weather = pd.read_csv(file_path, parse_dates=['date'])

    # Angle of the (u, v) wind vector in radians, as returned by np.arctan2.
    # NOTE(review): this is the mathematical angle, not the meteorological
    # convention (degrees clockwise from north) - confirm what downstream expects.
    weather = weather.assign(
        wind_direction=np.arctan2(weather['v_wind'], weather['u_wind'])
    )

    # Keep only the relevant columns and apply the final column names
    weather = weather[list(output_columns)].rename(columns=output_columns)

    weather.to_csv(output_path, index=False)
