# Funkcije za iscrtavanje grafika

In [1]:
# Standardne biblioteke
import re
import warnings
from datetime import date
import calendar

# Obrada podataka
import numpy as np
import pandas as pd

# Statističke i matematičke funkcije
from scipy import stats
from scipy.stats import boxcox
from scipy.signal import periodogram

# Time series modeliranje
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# Machine learning modeli, metrika i evaluacija
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Vizuelizacija
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.collections import LineCollection
from matplotlib.lines import Line2D
from matplotlib_scalebar.scalebar import ScaleBar
import seaborn as sns
from adjustText import adjust_text

# Geoprostorne biblioteke
import geopandas as gpd
import contextily as ctx
from geopy.distance import geodesic

# Ostale korisne biblioteke
from transliterate import translit
from rapidfuzz import process, fuzz

# Isključivanje upozorenja
warnings.filterwarnings('ignore')





## Učitavanje alergena

In [None]:
# Load the allergen reference table and drop the descriptive columns that are
# not used further on.
df_allergens = pd.read_excel('data\\polen-alergeni.xlsx').drop(
    columns=['Biljna vrsta', 'Narodni naziv', 'Engleski naziv', 'slika / link']
)

# Alternative: read directly from data.gov.rs
# df_allergens = pd.read_excel('https://data.gov.rs/sr/datasets/r/2f9e795d-911b-4664-878f-cb58a02de30e')

# Positional rename of the six remaining columns
df_allergens.columns = [
    'allergen',
    'Tip alergena',
    'Donja vrednost',
    'Gornja vrednost',
    'Period emitovanja polena',
    'Alergenost',
]

# Transliterate the allergen name to Latin script (non-string cells are left as they are)
df_allergens['allergen'] = df_allergens['allergen'].apply(
    lambda name: translit(name, 'sr', reversed=True) if isinstance(name, str) else name
)

# Transliterate the emission period as well, then lower-case it
df_allergens['Period emitovanja polena'] = (
    df_allergens['Period emitovanja polena']
    .apply(lambda period: translit(period, 'sr', reversed=True) if isinstance(period, str) else period)
    .str.lower()
)

In [3]:
# Lookup tables: month name -> month number, position-in-month word -> day
meseci = {
    'januar': 1, 'februar': 2, 'mart': 3, 'april': 4,
    'maj': 5, 'jun': 6, 'jul': 7, 'avgust': 8,
    'septembar': 9, 'oktobar': 10, 'novembar': 11, 'decembar': 12
}

pozicije = {
    'početak': 1,
    'sredina': 15,
    'kraj': 28
}


def _month_number(word):
    """Return the month number for a nominative or genitive month name, or None."""
    # 'jula' -> 'jul', 'maja' -> 'maj', ...
    stem = word.rstrip('a')
    # Genitive of the '-bar' months drops the 'a' of the stem:
    # 'septembra' -> 'septembr' -> 'septembar'
    if stem.endswith('br'):
        stem = stem[:-1] + 'ar'
    return meseci.get(stem)


# Parser for the pollen emission period
def parse_period(tekst):
    """
    Parse a textual emission period into ``('MM-DD', 'MM-DD')`` season bounds.

    The text is expected to look like ``"[početak|sredina|kraj] <month>-
    [početak|sredina|kraj] <month>"`` (e.g. ``"sredina jula- septembar"``).
    The start day comes from ``pozicije`` (5 when no qualifier is given); the
    end is day 10 for "početak", the 1st of the following month for "kraj",
    and day 25 otherwise. Both dates are computed in the leap year 2024 and
    then widened by 6 days (start earlier, end later).

    Parameters
    ----------
    tekst : str
        Period description. Non-string values (e.g. NaN) are not parsed.

    Returns
    -------
    tuple
        ``(start_date, end_date)`` as ``'MM-DD'`` strings, or ``(None, None)``
        when the text cannot be parsed or a month name is unknown.
    """
    # Missing cells arrive as NaN/None; treat them as "no period"
    if not isinstance(tekst, str):
        return None, None

    tekst = tekst.lower().strip()
    pattern = r'(?:(početak|sredina|kraj)\s)?([a-zšđčćž]+)-?\s?(?:(početak|sredina|kraj)\s)?([a-zšđčćž]+)'
    match = re.search(pattern, tekst)

    if not match:
        return None, None

    start_part, start_month, end_part, end_month = match.groups()

    # Resolve both months before doing any arithmetic on them
    start_month_num = _month_number(start_month)
    end_month_num = _month_number(end_month)
    if not start_month_num or not end_month_num:
        return None, None

    start_day = pozicije.get(start_part, 5)

    end_year = 2024
    if end_part == 'početak':
        end_day = 10
    elif end_part == 'kraj':
        # "End of month X" is represented as the first day of month X + 1
        end_day = 1
        end_month_num += 1
        if end_month_num > 12:
            # "kraj decembra" rolls over into January of the next year
            end_month_num = 1
            end_year += 1
    else:
        # NOTE(review): an explicit "sredina" on the end side also lands here (day 25)
        end_day = 25

    start_date = (date(2024, start_month_num, start_day) - pd.Timedelta(days=6)).strftime('%m-%d')
    end_date = (date(end_year, end_month_num, end_day) + pd.Timedelta(days=6)).strftime('%m-%d')

    return start_date, end_date

# Parse every emission period and store the resulting season bounds
# in the start_date / end_date columns
df_allergens[['start_date', 'end_date']] = (
    df_allergens['Period emitovanja polena']
    .apply(lambda period_text: pd.Series(parse_period(period_text)))
)

df_allergens

Unnamed: 0,allergen,Tip alergena,Donja vrednost,Gornja vrednost,Period emitovanja polena,Alergenost,start_date,end_date
0,AMBROZIJA,КОРОВИ (АМБРОЗИЈА),30,100,sredina jula- septembar,Веома јака,07-09,10-01
1,BOKVICA,КОРОВИ,60,100,maj- septembar,Слаба до умерена,04-29,10-01
2,ČETINARI,ДРВЕЋЕ,60,100,mart- kraj maja,Слаба,02-28,06-07
3,BREZA,ДРВЕЋЕ,60,100,mart- početak juna,Веома јака,02-28,06-16
4,BREST,ДРВЕЋЕ,60,100,mart-april,Умерена,02-28,05-01
5,BUKVA,ДРВЕЋЕ,60,100,mart- maj,Слаба до умерена,02-28,05-31
6,VRBA,ДРВЕЋЕ,60,100,mart- kraj maja,Слаба,02-28,06-07
7,GRAB,ДРВЕЋЕ,60,100,mart-maj,Слаба до умерена,02-28,05-31
8,DUD,ДРВЕЋЕ,60,100,april- jun,Слаба до умерена,03-30,07-01
9,JAVOR,ДРВЕЋЕ,60,100,mart- kraj maja,Слаба до умерена,02-28,06-07


## Generalno upotrebljivi grafici

### Prikaz dnevne koncentracije polena u toku cele godine

In [None]:
def plot_daily_concentration(data, col='value', save=False):
    """
    Plot the daily pollen concentration over the whole date range of ``data``.

    The series is drawn as a step plot on a continuous daily calendar running
    from the earliest to the latest date in ``data``; stretches without a
    value are shaded light gray. For every calendar year in that range, dashed
    vertical lines are drawn 7 days before the allergen's ``start_date`` and
    7 days after its ``end_date`` taken from the module-level ``df_allergens``.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs the columns ``'date'``, ``'allergen'``, ``'location'`` and
        ``col``. Allergen and location are read from the first row only.
        ``'date'`` must be mergeable with a pandas date range.
    col : str
        Name of the column holding the concentration values.
    save : bool
        When True, the figure is also written as a PNG (dpi=75).
    """
    # Allergen and location are taken from the first row
    allergen = data['allergen'].iloc[0]
    location = data['location'].iloc[0]

    # Continuous daily calendar, so that days without a record appear as NaN
    all_days = pd.date_range(start=data['date'].min(), end=data['date'].max())
    df_full = pd.DataFrame({'date': all_days}).merge(data[['date', col]], on='date', how='left')

    # Collect (first missing day, first following day with data) spans
    na_blocks = []
    start_na = None
    for day, is_na in zip(df_full['date'], df_full[col].isna()):
        if is_na and start_na is None:
            start_na = day
        elif not is_na and start_na is not None:
            na_blocks.append((start_na, day))
            start_na = None
    if start_na is not None:
        # Gap reaches the end of the calendar: close it one day past the last date
        na_blocks.append((start_na, df_full['date'].iloc[-1] + pd.Timedelta(days=1)))

    fig, ax = plt.subplots(figsize=(12, 5))

    # Shade the periods without data
    for start, end in na_blocks:
        ax.axvspan(start, end, color='lightgray', label='Nedostaju podaci')

    # Step plot of the concentration
    ax.step(df_full['date'], df_full[col], where='post', marker='', color='black', linewidth=0.6, label='Koncentracija polena')

    # Season bounds ('MM-DD' strings) for this allergen
    season_row = df_allergens.loc[df_allergens['allergen'] == allergen]
    start_md = season_row["start_date"].iat[0]
    end_md = season_row["end_date"].iat[0]

    # Dashed lines 7 days before the season start / after the season end, per year
    for year in df_full['date'].dt.year.unique():
        try:
            start_line = pd.to_datetime(f"{year}-{start_md}") - pd.Timedelta(days=7)
            end_line = pd.to_datetime(f"{year}-{end_md}") + pd.Timedelta(days=7)
        except (ValueError, TypeError):
            # Season date cannot be built for this year (e.g. missing bounds
            # or Feb 29 in a non-leap year): skip the markers for that year
            # only. Errors from the drawing calls below are no longer hidden.
            continue
        ax.axvline(x=start_line, color='black', linestyle='--', linewidth=1.0, alpha=0.9, label='Početak/kraj sezone')
        ax.axvline(x=end_line, color='black', linestyle='--', linewidth=1.0, alpha=0.9)

    title = f'Dnevna koncentracija polena - {allergen}'
    if location is not None:
        title += f' ({location})'
    ax.set_title(title, fontsize=22, fontweight='bold', pad=20)

    # Axis labels
    ax.set_ylabel('Koncentracija polena [grains/$m^3$]', fontsize=12, labelpad=15)
    ax.set_xlabel('Datum', fontsize=16, labelpad=15)

    # Tick label size and rotation
    ax.tick_params(axis='x', labelsize=12, rotation=45)
    ax.tick_params(axis='y', labelsize=12)

    # Every span/line carries a label, so collapse duplicates in the legend
    handles, labels = ax.get_legend_handles_labels()
    unique = dict(zip(labels, handles))
    ax.legend(unique.values(), unique.keys(), fontsize=12, loc='upper right')

    plt.tight_layout()

    # Save the figure; '/' works on every platform, unlike a literal backslash
    if save:
        filename = f"grafici/{allergen}_{location}_daily_concentration.png" if location else f"{allergen}_daily_concentration.png"
        plt.savefig(filename, dpi=75)
    plt.show()



### Prikaz dnevne koncentracije polena u toku sezone

In [None]:
def plot_seasonal_concentration(data, col='value', save=False):
    """
    Plot the daily pollen concentration restricted to the allergen's season.

    For every year present in ``data`` the season window (``start_date`` to
    ``end_date`` from the module-level ``df_allergens``) is expanded to a full
    list of days; the windows of all years are concatenated and plotted
    against a running day index. Days without a value are shaded light gray
    and a dashed line marks the first day of each subsequent year.

    X ticks: within each year the first month of the window is skipped and at
    most the six following months get a minor tick at their first day;
    odd-numbered months among them also get a labelled major tick.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs the columns ``'date'`` (datetime), ``'allergen'``,
        ``'location'`` and ``col``. Allergen and location are read from the
        first row only. The frame is not modified.
    col : str
        Name of the column holding the concentration values.
    save : bool
        When True, the figure is also written as a PNG (dpi=75).
    """
    # Allergen and location are taken from the first row
    allergen = data['allergen'].iloc[0]
    location = data['location'].iloc[0]

    # Season bounds ('MM-DD' strings) for this allergen
    season_row = df_allergens.loc[df_allergens['allergen'] == allergen]
    start_md = season_row["start_date"].iat[0]
    end_md = season_row["end_date"].iat[0]

    # All years present in the data
    years = sorted(data['date'].dt.year.unique())

    # One frame per year holding every day of that year's season window
    full_season = []
    for year in years:
        season_days = pd.date_range(pd.to_datetime(f"{year}-{start_md}"),
                                    pd.to_datetime(f"{year}-{end_md}"))
        season_df = pd.DataFrame({'date': season_days})
        season_df['year'] = year
        full_season.append(season_df)

    full_df = pd.concat(full_season, ignore_index=True)

    # Attach the measurements; season days without a record become NaN
    df_plot = full_df.merge(data[['date', col]], on='date', how='left')

    # Running index used as the x coordinate
    df_plot = df_plot.sort_values('date')
    df_plot['plot_index'] = range(len(df_plot))

    # Collect (first missing index, first following index with data) spans
    na_blocks = []
    start_na = None
    for idx, is_na in zip(df_plot['plot_index'], df_plot[col].isna()):
        if is_na and start_na is None:
            start_na = idx
        elif not is_na and start_na is not None:
            na_blocks.append((start_na, idx))
            start_na = None
    if start_na is not None:
        na_blocks.append((start_na, df_plot['plot_index'].iloc[-1] + 1))

    # First index of every year except the first one
    year_changes = df_plot.groupby('year')['plot_index'].min().values[1:]

    fig, ax = plt.subplots(figsize=(14, 5))

    # Shade the periods without data
    for start_na, end_na in na_blocks:
        ax.axvspan(start_na, end_na, color='lightgray', label='Nedostaju podaci')

    # Concentration line
    ax.plot(df_plot['plot_index'], df_plot[col], label='Koncentracija polena', linewidth=0.6, color='black')

    # Dashed separators between years
    for x in year_changes:
        ax.axvline(x=x, color='black', linestyle='--', linewidth=0.6)

    # Title
    title = f'Sezonska koncentracija polena - {allergen}'
    if location is not None:
        title += f' ({location})'
    ax.set_title(title, fontsize=16, fontweight='bold', pad=15)

    # Axis labels
    ax.set_ylabel('Koncentracija polena [grains/$m^3$]')
    ax.set_xlabel('Sezonski dani')

    # De-duplicated legend, placed in the upper right corner
    handles, labels = ax.get_legend_handles_labels()
    unique = dict(zip(labels, handles))
    ax.legend(unique.values(), unique.keys(), loc='upper right', fontsize=10, frameon=True)

    # Month ticks (see docstring for the selection rule)
    tick_positions = []
    tick_labels = []
    seen_labels = set()
    all_tick_positions = []

    for year in years:
        df_year = df_plot[df_plot['year'] == year]
        # Group by the month derived on the fly instead of writing a helper
        # column into a filtered slice of df_plot
        months = df_year.groupby(df_year['date'].dt.month)
        for check, (month, group) in enumerate(months):
            if check == 0:
                continue
            if check > 6:
                break
            pos = group['plot_index'].iloc[0]
            all_tick_positions.append(pos)

            if month % 2 != 0:
                label = f"{calendar.month_abbr[month]} {year}"
                if label not in seen_labels:
                    tick_positions.append(pos)
                    tick_labels.append(label)
                    seen_labels.add(label)

    ax.set_xticks(all_tick_positions, minor=True)
    ax.tick_params(axis='x', which='minor', length=3, color='gray')

    ax.set_xticks(tick_positions)
    ax.set_xticklabels(tick_labels, rotation=45, fontsize=8)

    plt.tight_layout()

    # Save the figure; '/' works on every platform, unlike a literal backslash
    if save:
        filename = f"grafici/{allergen}_{location}_sezonska_koncentracija.png" if location else f"{allergen}_sezonska_koncentracija.png"
        plt.savefig(filename, dpi=75)

    plt.show()


In [None]:
def plot_seasonal_concentration_imp(data, mode=0, save=False):
    """
    Plot the seasonal pollen concentration with imputed values highlighted.

    The rows of ``data`` are sorted by date and plotted against a running row
    index. The line is built from individual segments; a segment ending at
    row ``i + 1`` is black when ``imputed[i + 1] == mode`` and gray otherwise.
    Segments touching a NaN value are omitted. A dashed line marks the first
    row of each subsequent year.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs the columns ``'date'`` (datetime), ``'allergen'``,
        ``'location'``, ``'value'`` and ``'imputed'``. The frame is not
        modified.
    mode : int
        Which ``imputed`` flag is drawn in black. Must be 0 or 1: the legend
        looks up the colours stored under the keys 0 ('Originalni podaci') and
        1 ('Imputirani podaci').
    save : bool
        When True, the figure is also written as a PNG (dpi=100).
    """
    # Sort and add helper columns on a private copy
    data = data.sort_values('date').copy()
    data['year'] = data['date'].dt.year
    data['month'] = data['date'].dt.month
    data['plot_index'] = range(len(data))

    allergen = data['allergen'].iloc[0]
    location = data['location'].iloc[0]

    # First index of every year except the first one
    year_changes = data.groupby('year')['plot_index'].min().values[1:]

    x = data['plot_index'].values
    y = data['value'].values
    imputed = data['imputed'].values

    segments = []
    colors = []
    color_map = {mode: 'black', 1 - mode: 'gray'}

    for i in range(len(x) - 1):
        # A segment needs two valid end points
        if np.isnan(y[i]) or np.isnan(y[i + 1]):
            continue
        # The colour follows the flag of the segment's right end point
        colors.append(color_map.get(imputed[i + 1], 'gray'))
        segments.append([[x[i], y[i]], [x[i + 1], y[i + 1]]])

    fig, ax = plt.subplots(figsize=(14, 5))
    lc = LineCollection(segments, colors=colors, linewidths=1.3)
    ax.add_collection(lc)

    # A LineCollection does not set the axis limits by itself
    ax.autoscale()
    ax.set_xlim(x.min(), x.max())
    ax.set_ylim(0, np.nanmax(y) * 1.1)

    # Dashed separators between years
    for xline in year_changes:
        ax.axvline(x=xline, color='black', linestyle='--', linewidth=0.5, alpha=0.7)

    # Title
    title = f"Sezonska koncentracija polena – {allergen}"
    if location:
        title += f", lokacija: {location}"
    ax.set_title(title, fontsize=14, fontweight='bold')

    # Axis labels
    ax.set_ylabel('Koncentracija polena [grains/$m^3$]')
    ax.set_xlabel('Sezonski dani')

    # Month ticks: per year the first month present is skipped, at most the
    # six following months get a minor tick at their first row, and the
    # odd-numbered ones also get a labelled major tick
    tick_positions = []
    tick_labels = []
    seen_labels = set()
    all_tick_positions = []

    for year in data['year'].unique():
        df_year = data[data['year'] == year]
        # groupby yields the months in ascending order with rows in their
        # original order, so the first row of a group is the month's first row
        for check, (month, group) in enumerate(df_year.groupby('month')):
            if check == 0:
                continue
            if check > 6:
                break
            pos = group['plot_index'].iloc[0]
            all_tick_positions.append(pos)

            if month % 2 != 0:
                label = f"{calendar.month_abbr[month]} {year}"
                if label not in seen_labels:
                    tick_positions.append(pos)
                    tick_labels.append(label)
                    seen_labels.add(label)

    ax.set_xticks(all_tick_positions, minor=True)
    ax.tick_params(axis='x', which='minor', length=3, color='gray')
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(tick_labels, rotation=45, fontsize=8)

    # Legend in the upper right corner, built by hand because the
    # LineCollection carries no per-segment labels
    custom_lines = [
        Line2D([0], [0], color=color_map[0], lw=2, label='Originalni podaci'),
        Line2D([0], [0], color=color_map[1], lw=2, label='Imputirani podaci')
    ]
    ax.legend(handles=custom_lines, loc='upper right', frameon=False, fontsize=9)

    plt.tight_layout()

    # Save the figure; '/' works on every platform, unlike a literal backslash
    if save:
        filename = f"grafici/{allergen}_{location}_imputed_concentration.png" if location else f"{allergen}_imputed_concentration.png"
        plt.savefig(filename, dpi=100, bbox_inches='tight')

    plt.show()


## Grafici za poseban notebook

### data-loading_and_exploration

In [None]:
def plot_monitoring_locations(locations_df, save=False):
    """
    Draw a map of the pollen monitoring locations in Serbia.

    ``locations_df`` is read through its ``longitude``, ``latitude`` and
    ``location`` columns. The points are created in EPSG:4326, reprojected to
    EPSG:3857 and drawn over an OpenStreetMap basemap with name labels, a
    scale bar and a north arrow. With ``save=True`` the figure is also written
    to 'grafici/mapa_lokacija.png' (dpi=75).
    """
    # Point geometries from lon/lat, reprojected for the tile basemap
    points = gpd.points_from_xy(locations_df.longitude, locations_df.latitude)
    stations = gpd.GeoDataFrame(locations_df, geometry=points, crs="EPSG:4326")
    stations = stations.to_crs(epsg=3857)

    fig, ax = plt.subplots(figsize=(10, 10))
    stations.plot(ax=ax, color='darkred', markersize=80, edgecolor='white', linewidth=0.3)

    # One label slightly above each point; adjust_text then moves overlapping labels
    label_artists = [
        ax.text(px, py + 2000, name, fontsize=8, fontweight='bold', color='navy')
        for px, py, name in zip(stations.geometry.x, stations.geometry.y, stations['location'])
    ]
    arrow_style = dict(arrowstyle="->", color='black', lw=0.3)
    adjust_text(label_artists, ax=ax, arrowprops=arrow_style)

    ctx.add_basemap(ax, source=ctx.providers.OpenStreetMap.Mapnik, alpha=0.8)

    ax.grid(True, which='both', linestyle=':', color='lightgrey', alpha=0.5)
    scale_bar = ScaleBar(1, location='lower right', box_alpha=0.5, color='black')
    ax.add_artist(scale_bar)

    # North arrow, positioned in axes-fraction coordinates
    ax.annotate(
        'N',
        xy=(0.95, 0.1),
        xytext=(0.95, 0.2),
        arrowprops=dict(facecolor='black', width=5, headwidth=15),
        ha='center',
        va='center',
        fontsize=12,
        xycoords=ax.transAxes,
    )

    ax.set_title('Lokacije praćenja polena u Srbiji', fontsize=14, fontweight='bold', color='navy', pad=20)
    ax.axis('off')
    plt.tight_layout()

    if save:
        plt.savefig('grafici/mapa_lokacija.png', dpi=75)

    plt.show()

In [None]:
def plot_meteo_by_city(meteo, cities, allergen, save=False):
    """
    Scatter plot plus regression line of pollen concentration against each
    meteorological variable, per city, for the given allergen.

    The grid has one row per variable (temperature, precipitation, humidity,
    wind, wind_direction) and one column per city; every panel title shows the
    correlation between the variable and ``'value'``. ``meteo`` is filtered on
    its ``'location'`` and ``'allergen'`` columns. With ``save=True`` the
    figure is written to 'grafici/meteo_gradovi_<allergen>.png' (dpi=150).
    """
    # (column in `meteo`, x-axis label) for every grid row
    variables = [
        ('temperature', 'Temperatura [°C]'),
        ('precipitation', 'Padavine [mm]'),
        ('humidity', 'Relativna vlažnost [%]'),
        ('wind', 'Brzina vetra [m/s]'),
        ('wind_direction', 'Pravac vetra [rad]'),
    ]
    n_rows = len(variables)
    n_cols = len(cities)

    # squeeze=False keeps `axes` two-dimensional even for a single city
    fig, axes = plt.subplots(n_rows, n_cols,
                             figsize=(4*n_cols, 3*n_rows),
                             sharey=True, sharex=False, squeeze=False)

    for row_idx, (var, var_sr) in enumerate(variables):
        for col_idx, city in enumerate(cities):
            ax = axes[row_idx, col_idx]

            in_city = meteo['location'] == city
            is_allergen = meteo['allergen'] == allergen
            city_data = meteo[in_city & is_allergen]
            x_values = city_data[var]
            y_values = city_data['value']

            # Raw observations
            sns.scatterplot(x=x_values, y=y_values, ax=ax,
                            color='gray', edgecolor=None, alpha=0.4, s=20)

            # Fitted regression line only (points are drawn above)
            sns.regplot(x=x_values, y=y_values,
                        scatter=False, color='black', ax=ax, line_kws={'linestyle': '--'})

            # Correlation shown in the panel title; the top row also names the city
            corr = x_values.corr(y_values)
            panel_title = f'{city}\n$r$ = {corr:.2f}' if row_idx == 0 else f'$r$ = {corr:.2f}'
            ax.set_title(panel_title, fontsize=12, fontweight='bold')

            ax.set_xlabel(var_sr, fontsize=11)
            ax.set_ylabel('')

    # One shared y label for the whole figure
    fig.text(0.01, 0.5, 'Koncentracija polena [grains/$m^3$]', va='center', rotation='vertical', fontsize=14)

    plt.suptitle(f'{allergen}: Zavisnost koncentracije polena od meteoroloških parametara po gradovima',
                 fontsize=16, fontweight='bold')

    plt.tight_layout(rect=[0.05, 0.03, 1, 0.95])

    if save:
        plt.savefig(f'grafici/meteo_gradovi_{allergen}.png', dpi=150)


In [None]:
def plot_seasonal_concentration_transform(data, save=False):
    """
    Plot the seasonal pollen concentration and two transformations of it.

    Three panels are drawn side by side: the original ``'value'`` column, its
    Box-Cox transform (computed on ``value + 0.1``) and ``log1p(value / 30)``.
    Each panel uses the same layout as the seasonal concentration plot: the
    season windows (``start_date`` to ``end_date`` from the module-level
    ``df_allergens``) of all years are concatenated on a running day index,
    days without a value are shaded light gray and dashed lines separate the
    years.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs the columns ``'date'`` (datetime), ``'allergen'``,
        ``'location'`` and ``'value'``. Allergen and location are read from
        the first row only. The frame is not modified.
    save : bool
        When True, the figure is also written as a PNG (dpi=300).
    """
    data = data.copy()

    # Allergen and location are taken from the first row
    allergen = data['allergen'].iloc[0]
    location = data['location'].iloc[0]

    # Season bounds ('MM-DD' strings) for this allergen
    season_row = df_allergens.loc[df_allergens['allergen'] == allergen]
    start_md = season_row["start_date"].iat[0]
    end_md = season_row["end_date"].iat[0]

    # Transformations: Box-Cox (shifted by 0.1) and log1p(x/30)
    data['boxcox'], _ = boxcox(data['value'] + 1e-1)
    data['log1p_div30'] = np.log1p(data['value'] / 30)

    transform_cols = ['value', 'boxcox', 'log1p_div30']
    titles = ['Original', 'Box-Cox', 'log1p(x/30)']

    # The season calendar is the same for all three panels, so build it once
    years = sorted(data['date'].dt.year.unique())
    full_season = []
    for year in years:
        season_days = pd.date_range(pd.to_datetime(f"{year}-{start_md}"),
                                    pd.to_datetime(f"{year}-{end_md}"))
        season_df = pd.DataFrame({'date': season_days})
        season_df['year'] = year
        full_season.append(season_df)
    full_df = pd.concat(full_season, ignore_index=True)

    fig, axes = plt.subplots(1, 3, figsize=(18, 5))

    for ax, col, subtitle in zip(axes, transform_cols, titles):
        # Attach the column to the calendar; days without a record become NaN
        df_plot = full_df.merge(data[['date', col]], on='date', how='left')
        df_plot = df_plot.sort_values('date')
        df_plot['plot_index'] = range(len(df_plot))

        # Collect (first missing index, first following index with data) spans
        na_blocks = []
        start_na = None
        for idx, is_na in zip(df_plot['plot_index'], df_plot[col].isna()):
            if is_na and start_na is None:
                start_na = idx
            elif not is_na and start_na is not None:
                na_blocks.append((start_na, idx))
                start_na = None
        if start_na is not None:
            na_blocks.append((start_na, df_plot['plot_index'].iloc[-1] + 1))

        # First index of every year except the first one
        year_changes = df_plot.groupby('year')['plot_index'].min().values[1:]

        # Shade the periods without data
        for start_na, end_na in na_blocks:
            ax.axvspan(start_na, end_na, color='lightgray')

        # Concentration line
        ax.plot(df_plot['plot_index'], df_plot[col], linewidth=0.6, color='black')

        # Dashed separators between years
        for x in year_changes:
            ax.axvline(x=x, color='black', linestyle='--', linewidth=0.6)

        # Panel title and y label
        ax.set_title(subtitle, fontsize=14, fontweight='bold', pad=10)
        ax.set_ylabel('Koncentracija polena')

        # Month ticks: per year the first month of the window is skipped, at
        # most the six following months get a minor tick at their first day,
        # and the odd-numbered ones also get a labelled major tick
        tick_positions = []
        tick_labels = []
        seen_labels = set()
        all_tick_positions = []

        for year in years:
            df_year = df_plot[df_plot['year'] == year]
            # Group by the month derived on the fly instead of writing a
            # helper column into a filtered slice of df_plot
            months = df_year.groupby(df_year['date'].dt.month)
            for check, (month, group) in enumerate(months):
                if check == 0:
                    continue
                if check > 6:
                    break
                pos = group['plot_index'].iloc[0]
                all_tick_positions.append(pos)

                if month % 2 != 0:
                    label = f"{calendar.month_abbr[month]} {year}"
                    if label not in seen_labels:
                        tick_positions.append(pos)
                        tick_labels.append(label)
                        seen_labels.add(label)

        ax.set_xticks(all_tick_positions, minor=True)
        ax.tick_params(axis='x', which='minor', length=3, color='gray')

        ax.set_xticks(tick_positions)
        ax.set_xticklabels(tick_labels, rotation=45, fontsize=8)

    # Figure title
    main_title = f'Sezonska koncentracija polena - {allergen}'
    if location is not None:
        main_title += f' ({location})'
    fig.suptitle(main_title, fontsize=18, fontweight='bold')

    plt.tight_layout(rect=[0, 0, 1, 0.95])

    # Save the figure; '/' works on every platform, unlike a literal backslash
    if save:
        filename = f"grafici/{allergen}_{location}_sezonske_transformacije.png" if location else f"{allergen}_sezonske_transformacije.png"
        plt.savefig(filename, dpi=300)

    plt.show()


### kriging_model

In [None]:
def plot_seasonal_decomposition(season_df, save=False):
    """
    Visualise a seasonal decomposition (transformed series, trend + season,
    residual) for the given DataFrame.

    The left panel shows ``'boxcox_value'`` together with ``'fitted'``; the
    right panel shows ``'residual'``. Both are plotted against a running day
    index over the concatenated season windows (``start_date`` to ``end_date``
    from the module-level ``df_allergens``) of all years. Days on which
    ``'boxcox_value'`` is missing are shaded gray and dashed lines separate
    the years.

    Parameters
    ----------
    season_df : pandas.DataFrame
        Needs the columns ``'date'`` (datetime), ``'allergen'``,
        ``'location'``, ``'boxcox_value'``, ``'fitted'`` and ``'residual'``.
        Allergen and location are read from the first row only. The frame is
        not modified.
    save : bool
        When True, the figure is also written as a PNG (dpi=75).
    """
    # Allergen and location are taken from the first row
    allergen = season_df['allergen'].iloc[0]
    location = season_df['location'].iloc[0]

    # Season bounds ('MM-DD' strings) for this allergen
    season_row = df_allergens.loc[df_allergens['allergen'] == allergen]
    start_md = season_row["start_date"].iat[0]
    end_md = season_row["end_date"].iat[0]

    years = sorted(season_df['date'].dt.year.unique())

    # Every day of every year's season window. Only 'date' is stored here so
    # the merge below cannot produce colliding 'year_x' / 'year_y' columns.
    full_season = []
    for year in years:
        season_days = pd.date_range(pd.to_datetime(f"{year}-{start_md}"),
                                    pd.to_datetime(f"{year}-{end_md}"))
        full_season.append(pd.DataFrame({'date': season_days}))

    full_df = pd.concat(full_season, ignore_index=True)

    # Attach the decomposition columns; days without a record become NaN
    df_plot = full_df.merge(season_df, on='date', how='left').sort_values('date')
    df_plot['plot_index'] = range(len(df_plot))
    df_plot['year'] = df_plot['date'].dt.year

    # Collect (first missing index, first following index with data) spans
    na_blocks = []
    start_na = None
    for idx, is_na in zip(df_plot['plot_index'], df_plot['boxcox_value'].isna()):
        if is_na and start_na is None:
            start_na = idx
        elif not is_na and start_na is not None:
            na_blocks.append((start_na, idx))
            start_na = None
    if start_na is not None:
        na_blocks.append((start_na, df_plot['plot_index'].iloc[-1] + 1))

    # First index of every year except the first one
    year_changes = df_plot.groupby('year')['plot_index'].min().values[1:]

    # Two panels: transformed series with fit (left), residuals (right)
    fig, axs = plt.subplots(1, 2, figsize=(16, 6), sharex=True)

    # Left: transformed value and fitted value (trend + season)
    axs[0].plot(df_plot['plot_index'], df_plot['boxcox_value'], label='Original', alpha=0.5, color='black', linewidth=0.6)
    axs[0].plot(df_plot['plot_index'], df_plot['fitted'], label='Trend + Sezona', linestyle='--', color='black')
    axs[0].set_title('Transformisana koncentracija i trend sa sezonskom komponentom')
    axs[0].set_ylabel('Transformisana koncentracija')
    axs[0].legend()

    # Right: residuals
    axs[1].plot(df_plot['plot_index'], df_plot['residual'], label='Rezidual', alpha=1, color='black', linestyle='-')
    axs[1].set_title('Reziduali')
    axs[1].set_ylabel('Rezidual')
    axs[1].legend()

    # Gray spans for missing data and dashed separators between years
    for ax in axs:
        for span_start, span_end in na_blocks:
            ax.axvspan(span_start, span_end, color='grey', alpha=0.2)

        for x in year_changes:
            ax.axvline(x=x, color='black', linestyle='--', linewidth=0.6)

    # Month ticks: per year the first month of the window is skipped, at most
    # the six following months get a minor tick at their first day, and the
    # even-numbered ones also get a labelled major tick
    tick_positions = []
    tick_labels = []
    seen_labels = set()
    all_tick_positions = []

    for year in years:
        df_year = df_plot[df_plot['year'] == year]
        # Group by the month derived on the fly instead of writing a helper
        # column into a filtered slice of df_plot
        months = df_year.groupby(df_year['date'].dt.month)
        for check, (month, group) in enumerate(months):
            if check == 0:
                continue
            if check > 6:
                break
            pos = group['plot_index'].iloc[0]
            all_tick_positions.append(pos)

            if month % 2 == 0:
                label = f"{calendar.month_abbr[month]} {year}"
                if label not in seen_labels:
                    tick_positions.append(pos)
                    tick_labels.append(label)
                    seen_labels.add(label)

    # Apply the ticks to both panels
    for ax in axs:
        ax.set_xticks(all_tick_positions, minor=True)
        ax.tick_params(axis='x', which='minor', length=4, color='gray')

        ax.set_xticks(tick_positions)
        ax.set_xticklabels(tick_labels, rotation=45, fontsize=8)

    # Figure title
    title = f"Sezonska dekompozicija – {allergen}, lokacija: {location}" if location else f"Sezonska dekompozicija – {allergen}"
    fig.suptitle(title, fontsize=14)

    plt.tight_layout(rect=[0, 0, 1, 0.95])

    # Save the figure; '/' works on every platform, unlike a literal backslash
    if save:
        filename = f"grafici/{allergen}_{location}_sezonska_dekompozicija.png" if location else f"{allergen}_sezonska_dekompozicija_bw.png"
        plt.savefig(filename, dpi=75)

    plt.show()


### sarimax/prophet_model

In [None]:
def plot_periodogram(data, value_col='transform', save=False):
    """
    Detect the dominant period of a series with a periodogram and plot it.

    The periodogram of ``data[value_col]`` is computed with scipy's default
    sampling frequency; the zero-frequency bin is ignored and the frequency
    with the highest power is reported. The period is the rounded reciprocal
    of that frequency, i.e. it is expressed in samples (printed as "dana").

    Parameters
    ----------
    data : pandas.DataFrame
        Needs the columns ``'allergen'``, ``'location'`` and ``value_col``.
        Allergen and location are read from the first row only.
    value_col : str
        Name of the column holding the series to analyse.
    save : bool
        When True, the figure is also written as a PNG (dpi=150).

    Returns
    -------
    int
        The dominant period.
    """
    allergen = data['allergen'].iloc[0]
    location = data['location'].iloc[0]

    frequencies, power = periodogram(data[value_col])

    # Skip the first bin (frequency 0) so the mean level cannot win and the
    # reciprocal below is always defined
    dominant_frequency_bc = frequencies[1:][np.argmax(power[1:])]
    period = int(np.round(1 / dominant_frequency_bc))

    # NOTE(review): the labels say "Hz" while the period is reported in days;
    # with daily samples the frequency unit would be 1/day - confirm the wording
    plt.figure(figsize=(10, 6))
    plt.plot(frequencies[1:], power[1:], color='black', linewidth=1.2, label='Periodogram')
    plt.axvline(dominant_frequency_bc, color='gray', linestyle='--', linewidth=1,
                label=f'Dominantna freq: {dominant_frequency_bc:.4f} Hz\nPeriod: {period} dana')

    plt.xlim(0, 0.1)
    plt.xlabel('Frekvencija [Hz]', fontsize=12)
    plt.ylabel('Snaga', fontsize=12)
    plt.title(f'Periodogram – {allergen} ({location})', fontsize=18, fontweight='bold', pad=15)

    plt.legend(loc='upper right', fontsize=10, frameon=False)
    plt.grid(True, linestyle='--', linewidth=0.5, color='gray', alpha=0.7)
    plt.tight_layout()

    # Save the figure; '/' works on every platform, unlike a literal backslash
    if save:
        save_path = f'grafici/periodogram_{allergen}_{location}.png'
        plt.savefig(save_path, dpi=150, bbox_inches='tight')

    plt.show()

    print(f"Dominantni period za {allergen} ({location}) je: {period} dana")
    return period


In [None]:
def seasonal_segment_decomposition(df, allergen, location, column='boxcox', model='additive', lags=130, save=False):
    """
    Show a seasonal decomposition of the in-season data together with ACF and PACF.

    Rows whose month-day lies between their ``start_date`` and ``end_date``
    are kept, split per calendar year, and the yearly segments are glued
    into one continuous series. That series is decomposed with
    ``seasonal_decompose`` using the longest yearly segment as the period.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs a datetime ``date`` column, ``start_date`` / ``end_date``
        columns that are compared as strings against ``'%m-%d'``-formatted
        dates (so presumably 'MM-DD' strings), and ``column``. The frame is
        not filtered by allergen or location here and is not modified.
    allergen, location : str
        Used only in the figure title and the output file name.
    column : str, default 'boxcox'
        Column holding the values to decompose.
    model : str, default 'additive'
        Passed through to ``seasonal_decompose``.
    lags : int, default 130
        Number of lags passed to ``plot_acf`` and ``plot_pacf``.
    save : bool, default False
        When true, the figure is written to a PNG whose path is built from
        the hard-coded ``grafici`` prefix before the figure is shown.

    Raises
    ------
    ValueError
        If no row falls inside the season window.
    """
    # Build the season mask from a local Series so the caller's frame
    # does not gain a helper column as a side effect.
    month_day = df['date'].dt.strftime('%m-%d')
    in_season = (df['start_date'] <= month_day) & (month_day <= df['end_date'])
    df_season = df.loc[in_season].copy()
    if df_season.empty:
        raise ValueError(
            f"No rows fall inside the season window for {allergen} ({location}); nothing to decompose."
        )
    df_season['year'] = df_season['date'].dt.year

    # One segment per year, concatenated into a single gap-free series
    segments = [
        yearly[column].reset_index(drop=True)
        for _, yearly in df_season.groupby('year')
    ]
    segment_lengths = [len(segment) for segment in segments]
    # Cumulative lengths mark where one year ends and the next one starts
    breakpoints = np.cumsum(segment_lengths).tolist()
    joined_series = pd.concat(segments, ignore_index=True)

    # The longest season is used as the seasonal period
    period = int(max(segment_lengths))
    decomposition = seasonal_decompose(joined_series, model=model, period=period)

    # Figure layout: four stacked panels on the left, ACF / PACF on the right
    fig = plt.figure(figsize=(16, 10))
    gs = fig.add_gridspec(4, 3, wspace=0.4, hspace=0.6)

    # The style is switched after the figure exists (as before) and is always
    # reset in ``finally`` so a failure while plotting cannot leave the
    # session stuck in grayscale.
    plt.style.use('grayscale')
    try:
        def plot_with_breaks(ax, series, title, ylabel, color='black'):
            ax.plot(series, color=color, linewidth=1)
            # The last breakpoint is the end of the series, so it gets no line
            for bp in breakpoints[:-1]:
                ax.axvline(bp, color='gray', linestyle='--', alpha=0.5)
            ax.set_title(title, fontsize=14, fontweight='bold', pad=8)
            ax.set_ylabel(ylabel, fontsize=12)
            ax.set_xlabel('Vreme (dani)', fontsize=11)
            ax.grid(True, linestyle='--', linewidth=0.4, alpha=0.7)

        # Original series
        ax1 = fig.add_subplot(gs[0, 0])
        plot_with_breaks(ax1, joined_series, "Spojeni sezonski segmenti", "Vrednost")

        # Trend
        ax2 = fig.add_subplot(gs[1, 0])
        plot_with_breaks(ax2, decomposition.trend, "Trend", "Trend")

        # Seasonal component
        ax3 = fig.add_subplot(gs[2, 0])
        plot_with_breaks(ax3, decomposition.seasonal, "Sezonalna komponenta", "Sezonalnost")

        # Residual
        ax4 = fig.add_subplot(gs[3, 0])
        plot_with_breaks(ax4, decomposition.resid, "Rezidualne komponente", "Rezidual")

        # ACF
        ax5 = fig.add_subplot(gs[:2, 1:])
        plot_acf(joined_series.dropna(), lags=lags, ax=ax5, color='black')
        ax5.set_title("Autokorelaciona funkcija (ACF)", fontsize=14, fontweight='bold', pad=8)
        ax5.set_xlabel("Lag", fontsize=11)
        ax5.set_ylabel("ACF", fontsize=11)

        # PACF
        ax6 = fig.add_subplot(gs[2:, 1:])
        plot_pacf(joined_series.dropna(), lags=lags, ax=ax6, color='black')
        ax6.set_title("Parcijalna autokorelacija (PACF)", fontsize=14, fontweight='bold', pad=8)
        ax6.set_xlabel("Lag", fontsize=11)
        ax6.set_ylabel("PACF", fontsize=11)

        # Main title
        plt.suptitle(f"Spojeni sezonski signali – {allergen} ({location})",
                     fontsize=18, fontweight='bold', y=0.98)

        plt.tight_layout(rect=[0, 0.03, 1, 0.95])

        if save:
            save_path = f'grafici\\seasonal_segment_{allergen}_{location}.png'
            plt.savefig(save_path, dpi=150, bbox_inches='tight')

        plt.show()
    finally:
        plt.style.use('default')


In [None]:
def plot_seasonal_predictions(data, forecast_df, allergen, location, n_seasons=None, col='value', n_days=1, meteo=False, save=False):
    """
    Plot in-season pollen concentrations with a fixed-horizon prediction and its interval.

    A complete day-by-day calendar is built for every season (bounds taken
    from the module-level ``df_allergens`` table), the observations and the
    prediction column ``pred{n_days-1}d`` are merged onto it, and everything
    is drawn against a running day index so the seasons sit back to back.

    Parameters
    ----------
    data : pandas.DataFrame
        Observations with a datetime ``date`` column, ``col`` and an
        ``imputed`` column (rows equal to 1 are marked in red). Not modified.
    forecast_df : pandas.DataFrame
        Must contain ``date`` and ``pred{n_days-1}d``; the matching
        ``_lower`` / ``_upper`` columns are drawn as a band only when both
        are present. Not modified.
    allergen : str
        Looked up in ``df_allergens['allergen']`` to obtain ``start_date``
        and ``end_date``; also used in the title and file name.
    location : str
        Used in the title and file name.
    n_seasons : int or None, default None
        When given, only the last ``n_seasons`` years present in ``data``
        are drawn.
    col : str, default 'value'
        Column of ``data`` holding the observed values.
    n_days : int, default 1
        Forecast horizon. Forecast dates are moved forward by
        ``n_days - 1`` days before merging, presumably so that every
        prediction sits on the day it targets -- TODO confirm.
    meteo : bool, default False
        Selects which of two fixed headline texts is put in the title.
    save : bool, default False
        When true, the figure is written to a PNG before being shown.

    Raises
    ------
    IndexError
        If ``allergen`` is not present in ``df_allergens``.
    KeyError
        If ``forecast_df`` has no ``pred{n_days-1}d`` column.
    """
    # Season bounds; they are pasted behind the year to form full dates below
    allergen_rows = df_allergens.loc[df_allergens['allergen'] == allergen]
    start_md = allergen_rows['start_date'].iat[0]
    end_md = allergen_rows['end_date'].iat[0]

    # Years are derived without adding a helper column to ``data``
    years = sorted(data['date'].dt.year.unique())
    if n_seasons is not None:
        years = years[-n_seasons:]

    # Complete calendar of every season, so missing days show up as gaps
    full_season = []
    for year in years:
        start = pd.to_datetime(f"{year}-{start_md}")
        end = pd.to_datetime(f"{year}-{end_md}")
        season_df = pd.DataFrame({'date': pd.date_range(start, end)})
        season_df['year'] = year
        full_season.append(season_df)

    full_df = pd.concat(full_season, ignore_index=True)

    pred_col = f"pred{n_days-1}d"
    pred_lower_col = f"pred{n_days-1}d_lower"
    pred_upper_col = f"pred{n_days-1}d_upper"

    # The copy keeps the caller's forecast frame untouched by the date shift
    forecast_shifted = forecast_df.copy()
    forecast_shifted['date'] = forecast_shifted['date'] + pd.to_timedelta(n_days-1, unit="D")

    forecast_cols = {pred_col: 'prediction'}
    if pred_lower_col in forecast_shifted.columns and pred_upper_col in forecast_shifted.columns:
        forecast_cols[pred_lower_col] = 'prediction_lower'
        forecast_cols[pred_upper_col] = 'prediction_upper'

    df_plot = full_df.merge(data[['date', col, 'imputed']], on='date', how='left')
    df_plot = df_plot.merge(
        forecast_shifted[['date'] + list(forecast_cols.keys())].rename(columns=forecast_cols),
        on='date', how='left'
    )

    df_plot = df_plot.sort_values('date')
    df_plot['plot_index'] = range(len(df_plot))
    # First index of every season except the first one
    year_changes = df_plot.groupby('year')['plot_index'].min().values[1:]

    # Runs of missing observations as (first missing index, first index after the run).
    # ``plot_index`` equals the row position, so the enumerate counter is used directly.
    na_blocks = []
    run_start = None
    for i, is_na in enumerate(df_plot[col].isna()):
        if is_na and run_start is None:
            run_start = i
        elif not is_na and run_start is not None:
            na_blocks.append((run_start, i))
            run_start = None
    if run_start is not None:
        na_blocks.append((run_start, len(df_plot)))

    fig, ax = plt.subplots(figsize=(14, 5))

    for block_start, block_end in na_blocks:
        ax.axvspan(block_start, block_end, color='lightgray', alpha=0.3)

    ax.plot(df_plot['plot_index'], df_plot[col], label='Stvarna vrednost', color='black', alpha=0.7, linewidth=1)

    if 'prediction_lower' in df_plot.columns and 'prediction_upper' in df_plot.columns:
        ax.fill_between(
            df_plot['plot_index'],
            df_plot['prediction_lower'],
            df_plot['prediction_upper'],
            color='orange',
            alpha=0.25,
            label='Interval poverenja'
        )

    # 'prediction' always exists here: a missing source column fails at the merge above
    ax.plot(df_plot['plot_index'], df_plot['prediction'],
            label=f'Predikcija ({n_days}d unapred)', color='orange', linestyle='--', linewidth=1.2)

    imputirani = df_plot[df_plot['imputed'] == 1]
    ax.scatter(imputirani['plot_index'], imputirani[col], color='red', s=15, label='Imputovano')

    for x in year_changes:
        ax.axvline(x=x, color='black', linestyle='--', linewidth=0.8)

    # Title
    if meteo:
        title_line1 = "Model sa uključenim meteorološkim parametrima"
    else:
        title_line1 = "Model bez meteoroloških parametara"

    title_line2 = (
        f"Sezonska koncentracija polena – {allergen} ({location})\n"
        f"Predikcija {n_days} dana unapred"
    )

    ax.set_title(f"{title_line1}\n{title_line2}", fontsize=14, fontweight='bold')

    ax.set_ylabel('Koncentracija polena [grains/$m^3$]', fontsize=12)
    ax.set_xlabel('Sezonski dani', fontsize=12)

    # Minor tick at the start of each month, labelled major tick on even months
    tick_positions = []
    tick_labels = []
    seen_labels = set()
    all_tick_positions = []

    for year in years:
        # Copy first: adding a column to an uncopied slice of df_plot is chained assignment
        df_year = df_plot[df_plot['year'] == year].copy()
        df_year['month'] = df_year['date'].dt.month

        for position, (month, group) in enumerate(df_year.groupby('month')):
            # The first month of the season gets no tick
            if position == 0:
                continue
            # No more than six month ticks per season
            if position > 6:
                break
            pos = group['plot_index'].iloc[0]
            all_tick_positions.append(pos)

            if month % 2 == 0:
                label = f"{calendar.month_abbr[month]} {year}"
                if label not in seen_labels:
                    tick_positions.append(pos)
                    tick_labels.append(label)
                    seen_labels.add(label)

    ax.set_xticks(all_tick_positions, minor=True)
    ax.tick_params(axis='x', which='minor', length=3, color='gray')
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(tick_labels, rotation=45, fontsize=10)

    # Collapse repeated legend entries into one per label
    handles, labels = ax.get_legend_handles_labels()
    unique = dict(zip(labels, handles))
    ax.legend(unique.values(), unique.keys(), fontsize=10, loc='upper right')

    plt.tight_layout()

    if save:
        filename = f"grafici\\{allergen}_{location}_seasonal_prediction_{n_days}d.png" if location else f"{allergen}_seasonal_prediction_{n_days}d_meteo.png"
        plt.savefig(filename, dpi=150)

    plt.show()



In [None]:
def plot_seasonal_predictions_limited(
    data, forecast_df, cutoff_date, n_seasons=None, col='value',
    days_to_predict=7, meteo=False, save=False
):
    """
    Plot in-season pollen concentrations plus one multi-day forecast issued on a given date.

    The seasons (bounds taken from the module-level ``df_allergens`` table)
    are drawn back to back up to ``cutoff_date + days_to_predict - 1``. The
    forecast row of ``forecast_df`` dated ``cutoff_date`` supplies the
    values ``pred0d`` ... ``pred{days_to_predict-1}d``, which are placed on
    consecutive days starting at ``cutoff_date``.

    Parameters
    ----------
    data : pandas.DataFrame
        Observations with a datetime ``date`` column, ``col``, ``imputed``
        (rows equal to 1 are marked in red) and ``allergen`` / ``location``
        columns whose first row names the series. Not modified.
    forecast_df : pandas.DataFrame
        Must contain ``date`` and the ``pred{i}d`` columns; the
        ``pred{i}d_lower`` / ``pred{i}d_upper`` columns are optional.
        Not modified.
    cutoff_date : str or datetime-like
        Date on which the forecast was issued; parsed with ``pd.to_datetime``.
    n_seasons : int or None, default None
        When given, only the last ``n_seasons`` years present in ``data``
        are drawn.
    col : str, default 'value'
        Column of ``data`` holding the observed values.
    days_to_predict : int, default 7
        Number of forecast days shown.
    meteo : bool, default False
        Selects which of two fixed headline texts is put in the title.
    save : bool, default False
        When true, the figure is written to a PNG before being shown.

    Raises
    ------
    IndexError
        If the allergen is not present in ``df_allergens``.
    KeyError
        If a forecast row exists for ``cutoff_date`` but one of the required
        ``pred{i}d`` columns is missing.
    """
    cutoff_date = pd.to_datetime(cutoff_date)
    display_end = cutoff_date + pd.Timedelta(days=days_to_predict - 1)

    # Series identity comes from the data itself
    allergen = data['allergen'].iloc[0]
    location = data['location'].iloc[0]

    # Season bounds; they are pasted behind the year to form full dates below
    allergen_rows = df_allergens.loc[df_allergens['allergen'] == allergen]
    start_md = allergen_rows['start_date'].iat[0]
    end_md = allergen_rows['end_date'].iat[0]

    # Years are derived without adding a helper column to ``data``
    years = sorted(data['date'].dt.year.unique())
    if n_seasons is not None:
        years = years[-n_seasons:]

    # Complete calendar of every season, clipped once at the last displayed day
    full_season = []
    for year in years:
        start = pd.to_datetime(f"{year}-{start_md}")
        end = min(pd.to_datetime(f"{year}-{end_md}"), display_end)
        season_df = pd.DataFrame({'date': pd.date_range(start, end)})
        season_df['year'] = year
        full_season.append(season_df)

    full_df = pd.concat(full_season, ignore_index=True)

    # Attach the observations
    df_plot = full_df.merge(data[['date', col, 'imputed']], on='date', how='left')

    # Column names of the forecast and of its interval bounds
    pred_cols = [f'pred{i}d' for i in range(days_to_predict)]
    lower_cols = [f'pred{i}d_lower' for i in range(days_to_predict)]
    upper_cols = [f'pred{i}d_upper' for i in range(days_to_predict)]

    # NaN (not None) stands in for anything unavailable so that the plotted
    # columns stay numeric instead of degrading to object dtype.
    preds = np.full(days_to_predict, np.nan)
    lowers = np.full(days_to_predict, np.nan)
    uppers = np.full(days_to_predict, np.nan)

    row = forecast_df[forecast_df['date'] == cutoff_date]
    if not row.empty:
        preds = row.loc[:, pred_cols].iloc[0].values
        if set(lower_cols).issubset(row.columns):
            lowers = row.loc[:, lower_cols].iloc[0].values
        if set(upper_cols).issubset(row.columns):
            uppers = row.loc[:, upper_cols].iloc[0].values

    preds_df = pd.DataFrame({
        'date': pd.date_range(start=cutoff_date, periods=days_to_predict),
        'prediction': preds,
        'prediction_lower': lowers,
        'prediction_upper': uppers
    })

    # Attach the forecast and index the days consecutively
    df_plot = df_plot.merge(preds_df, on='date', how='left')
    df_plot = df_plot.sort_values('date')
    df_plot['plot_index'] = range(len(df_plot))

    # First index of every season except the first one
    year_changes = df_plot.groupby('year')['plot_index'].min().values[1:]

    # Runs of missing observations as (first missing index, first index after the run).
    # ``plot_index`` equals the row position, so the enumerate counter is used directly.
    na_blocks = []
    start_na = None
    for i, is_na in enumerate(df_plot[col].isna()):
        if is_na and start_na is None:
            start_na = i
        elif not is_na and start_na is not None:
            na_blocks.append((start_na, i))
            start_na = None
    if start_na is not None:
        na_blocks.append((start_na, len(df_plot)))

    fig, ax = plt.subplots(figsize=(14, 5))

    # Shade the stretches without observations
    for block_start, block_end in na_blocks:
        ax.axvspan(block_start, block_end, color='lightgray', label='Nedostaje podatak')

    # Observed values
    ax.plot(df_plot['plot_index'], df_plot[col], label='Stvarna vrednost', color='black', alpha=0.6)

    # Forecast; preds_df always supplies these columns, so no presence check is needed
    ax.plot(df_plot['plot_index'], df_plot['prediction'], label='Predikcija', color='orange', linestyle='--')

    # Draw the band only when at least one bound is actually available
    if df_plot[['prediction_lower', 'prediction_upper']].notna().any().any():
        ax.fill_between(
            df_plot['plot_index'],
            df_plot['prediction_lower'],
            df_plot['prediction_upper'],
            color='orange',
            alpha=0.2,
            label='Interval poverenja'
        )

    # Imputed observations as red dots
    imputirani = df_plot[df_plot['imputed'] == 1]
    ax.scatter(imputirani['plot_index'], imputirani[col], color='red', s=10, label='Imputovano')

    # Dashed separators between seasons
    for x in year_changes:
        ax.axvline(x=x, color='black', linestyle='--', linewidth=0.6)

    # Title
    if meteo:
        title_line1 = "Model sa uključenim meteorološkim parametrima"
    else:
        title_line1 = "Model bez meteoroloških parametara"

    title_line2 = (
        f"Sezonska koncentracija polena – {allergen} ({location})\n"
        f"Predikcija od {cutoff_date.date()} do {display_end.date()}"
    )

    ax.set_title(f"{title_line1}\n{title_line2}", fontsize=14, fontweight='bold', loc='center')

    # Axis labels
    ax.set_ylabel('Koncentracija polena [grains/$m^3$]')
    ax.set_xlabel('Sezonski dani')

    # Minor tick at the start of each month, labelled major tick on odd months
    tick_positions = []
    tick_labels = []
    seen_labels = set()
    all_tick_positions = []

    for year in years:
        df_year = df_plot[df_plot['year'] == year].copy()
        df_year['month'] = df_year['date'].dt.month

        for month, group in df_year.groupby('month'):
            pos = group['plot_index'].iloc[0]
            all_tick_positions.append(pos)
            if month % 2 != 0:
                label = f"{calendar.month_abbr[month]} {year}"
                if label not in seen_labels:
                    tick_positions.append(pos)
                    tick_labels.append(label)
                    seen_labels.add(label)

    ax.set_xticks(all_tick_positions, minor=True)
    ax.tick_params(axis='x', which='minor', length=3, color='gray')
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(tick_labels, rotation=60, fontsize=8)

    # Collapse repeated legend entries into one per label
    handles, labels = ax.get_legend_handles_labels()
    unique = dict(zip(labels, handles))
    ax.legend(unique.values(), unique.keys(), loc='upper right')

    plt.tight_layout()

    # Save the figure
    if save:
        filename = f"grafici\\{allergen}_{location}_seasonal_prediction_from_{cutoff_date.date()}_to_{display_end.date()}.png"
        plt.savefig(filename, dpi=150)

    plt.show()


In [None]:
def evaluate_forecast_metrics(pred_df, allergen=None, location=None, lags=50, save=False):
    """
    Summarise forecast accuracy and inspect the residuals.

    Computes RMSE, MAE, RMSLE, mean error and correlation between the
    ``actual`` and ``prediction`` columns, shows a normal QQ plot and a
    histogram of the residuals (``prediction - actual``), and prints the
    outcome of a Ljung-Box test at a single lag.

    Parameters
    ----------
    pred_df : pandas.DataFrame
        Must contain ``actual`` and ``prediction`` columns.
    allergen, location : str or None
        When both are truthy they are printed with the test result and used
        in the saved file name; otherwise a generic file name is used.
    lags : int, default 50
        The lag at which the Ljung-Box statistic is reported.
    save : bool, default False
        When true, the residual figure is written to a PNG before being shown.

    Returns
    -------
    pandas.DataFrame
        One row per metric, single column ``Vrednost``.
    """
    observed = pred_df['actual']
    predicted = pred_df['prediction']
    residuals = predicted - observed

    # Performance metrics. RMSLE is taken on values divided by 30 before
    # log1p; the reason for that scale is not visible here -- TODO confirm.
    rmse = np.sqrt(mean_squared_error(observed, predicted))
    mae = mean_absolute_error(observed, predicted)
    log_gap = np.log1p(predicted / 30) - np.log1p(observed / 30)
    rmsle = np.sqrt(np.mean(log_gap ** 2))
    bias = np.mean(residuals)
    if len(observed) > 1:
        correlation = np.corrcoef(observed, predicted)[0, 1]
    else:
        correlation = np.nan

    metrics = {
        'RMSE': rmse,
        'MAE': mae,
        'RMSLE': rmsle,
        'Bias (mean error)': bias,
        'Correlation': correlation,
    }
    results_df = pd.DataFrame.from_dict(metrics, orient='index', columns=['Vrednost'])

    fig, (qq_ax, hist_ax) = plt.subplots(1, 2, figsize=(12, 5))

    # QQ plot of the residuals against the normal distribution
    stats.probplot(residuals, dist="norm", plot=qq_ax)
    qq_ax.set_title("QQ plot reziduala")
    qq_ax.grid(True)

    # Histogram of the residual distribution
    hist_ax.hist(residuals, bins=20, edgecolor='black', alpha=0.7)
    hist_ax.set_title("Histogram reziduala")
    hist_ax.set_xlabel("Reziduali")
    hist_ax.set_ylabel("Frekvencija")
    hist_ax.grid(True)

    plt.tight_layout()

    # Save the figure
    if save:
        if allergen and location:
            filename = f"grafici\\reziduali_{allergen}_{location}.png"
        else:
            filename = "reziduali_forecast.png"
        plt.savefig(filename, dpi=150)

    plt.show()

    # Ljung-Box test for autocorrelation in the residuals
    lb_row = acorr_ljungbox(residuals, lags=[lags], return_df=True).iloc[0]
    lb_stat = lb_row["lb_stat"]
    lb_pval = lb_row["lb_pvalue"]

    # Report the test outcome on the console
    print("\n=== Ljung-Box test za autokorelaciju reziduala ===")
    if allergen and location:
        print(f"Alergen: {allergen}, Lokacija: {location}")

    print(f"Statistika: {lb_stat:.4f}")
    print(f"p-vrednost: {lb_pval:.4f}")
    print(f"Broj lagova: {lags}")
    print("=> H0: Nema autokorelacije u rezidualima")

    verdict = (
        "Odbacuje se H0: postoji značajna autokorelacija u rezidualima."
        if lb_pval < 0.05
        else "Ne odbacuje se H0: nema dokaza o autokorelaciji - reziduali su u redu."
    )
    print(verdict)

    return results_df


In [None]:
def plot_forecast_error_metrics_from_rolling(rolling_df, allergen=None, location=None, horizon=10, save=False):
    """
    Plot MAE, RMSE and RMSLE of a rolling forecast for every day of the horizon.

    For horizon day ``i`` the column ``pred{i}d`` on a given row is compared
    with the ``actual`` value found ``i`` rows later. Rows where either side
    is missing are ignored, and horizon days without any valid pair are left
    out of the plot.

    Parameters
    ----------
    rolling_df : pandas.DataFrame
        Must contain ``actual`` and ``pred0d`` ... ``pred{horizon-1}d``.
        The frame is only read; no helper columns are added to it.
    allergen, location : str or None
        Used only in the saved file name (when both are truthy).
    horizon : int, default 10
        Number of forecast days to evaluate.
    save : bool, default False
        When true, the figure is written to a PNG before being shown.

    Raises
    ------
    KeyError
        If one of the required ``pred{i}d`` columns is missing.
    """
    observed = rolling_df['actual']

    # One record of metrics per horizon day
    rows = []
    for i in range(horizon):
        # Align the observation made i rows later with the forecast row.
        # A local Series is used so the caller's frame stays unchanged.
        actual = observed.shift(-i)
        pred = rolling_df[f"pred{i}d"]

        # Keep only the pairs where both forecast and observation exist
        mask = actual.notna() & pred.notna()
        if not mask.any():
            continue

        error = pred[mask] - actual[mask]
        # RMSLE is taken on values divided by 30 before log1p
        log_error = np.log1p(pred[mask] / 30) - np.log1p(actual[mask] / 30)

        rows.append({
            "forecast_day": i,
            "MAE": np.mean(np.abs(error)),
            "RMSE": np.sqrt(np.mean(error ** 2)),
            "RMSLE": np.sqrt(np.mean(log_error ** 2))
        })

    # Long format for the grouped bar plot
    metrics_df = pd.DataFrame(rows)
    metrics_df = pd.melt(metrics_df, id_vars='forecast_day', var_name='Metric', value_name='Value')

    plt.figure(figsize=(12, 6))
    # One value per bar, so error bars are switched off like in the naive-forecast plot
    ax = sns.barplot(data=metrics_df, x='forecast_day', y='Value', hue='Metric', errorbar=None)
    plt.title("Greške po danima u horizontu (MAE, RMSE, RMSLE)")
    plt.xlabel("Dan u horizontu")
    plt.ylabel("Vrednost greške")
    plt.legend(title='Metod')

    # Print the value on top of every bar
    for container in ax.containers:
        ax.bar_label(container, fmt="%.2f", label_type="edge", padding=3)

    plt.tight_layout()

    # Save the figure
    if save:
        filename = f"grafici\\prognoza_greske_{allergen}_{location}.png" if allergen and location else "prognoza_greske.png"
        plt.savefig(filename, dpi=75)

    plt.show()

In [None]:
def plot_naive_forecast_metrics_from_rolling(rolling_df, allergen=None, location=None, horizon=10, save=False):
    """
    Plot MAE, RMSE and RMSLE of a naive (persistence) forecast per horizon day.

    The naive forecast for ``step`` rows ahead is simply the ``actual``
    value of the current row; it is scored against the ``actual`` value
    found ``step`` rows later, for ``step`` = 1 ... ``horizon``. Results are
    labelled ``forecast_day = step - 1``.

    Parameters
    ----------
    rolling_df : pandas.DataFrame
        Must contain an ``actual`` column.
    allergen, location : str or None
        Appended to the title when truthy; both are needed for the
        allergen-specific file name.
    horizon : int, default 10
        Number of steps ahead to evaluate.
    save : bool, default False
        When true, the figure is written to a PNG before being shown.
    """
    observed = rolling_df['actual']
    records = []

    for step in range(1, horizon + 1):
        # Today's value is the forecast; the value `step` rows later is the truth
        target = observed.shift(-step)

        # Pairs are usable only when both ends are present
        usable = observed.notna() & target.notna()
        if not usable.any():
            continue

        forecast_vals = observed[usable]
        target_vals = target[usable]
        diff = forecast_vals - target_vals
        log_diff = np.log1p(forecast_vals / 30) - np.log1p(target_vals / 30)

        records.append({
            "forecast_day": step - 1,
            "MAE": np.mean(np.abs(diff)),
            "RMSE": np.sqrt(np.mean(diff ** 2)),
            "RMSLE": np.sqrt(np.mean(log_diff ** 2)),
        })

    # Long format for the grouped bar plot
    metrics_df = pd.melt(
        pd.DataFrame(records),
        id_vars='forecast_day', var_name='Metric', value_name='Value',
    )

    plt.figure(figsize=(12, 6))
    ax = sns.barplot(data=metrics_df, x='forecast_day', y='Value', hue='Metric', errorbar=None)

    # Title, extended with allergen and location when they are given
    title_parts = ["Naivna prognoza: greške po danima u horizontu"]
    if allergen:
        title_parts.append(f" – {allergen}")
    if location:
        title_parts.append(f" ({location})")
    plt.title("".join(title_parts))

    # Axis labels and legend
    plt.xlabel("Dan u horizontu")
    plt.ylabel("Vrednost greške")
    plt.legend(title='Metod')

    # Print the value on top of every bar
    for bars in ax.containers:
        ax.bar_label(bars, fmt="%.2f", label_type="edge", padding=3)

    plt.tight_layout()

    # Save the figure
    if save:
        if allergen and location:
            filename = f"grafici\\naivna_prognoza_greske_{allergen}_{location}.png"
        else:
            filename = "naivna_prognoza_greske.png"
        plt.savefig(filename, dpi=75)

    plt.show()
