In [3]:
import pandas as pd
from scipy.stats import zscore

### IQR

In [4]:
def detect_outliers_in_column(column: pd.Series, threshold: float = 1.5) -> list:
    """
    Find outliers in a Series with the IQR (interquartile range) fence rule.

    A value is flagged when it lies strictly below ``Q1 - threshold * IQR``
    or strictly above ``Q3 + threshold * IQR``, where Q1 and Q3 are the
    0.25 and 0.75 quantiles of ``column``.

    :param column: column of a dataframe (pd.Series)
    :param threshold: multiplier applied to the IQR to build the fences
    :return: list of index labels of the flagged values, in column order
    """
    first_quartile = column.quantile(0.25)
    third_quartile = column.quantile(0.75)
    spread = third_quartile - first_quartile

    # Fences: anything strictly outside [low_fence, high_fence] is an outlier.
    low_fence = first_quartile - threshold * spread
    high_fence = third_quartile + threshold * spread

    too_small = column < low_fence
    too_large = column > high_fence
    flagged = column[too_small | too_large]

    return flagged.index.tolist()

### zscore

In [5]:
def detect_outliers_zscore(column: pd.Series, threshold: float = 3) -> list:
    """
    Find outliers in a Series by the magnitude of their z-score.

    Z-scores are computed over the whole column with ``scipy.stats.zscore``
    (called with ``nan_policy='omit'``). A value is flagged when the absolute
    value of its z-score is strictly greater than ``threshold``.

    :param column: column of a dataframe (pd.Series)
    :param threshold: absolute z-score above which a value is flagged (default is 3)
    :return: list of index labels of the flagged values, in column order
    """
    standardized = zscore(column, nan_policy='omit')

    # NaN z-scores compare as False, so they are never flagged.
    is_extreme = abs(standardized) > threshold
    flagged = column[is_extreme]

    return flagged.index.tolist()

In [6]:
# Toy dataset: column "A" contains one obvious anomaly (250); column "B" is 1..10.
data = dict(
    A=[10, 12, 11, 10, 250, 12, 13, 11, 9, 10],
    B=list(range(1, 11)),
)
df = pd.DataFrame(data)

# Run the IQR detector on column "A" and show both the labels and the values.
outliers = detect_outliers_in_column(df["A"], threshold=1.5)
print("Індекси аномалій:", outliers)
print("Аномалії:", df["A"].loc[outliers])

Індекси аномалій: [4]
Аномалії: 4    250
Name: A, dtype: int64


In [8]:
# Same toy dataset as in the IQR demo: "A" has one anomaly (250), "B" is 1..10.
data = dict(
    A=[10, 12, 11, 10, 250, 12, 13, 11, 9, 10],
    B=list(range(1, 11)),
)
df = pd.DataFrame(data)

# threshold=2 rather than the default 3: in this 10-point sample the z-score
# of 250 is just under 3, so the default threshold would not flag it.
outliers = detect_outliers_zscore(df["A"], threshold=2)
print("Індекси аномалій:", outliers)
print("Аномалії:", df["A"].loc[outliers])

Індекси аномалій: [4]
Аномалії: 4    250
Name: A, dtype: int64


### Z-оцінка з ковзним вікном

In [5]:
def zscore_adv(x, window):
    """
    Rolling z-score of each value relative to the window that precedes it.

    The rolling mean and population standard deviation (``ddof=0``) are
    computed over ``window`` and then shifted by one position, so the value
    being scored is not part of its own reference statistics.

    :param x: object exposing ``.rolling`` (the demo cell passes a pd.Series)
    :param window: window argument forwarded to ``x.rolling``
    :return: ``(x - shifted_mean) / shifted_std``; positions without a full
        preceding window are NaN
    """
    roller = x.rolling(window=window)

    # shift(1) moves each statistic one position later, excluding the current value.
    trailing_mean = roller.mean().shift(1)
    trailing_std = roller.std(ddof=0).shift(1)

    # NOTE: there is no guard against a zero standard deviation here.
    return (x - trailing_mean) / trailing_std

In [8]:
# Toy dataset with a milder spike (16) in column "A"; column "B" is 1..10.
data = dict(
    A=[10, 12, 11, 10, 16, 12, 13, 11, 9, 10],
    B=list(range(1, 11)),
)

window = 3
df = pd.DataFrame(data)
# Score every value of "A" against the `window` values that precede it.
df = df.assign(A_zscore=zscore_adv(df["A"], window))

df

Unnamed: 0,A,B,A_zscore
0,10,1,
1,12,2,
2,11,3,
3,10,4,-1.224745
4,16,5,6.123724
5,12,6,-0.127
6,13,7,0.133631
7,11,8,-1.568929
8,9,9,-3.674235
9,10,10,-0.612372
