<a href="https://colab.research.google.com/github/Starnekan/GPT-Prompty/blob/main/GPT-Prompty/test_prompt/SFRA_autotrafo%20/SFRAAnalyzer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import logging
import sys
from pathlib import Path
from typing import Dict, List, Optional, Tuple

import pandas as pd

# Logging configuration: INFO level, "timestamp - level - message" layout.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
# Module-level logger used by the analyzer class and the script entry point.
logger = logging.getLogger(__name__)


class SFRAAnalyzer:
    """
    Load, validate and analyse SFRA measurement data of an autotransformer,
    with automatic calibration of the anomaly-detection thresholds.

    A row counts as an anomaly when any column whose name ends with "ERROR"
    exceeds ``mean + sigma_multiplier * std`` of that column. The multiplier
    starts at 2.0 and is stepped up or down until the share of anomalous rows
    is within ``tol`` of ``target_anomaly_rate`` or ``max_iter`` is reached.
    """
    def __init__(
        self,
        file_path: str,
        sep: str = ",",
        encoding: str = "utf-8",
        required_columns: Optional[List[str]] = None,
        target_anomaly_rate: float = 0.05,
        sigma_step: float = 0.5,
        tol: float = 0.01,
        max_iter: int = 10,
        force_prompt_on_no_anomalies: bool = True
    ) -> None:
        """
        Store the configuration; no file access happens here.

        Args:
            file_path: Path of the CSV file to analyse.
            sep: Field separator passed to ``pd.read_csv``.
            encoding: Text encoding passed to ``pd.read_csv``.
            required_columns: Column names that must be present. ``None`` or
                an empty list selects the built-in set of per-phase columns.
            target_anomaly_rate: Desired fraction of rows flagged as anomalies.
            sigma_step: Amount by which the sigma multiplier is adjusted in
                each calibration iteration (also its lower bound).
            tol: Accepted absolute deviation from ``target_anomaly_rate``.
            max_iter: Maximum number of calibration iterations.
            force_prompt_on_no_anomalies: When no anomaly is found, still
                print a prompt built from the first data row.
        """
        self.file_path = Path(file_path)
        self.sep = sep
        self.encoding = encoding
        self.required_columns = required_columns or [
            "øA RATIO", "øA RATIO ERROR", "øA PHASE DEV", "øA RMS CURRENT",
            "øB RATIO", "øB RATIO ERROR", "øB PHASE DEV", "øB RMS CURRENT",
            "øC RATIO", "øC RATIO ERROR", "øC PHASE DEV", "øC RMS CURRENT",
        ]
        self.target_anomaly_rate = target_anomaly_rate
        self.sigma_step = sigma_step
        self.tol = tol
        self.max_iter = max_iter
        # Starting multiplier; mutated by calibrate_thresholds().
        self.sigma_multiplier = 2.0
        self.thresholds: Dict[str, float] = {}
        self.force_prompt_on_no_anomalies = force_prompt_on_no_anomalies

    def load_data(self) -> pd.DataFrame:
        """
        Read the CSV file into a DataFrame and strip whitespace from headers.

        Returns:
            The loaded DataFrame.

        Raises:
            FileNotFoundError: If ``file_path`` does not exist.
            Exception: Any error raised by ``pd.read_csv`` propagates
                unchanged (e.g. a parser error when the file layout does not
                match ``sep``).
        """
        if not self.file_path.exists():
            raise FileNotFoundError(f"Plik nie istnieje: {self.file_path}")
        df = pd.read_csv(self.file_path, sep=self.sep, encoding=self.encoding)
        # Headers may carry padding spaces; normalise before validation.
        df.columns = df.columns.str.strip()
        logger.info(f"Wczytano {len(df)} wierszy z pliku {self.file_path.name}")
        return df

    def validate_columns(self, df: pd.DataFrame) -> None:
        """
        Ensure every required column is present in ``df``.

        Raises:
            ValueError: Listing the missing and the available columns.
        """
        missing = [col for col in self.required_columns if col not in df.columns]
        if missing:
            raise ValueError(f"Brakujące kolumny: {missing}. Dostępne: {df.columns.tolist()}")

    def _compute_thresholds(self, df: pd.DataFrame) -> None:
        """
        Recompute ``self.thresholds`` for the current sigma multiplier.

        One threshold (``mean + sigma_multiplier * std``) is stored for each
        required column whose name ends with "ERROR". When there is no such
        column the thresholds are reset to an empty dict.
        """
        error_cols = [c for c in self.required_columns if c.endswith("ERROR")]
        if not error_cols:
            # Nothing to aggregate; avoid calling agg() on an empty selection.
            self.thresholds = {}
            return
        stats = df[error_cols].agg(["mean", "std"])
        # Cast to plain float so the dict matches its annotation and logs
        # as ordinary numbers rather than numpy scalar reprs.
        self.thresholds = {
            col: float(stats.at["mean", col] + self.sigma_multiplier * stats.at["std", col])
            for col in error_cols
        }

    def detect_anomalies(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Return the rows of ``df`` where any thresholded column exceeds its
        threshold.

        With no thresholds defined, an empty frame with the same columns is
        returned (``pd.concat`` cannot be called on an empty list).
        """
        if not self.thresholds:
            return df.iloc[0:0]
        masks = [(df[col] > thr) for col, thr in self.thresholds.items()]
        any_anomaly = pd.concat(masks, axis=1).any(axis=1)
        return df[any_anomaly]

    def calibrate_thresholds(self, df: pd.DataFrame) -> Tuple[Dict[str, float], pd.DataFrame]:
        """
        Adjust the sigma multiplier until the anomaly rate is close to the
        target, then return the final thresholds and the anomalous rows.

        The multiplier is raised by ``sigma_step`` when too many rows are
        flagged and lowered (never below ``sigma_step``) when too few are.
        The adjusted multiplier is kept on the instance.

        Returns:
            ``(thresholds, anomalies)``. For a DataFrame without rows no rate
            can be computed, so ``({}, <empty frame>)`` is returned.
        """
        total_rows = len(df)
        if total_rows == 0:
            # Guard against division by zero in the rate computation below.
            self.thresholds = {}
            return self.thresholds, df.iloc[0:0]

        for i in range(self.max_iter):
            self._compute_thresholds(df)
            anomalies = self.detect_anomalies(df)
            rate = len(anomalies) / total_rows
            logger.info(f"{self.file_path.name} — Iter {i+1}: σ={self.sigma_multiplier:.2f}, rate={rate:.2%}")
            if abs(rate - self.target_anomaly_rate) <= self.tol:
                break
            if rate > self.target_anomaly_rate:
                self.sigma_multiplier += self.sigma_step
            else:
                self.sigma_multiplier = max(self.sigma_step, self.sigma_multiplier - self.sigma_step)
        # The multiplier may have changed after the last evaluation, so the
        # returned thresholds/anomalies are recomputed for its final value.
        self._compute_thresholds(df)
        anomalies = self.detect_anomalies(df)
        return self.thresholds, anomalies

    def generate_prompt(self, sample: pd.Series) -> str:
        """
        Build the text prompt describing one measurement row.

        Values missing from ``sample`` are rendered as ``N/A``. The unit
        suffixes (``%``, ``°``, `` A``) are appended verbatim to each value.
        """
        def val(key: str, suffix: str = "") -> str:
            return f"{sample.get(key, 'N/A')}{suffix}"

        lines = [f"Dane z pomiaru SFRA autotransformatora ({self.file_path.name}):"]
        for phase in ("øA", "øB", "øC"):
            lines.append(f"- {phase}:")
            lines.append(f"  - RATIO: {val(phase + ' RATIO')}")
            lines.append(f"  - ERROR: {val(phase + ' RATIO ERROR', '%')}")
            lines.append(f"  - PHASE DEV: {val(phase + ' PHASE DEV', '°')}")
            lines.append(f"  - RMS CURRENT: {val(phase + ' RMS CURRENT', ' A')}")
        # Every line, including the last one, is newline-terminated.
        return "\n".join(lines) + "\n"

    def analyze(self) -> None:
        """
        Run the full pipeline for the configured file and print a prompt.

        Loads and validates the data, calibrates the thresholds and prints a
        prompt for the first anomalous row (or, if none is found and
        ``force_prompt_on_no_anomalies`` is set, for the first data row).
        Any exception is logged and swallowed so that one bad file does not
        stop a batch run.
        """
        try:
            df = self.load_data()
            self.validate_columns(df)
            if df.empty:
                # No rows: neither a rate nor a sample prompt can be produced.
                logger.warning(f"{self.file_path.name}: brak wierszy danych – pominięto analizę")
                return
            thresholds, anomalies = self.calibrate_thresholds(df)
            logger.info(f"{self.file_path.name} — Final thresholds: {thresholds}")
            if not anomalies.empty:
                prompt = self.generate_prompt(anomalies.iloc[0])
                print(f"\n=== PROMPT DLA PIERWSZEJ ANOMALII ({self.file_path.name}) ===\n")
                print(prompt)
            elif self.force_prompt_on_no_anomalies:
                prompt = self.generate_prompt(df.iloc[0])
                print(f"\n=== PROMPT (brak anomalii – pierwszy rekord) dla {self.file_path.name} ===\n")
                print(prompt)
            else:
                print(f"🚀 {self.file_path.name}: Nie wykryto anomalii")
        except Exception as e:
            # Deliberate best-effort boundary: report and continue.
            logger.error(f"Błąd w analizie {self.file_path.name}: {e}")


if __name__ == "__main__":
    # Directory that holds all CSV files to analyse.
    # In Colab __file__ is not defined, so use a relative or absolute path.
    data_dir = Path('./')  # Change this to the path of your CSV files
    # glob() yields entries in filesystem order; sort for a reproducible
    # processing (and output) order across runs and platforms.
    csv_files = sorted(data_dir.glob("*.csv"))

    if not csv_files:
        logger.error(f"Nie znaleziono żadnych plików .csv w katalogu {data_dir}")
        sys.exit(1)

    # Each file is analysed independently; analyze() logs its own failures.
    for csv_path in csv_files:
        analyzer = SFRAAnalyzer(
            file_path=str(csv_path),
            force_prompt_on_no_anomalies=True
        )
        analyzer.analyze()

ERROR:__main__:Błąd w analizie Moc 21-1 Nysa AT1-0004.csv: Error tokenizing data. C error: Expected 1 fields in line 4, saw 3




=== PROMPT (brak anomalii – pierwszy rekord) dla TTRU3_EXP_Export_2025-02-20T10_43_02.csv ===

Dane z pomiaru SFRA autotransformatora (TTRU3_EXP_Export_2025-02-20T10_43_02.csv):
- øA:
  - RATIO: 7.6251
  - ERROR: 0.08%
  - PHASE DEV: -0.01°
  - RMS CURRENT: 37.4 A
- øB:
  - RATIO: 7.6273
  - ERROR: 0.11%
  - PHASE DEV: -0.03°
  - RMS CURRENT: 40.2 A
- øC:
  - RATIO: 7.6261
  - ERROR: 0.09%
  - PHASE DEV: -0.02°
  - RMS CURRENT: 40.1 A

