# Test out HOGER in a notebook

In [1]:
from typing import Union

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import ruptures as rpt
from floris.utilities import wrap_180, wrap_360

from flasc import FlascDataFrame
from flasc.utilities.circular_statistics import calc_wd_mean_radial

In [2]:
def discretize(x: np.ndarray, threshold: float = 100) -> np.ndarray:
    """Label data points by the cluster of sorted values they fall into.

    The values are sorted, and a new group starts wherever two consecutive
    sorted values differ by at least ``threshold``. Groups are numbered
    1, 2, 3, ... in order of increasing value, and the labels are returned in
    the original order of ``x``.

    Args:
        x (np.ndarray): One-dimensional data to discretize. Other array-likes
            are converted with ``np.asarray``.
        threshold (float, optional): Minimum gap between consecutive sorted
            values that starts a new group. Defaults to 100.

    Returns:
        np.ndarray: Float array of group labels with the same length as ``x``;
            entries where ``x`` is missing are NaN.
    """
    x = np.asarray(x)

    # Remember which entries are missing so they can be blanked out at the end
    na = pd.isna(x)

    # Sort the values; argsort places NaN last, so they never split a group
    order = np.argsort(x)
    x_sorted = x[order]

    # A gap of at least `threshold` between neighbours marks a group boundary.
    # Comparisons involving NaN are False, so missing values add no boundary.
    is_jump = np.diff(x_sorted) >= threshold

    # Each sorted element's label is 1 plus the number of boundaries before it;
    # a cumulative sum gives this in one pass instead of re-incrementing the
    # tail of the array once per boundary.
    y = np.ones(len(x_sorted))
    y[1:] += np.cumsum(is_jump)

    # Undo the sort so labels line up with the input, then mask missing entries
    y = y[np.argsort(order)]
    y[na] = np.nan

    return y

In [3]:
# Load the input data from a Feather file (presumably example SCADA data,
# judging by the file name). The relative path is resolved against the
# current working directory.
df = pd.read_feather('scada_exemple.ftr')

In [4]:
# Settings used by the cells below
threshold = 100  # passed to ruptures as min_size in the turbine-pair loop
reference = 'last'  # 'first', 'last', or a time string; resolved to the row index `ref`
verbose = True  # print progress while looping over turbine pairs

In [5]:
# Wrap the data in a FlascDataFrame; the turbine loop further down reads
# df.n_turbines from it.
df = FlascDataFrame(df)

In [6]:
# Resolve `reference` to an integer row position `ref` in df
if reference == "first":
    ref = 0
elif reference == "last":
    ref = len(df) - 1
else:
    # Otherwise treat `reference` as a time string and pick the row whose
    # "time" value is closest to it
    try:
        ref = np.argmin(np.abs(df["time"].values - pd.to_datetime(reference)))
    except ValueError as err:
        # Chain the original error so the underlying parsing failure stays
        # visible in the traceback
        raise ValueError(
            "Invalid reference point. Please use 'first', 'last', or a valid time string."
        ) from err

In [8]:
# Initialize results dataframe
# Columns, as filled by the (currently commented-out) append step in the
# turbine loop: Knot = change-point indices, Jump = differences between
# consecutive segment means, Turbine = index of the first turbine of the pair.
df_jump = pd.DataFrame(columns=["Knot", "Jump", "Turbine"])

In [None]:
# Loop over combinations of turbines
# NOTE(review): this cell is in a debugging state. The two `break` statements
# below stop it after the first turbine pair, and the post-processing that
# fills df_jump is commented out, so df_jump is not modified here.
for t_i in range(df.n_turbines):
    t_i_col = "wd_%03d" % t_i

    if verbose:
        print(f"Processing turbine {t_i}")

    for t_j in range(df.n_turbines):
        # Skip comparing a turbine with itself
        if t_i == t_j:
            continue
        t_j_col = "wd_%03d" % t_j

        if verbose:
            print(f"...with turbine {t_j}")

        # Compute the wrapped error: the difference between the two
        # wind-direction columns, passed through wrap_180
        wrapped_error = wrap_180(df[t_i_col].values - df[t_j_col].values)

        # The R code uses picor (piecewise-constant regression).
        # https://github.com/chasmani/piecewise-regression is a candidate
        # Python replacement for picor, but I can't find a close Python
        # equivalent, so this starts with ruptures instead.
        # That is convenient: via the dependency on wind-up, ruptures is
        # already a de facto requirement for FLASC.

        # Note these first lines (minus the threshold)
        # are verbatim from the example here
        # https://github.com/deepcharles/ruptures
        # presumably can improve somewhat
        # `threshold` is passed as min_size; pen=5000 is a hard-coded penalty
        # (TODO: confirm how this value was chosen)
        algo = rpt.Pelt(model="l1", min_size=threshold).fit(wrapped_error)
        result = algo.predict(pen=5000)
        # Alternative that was tried: sliding-window detection
        # algo = rpt.Window(width=threshold, model='l1').fit(wrapped_error)
        # pen = 20 # np.log(len(wrapped_error)) * np.nanstd(wrapped_error)**2
        # print(f"Pen: {pen}")
        # result = algo.predict(pen=pen)
        break  # debugging short-circuit: stop after the first t_j
    break  # debugging short-circuit: stop after the first t_i

        # --- Disabled post-processing (kept for reference) ---

        # # If results is length 1 or 0, no significant jumps detected, continue
        # if len(result) <= 1:
        #     if verbose:
        #         print("... No significant jumps detected")
        #     continue

        # if verbose:
        #     # print(f"... Jumps detected at: {result[:-1]}")
        #     print(f" Number of jumps: {len(result)-1}")

        # # Compute the mean values in error in each of the identified segments
        # # so we can compute the jump size at each jump location
        # knots = result[:-1]  # Exclude the end point returned by ruptures
        # values = [
        #     calc_wd_mean_radial(wrapped_error[start:end])
        #     for start, end in zip([0] + knots, knots + [len(wrapped_error)])
        # ]

        # # Paul's note: I added wrap_180 here though I don't think it's in original R code
        # # but it feels correct to me to include it since errors
        # # should not include values > abs(180)
        # values = [wrap_180(v) for v in values]

        # # if verbose:
        # #     print(f"... Jump values per area: {values}")

        # jumps = np.diff(values)

        # # if verbose:
        # #     print(f"... Jump sizes: {jumps}")

        # # Append result to the result dataframe
        # # TODO: Not a big deal but this is a slow way to do it
        # df_jump = pd.concat(
        #     [df_jump, pd.DataFrame({"Knot": knots, "Jump": jumps, "Turbine": t_i})]
        # )


Processing turbine 0
...with turbine 1
 Number of jumps: 481
...with turbine 2
 Number of jumps: 481
...with turbine 3
 Number of jumps: 481
...with turbine 4
 Number of jumps: 481
...with turbine 5
 Number of jumps: 481
...with turbine 6
 Number of jumps: 481
...with turbine 7
 Number of jumps: 481
...with turbine 8
 Number of jumps: 481
...with turbine 9


KeyboardInterrupt: 

In [17]:
# Show the collected jumps. This stays empty while the append step in the
# turbine loop is commented out.
df_jump

Unnamed: 0,Knot,Jump,Turbine
