In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

## ITU - 415

In [None]:
import os
# Show the entries of ../data so the export file names used below can be checked.
print(os.listdir("../data"))

In [None]:
# CSV exports that make up the ITU-415 history.
arquivos = [
    "../data/full_history_ITU-415_2025-06-01_a_2025-06-17.csv",
    "../data/full_history_ITU-415_2025-06-17_a_2025-06-29.csv",
    "../data/full_history_ITU-415_2025-06-29_a_2025-06-31.csv",
    "../data/full_history_ITU-415_2025-07-01_a_2025-07-17 (1).csv"
]

# Report the row count of each file as it is read, then the grand total.
contagens = []
for caminho in arquivos:
    n_do_arquivo = len(pd.read_csv(caminho))
    print(f"{caminho} → {n_do_arquivo} linhas")
    contagens.append(n_do_arquivo)

total = sum(contagens)
print(f"\nTotal de linhas somadas: {total}")

#### Concatenação

In [None]:
# Load each export into its own frame, then stack them with a fresh 0..n-1 index.
partes = [pd.read_csv(caminho) for caminho in arquivos]
df_415 = pd.concat(partes, ignore_index=True)

print(df_415.shape)
df_415.head()

In [None]:
# Column dtypes and non-null counts of the concatenated frame.
df_415.info()

#### Mudar de object para datetime

In [None]:
# Parse the timestamp column as timezone-aware UTC datetimes; values that cannot be
# parsed become NaT (errors="coerce") instead of raising.
df_415["timestamp"] = pd.to_datetime(df_415["timestamp"], utc=True, errors="coerce")

In [None]:
# Inspect dtypes and non-null counts again (checks the dtype of the timestamp column).
df_415.info()

In [None]:
# Quick overview: frame dimensions, first ten rows, and number of rows per resource.
print("Linhas x colunas:", df_415.shape)
print(df_415.head(10))
print("\nRecursos (sensores) disponíveis:\n", df_415["resource"].value_counts())

In [None]:
# Number of distinct resources, their names, and the time span covered by the data.
print("Sensores únicos:", df_415['resource'].nunique())
print(df_415['resource'].unique())  # every distinct value, in order of first appearance in the DataFrame
print("Período:", df_415['timestamp'].min(), "→", df_415['timestamp'].max())


In [None]:
# Independent copy of the rows whose resource is "Stop", to inspect the values it takes.
eh_stop = df_415["resource"] == "Stop"
df_stop = df_415.loc[eh_stop].copy()

valores_stop = df_stop["value"]
print("Valores únicos do STOP:", valores_stop.unique())

print("\nFrequência de cada valor STOP:")
print(valores_stop.value_counts(dropna=False))



In [None]:
# Seconds elapsed since the previous reading, taken in chronological order.
# The frame is a concatenation of several exports and is not guaranteed to be
# time-sorted, so the timestamps are sorted before differencing; the result is
# written back by index alignment, leaving the row order of df_415 untouched.
df_415["timestamp_diff"] = (
    df_415["timestamp"].sort_values().diff().dt.total_seconds()
)

print(df_415["timestamp_diff"].describe())

In [None]:
# Histogram of the gaps between readings, restricted to the 0-5 s range.
fig, ax = plt.subplots(figsize=(10, 5))
intervalos = df_415["timestamp_diff"].dropna()
ax.hist(intervalos, bins=200, range=(0, 5))
ax.set_xlabel("Intervalo entre leituras (s)")
ax.set_ylabel("Frequência")
ax.set_title("Distribuição dos intervalos (zoom até 5s)")
plt.show()

In [None]:
# Per-sensor statistics of the time gap between consecutive readings.
def sensor_freq_stats(df):
    """Summarise, per resource, the interval between consecutive timestamps.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a "resource" column and a datetime "timestamp" column.

    Returns
    -------
    pandas.DataFrame
        One row per resource, sorted by ascending ``mediana_s``, with the columns
        ``n_registros``, ``mediana_s``, ``media_s``, ``p25_s``, ``p75_s`` and
        ``max_s`` (all intervals in seconds). Resources that yield no interval
        (fewer than two usable timestamps) are left out. If no resource
        qualifies, an empty frame with these columns is returned instead of
        failing on the sort key.
    """
    colunas = ["n_registros", "mediana_s", "media_s", "p25_s", "p75_s", "max_s"]
    out = {}
    for res, group in df.groupby("resource"):
        # Sort first so the differences are between chronologically adjacent samples.
        diffs = group["timestamp"].sort_values().diff().dt.total_seconds().dropna()
        if diffs.empty:
            continue
        out[res] = {
            "n_registros": len(group),
            "mediana_s": diffs.median(),
            "media_s": diffs.mean(),
            "p25_s": diffs.quantile(0.25),
            "p75_s": diffs.quantile(0.75),
            "max_s": diffs.max(),
        }
    if not out:
        # Nothing to sort: keep the expected schema so callers can still use head()/tail().
        return pd.DataFrame(columns=colunas)
    return pd.DataFrame(out).T.sort_values("mediana_s")

freq_table = sensor_freq_stats(df_415)

# Show both ends of the table
print(freq_table.head(10))   # fastest sensors
print(freq_table.tail(10))   # slowest sensors

In [None]:
# Collect, in order of first appearance, the string resource names containing "FlexAnalogue".
flex_cols = []
for nome in df_415["resource"].unique():
    if not isinstance(nome, str):
        continue
    if "FlexAnalogue" in str(nome):
        flex_cols.append(str(nome))
print("FlexAnalogues encontrados:", flex_cols)


In [None]:
# Rows of the Stop channel. The resource is spelled "Stop" elsewhere in this
# notebook, so the match is case-insensitive; an exact "STOP" comparison selects
# nothing and leaves status_15s empty.
eh_stop = df_415["resource"].str.lower() == "stop"
# Drop rows without a timestamp or value: they cannot be indexed or cast to int.
stop = df_415.loc[eh_stop, ["timestamp", "value"]].dropna()
stop = stop.sort_values("timestamp").drop_duplicates(subset="timestamp", keep="last")
stop["STOP"] = stop["value"].round().astype(int)

# Last known Stop value in each 15-second bin, carried forward over empty bins.
status_15s = (
    stop.set_index("timestamp")[["STOP"]]
        .resample("15s").last()
        .ffill()
)
# 0 -> "ON", 1 -> "OFF"; any other code maps to NaN.
status_15s["status"] = status_15s["STOP"].map({0: "ON", 1: "OFF"}).astype("category")

print(status_15s["status"].value_counts(dropna=False))

In [None]:
# Readings at or above this threshold are treated as sentinel (invalid) values.
SENT = 2.1e9
rec = df_415.loc[df_415["resource"] == "Recalque", ["timestamp", "value"]].copy()
rec = rec.sort_values("timestamp").drop_duplicates(subset="timestamp", keep="last")
rec.loc[rec["value"] >= SENT, "value"] = np.nan

# 15-second mean of Recalque. The lowercase "s" alias is used because the
# uppercase "S" alias is deprecated in recent pandas releases.
rec_15s = (
    rec.set_index("timestamp")["value"]
       .resample("15s").mean()
)

print("Recalque 15s - % nulos:", rec_15s.isna().mean())

In [None]:
from scipy.stats import pearsonr

def series_15s(df_sensor, sentinel_min=2.1e9, freq="15s"):
    """Clean sentinel readings and resample one sensor to a regular grid (mean).

    Parameters
    ----------
    df_sensor : pandas.DataFrame
        Rows of a single sensor with a datetime "timestamp" column and a numeric
        "value" column.
    sentinel_min : float, default 2.1e9
        Values greater than or equal to this are replaced by NaN. It is a
        parameter rather than the notebook-level ``SENT`` name because ``SENT``
        is rebound to other types in later cells.
    freq : str, default "15s"
        Resampling frequency.

    Returns
    -------
    pandas.Series
        Mean value per bin, indexed by timestamp; bins without valid data are NaN.
    """
    s = (
        df_sensor
        # Stable sort so that keep="last" really keeps the last row in input order.
        .sort_values("timestamp", kind="stable")
        .drop_duplicates(subset="timestamp", keep="last")
        .copy()
    )
    s.loc[s["value"] >= sentinel_min, "value"] = np.nan
    return s.set_index("timestamp")["value"].resample(freq).mean()

def diffs_stats(df_sensor):
    """Return (median, mean, max) of the gaps between consecutive samples, in seconds.

    The rows are ordered by "timestamp" before differencing. When no gap can be
    computed, a tuple of three NaN values is returned.
    """
    ordenado = df_sensor.sort_values("timestamp")
    gaps = ordenado["timestamp"].diff().dt.total_seconds().dropna()
    if len(gaps) > 0:
        return np.median(gaps), np.mean(gaps), np.max(gaps)
    return np.nan, np.nan, np.nan

def safe_corr(a, b):
    """Pearson correlation of two series after index alignment and NaN removal.

    Returns NaN when fewer than 10 paired observations remain or when the
    correlation computation raises.
    """
    pares = pd.concat([a, b], axis=1, keys=["x", "y"]).dropna()
    if len(pares) >= 10:
        try:
            coef, _pvalue = pearsonr(pares["x"], pares["y"])
        except Exception:
            return np.nan
        return coef
    return np.nan


In [None]:
# FlexAnalogue channels included in the report.
flex_cols = ['FlexAnalogue1_1', 'FlexAnalogue1_2', 'FlexAnalogue2_1', 'FlexAnalogue2_2',
             'FlexAnalogue3_1', 'FlexAnalogue3_2', 'FlexAnalogue4_1', 'FlexAnalogue4_2',
             'FlexAnalogue6_1', 'FlexAnalogue6_2', 'FlexAnalogue7_1', 'FlexAnalogue7_2',
             'FlexAnalogue8_1', 'FlexAnalogue8_2', 'FlexAnalogue9_1', 'FlexAnalogue9_2',
             'FlexAnalogue10_1', 'FlexAnalogue10_2', 'FlexAnalogue11_1', 'FlexAnalogue11_2',
             'FlexAnalogue12_1', 'FlexAnalogue12_2']

# Columns of the report, fixed up front so an empty result still has a schema.
report_cols = [
    "sensor", "freq_mediana_s", "freq_media_s", "freq_max_s", "coverage_ON",
    "min_ON", "median_ON", "p95_ON", "corr_with_Recalque_ON",
]

rows = []
for col in flex_cols:
    sub = df_415.loc[df_415["resource"] == col, ["timestamp", "value"]].copy()
    if sub.empty:
        continue

    # Sampling-interval statistics on the raw series
    med_s, mean_s, max_s = diffs_stats(sub)

    # 15-second series
    s15 = series_15s(sub)

    # Share of ON bins in which this channel has a value
    joined = status_15s.join(s15.to_frame(name=col), how="left")
    on = joined.loc[joined["status"] == "ON", col]
    coverage_on = 1 - on.isna().mean()

    # Value statistics over the ON bins (NaN when the channel has no data there)
    if on.notna().any():
        vmin = np.nanmin(on.values)
        vmed = np.nanmedian(on.values)
        vp95 = np.nanpercentile(on.values, 95)
    else:
        vmin = vmed = vp95 = np.nan

    # Correlation with Recalque over the ON bins. reindex() is used instead of
    # .loc because the two 15 s grids need not span the same period, and .loc
    # raises KeyError for labels that are missing from rec_15s.
    corr_rec = safe_corr(on, rec_15s.reindex(on.index))

    rows.append({
        "sensor": col,
        "freq_mediana_s": med_s,
        "freq_media_s": mean_s,
        "freq_max_s": max_s,
        "coverage_ON": coverage_on,
        "min_ON": vmin,
        "median_ON": vmed,
        "p95_ON": vp95,
        "corr_with_Recalque_ON": corr_rec,
    })

# Best coverage first; ties broken by the shortest median sampling interval.
flex_report = pd.DataFrame(rows, columns=report_cols).sort_values(
    ["coverage_ON", "freq_mediana_s"], ascending=[False, True]
)

pd.set_option("display.max_rows", 200)
flex_report


In [None]:
# Raw value range and cardinality of every FlexAnalogue channel.
for col in flex_cols:
    do_canal = df_415["resource"] == col
    vals = df_415.loc[do_canal, "value"]
    n_unicos, menor, maior = vals.nunique(), vals.min(), vals.max()
    print(col, "unique:", n_unicos, "min:", menor, "max:", maior)


In [None]:
# One-minute mean series with sentinel readings (>= 2.1e9) blanked out.
# The "value" column is selected explicitly so each result is a Series:
# with DataFrames, rec.corr(f41) would pass f41 as the `method` argument and fail.
rec = df_415.loc[df_415["resource"]=="Recalque", ["timestamp","value"]].copy()
rec.loc[rec["value"] >= 2.1e9, "value"] = np.nan
rec = rec.set_index("timestamp").sort_index()["value"].resample("1min").mean()

f41 = df_415.loc[df_415["resource"]=="FlexAnalogue4_1", ["timestamp","value"]].copy()
f41.loc[f41["value"] >= 2.1e9, "value"] = np.nan
f41 = f41.set_index("timestamp").sort_index()["value"].resample("1min").mean()

f42 = df_415.loc[df_415["resource"]=="FlexAnalogue4_2", ["timestamp","value"]].copy()
f42.loc[f42["value"] >= 2.1e9, "value"] = np.nan
f42 = f42.set_index("timestamp").sort_index()["value"].resample("1min").mean()

# Correlation (Series.corr aligns the two series on their timestamp index)
print("Corr Recalque vs Flex4_1:", rec.corr(f41))
print("Corr Recalque vs Flex4_2:", rec.corr(f42))

# Comparison plot limited to the first 500 one-minute bins to keep it light
plt.figure(figsize=(12,5))
plt.plot(rec.index[:500], rec.values[:500], label="Recalque")
plt.plot(f41.index[:500], f41.values[:500], label="Flex4_1")
plt.plot(f42.index[:500], f42.values[:500], label="Flex4_2")
plt.legend()
plt.title("Comparativo Recalque vs FlexAnalogue4_1 e 4_2")
plt.show()

In [None]:
# Check the container type and the first rows of rec and f41.
print(type(rec), rec.head())
print(type(f41), f41.head())


In [None]:
import pandas as pd
import numpy as np

# Readings at or above this threshold are treated as sentinel (invalid) values.
SENT = 2.1e9

def one_min_series(df, resource):
    """Build the one-minute mean series of a single resource.

    Selects the rows of ``resource``, parses timestamps as UTC (unparseable ones
    are dropped), keeps the last row per timestamp, blanks values >= SENT and
    averages per minute. Returns a Series named after the resource, or None
    when the resource has no rows.
    """
    linhas = df.loc[df["resource"] == resource, ["timestamp", "value"]].copy()
    if linhas.empty:
        return None

    linhas["timestamp"] = pd.to_datetime(linhas["timestamp"], utc=True, errors="coerce")
    linhas = linhas.dropna(subset=["timestamp"])
    linhas = linhas.sort_values("timestamp")
    linhas = linhas.drop_duplicates(subset="timestamp", keep="last")
    linhas.loc[linhas["value"] >= SENT, "value"] = np.nan

    # Selecting the "value" column first makes the result a Series, not a DataFrame.
    por_minuto = linhas.set_index("timestamp")["value"].resample("1min").mean()
    por_minuto.name = resource
    return por_minuto


In [None]:
rec = one_min_series(df_415, "Recalque")
f41 = one_min_series(df_415, "FlexAnalogue4_1")
f42 = one_min_series(df_415, "FlexAnalogue4_2")

# Side-by-side frames aligned on the index, keeping only minutes where both have data
pair_41 = pd.concat([rec, f41], axis=1).dropna()
pair_42 = pd.concat([rec, f42], axis=1).dropna()

# Scalar correlations (NaN when a pair has no overlapping minute)
if pair_41.empty:
    corr_41 = np.nan
else:
    corr_41 = pair_41.corr().loc["Recalque","FlexAnalogue4_1"]

if pair_42.empty:
    corr_42 = np.nan
else:
    corr_42 = pair_42.corr().loc["Recalque","FlexAnalogue4_2"]

print("Corr(Recalque, Flex4_1) =", corr_41)
print("Corr(Recalque, Flex4_2) =", corr_42)


In [None]:
import matplotlib.pyplot as plt

# Plot a short window only: at most the first `w` paired samples of each comparison.
w = 500

for par, canal in ((pair_41, "FlexAnalogue4_1"), (pair_42, "FlexAnalogue4_2")):
    viz = par.iloc[:w] if len(par) >= w else par

    plt.figure(figsize=(12,5))
    plt.plot(viz.index, viz["Recalque"], label="Recalque")
    plt.plot(viz.index, viz[canal], label=canal)
    plt.title(f"Recalque vs {canal} (1min)")
    plt.legend()
    plt.show()


In [None]:
# Number of non-NaN entries in each of the three series.
for rotulo, serie in (("Recalque", rec), ("Flex4_1", f41), ("Flex4_2", f42)):
    print(f"{rotulo} válidos:", serie.notna().sum())


In [None]:
# Number of rows left in each paired frame.
print("join Recalque + Flex4_1:", len(pair_41))
print("join Recalque + Flex4_2:", len(pair_42))


In [None]:
# Raw Recalque values: count, cardinality, range and a sample of the first rows.
eh_recalque = df_415["resource"] == "Recalque"
rec_raw = df_415.loc[eh_recalque, "value"]

print("N registros:", len(rec_raw))
print("Valores únicos:", rec_raw.nunique())
print("Min:", rec_raw.min(), "Max:", rec_raw.max())
print(rec_raw.head(20))


In [None]:
import numpy as np
import pandas as pd

SENT_VALUES = [2147483647, 2147483645]  # candidate sentinel codes

def diagnostico_resources(df):
    """Per-resource diagnostics of the "value" column.

    For every resource: number of rows, number of distinct values, min, max, and
    the percentage of rows equal to one of SENT_VALUES versus the remainder.
    The result is sorted by descending "%_validos".
    """
    registros = []
    for nome, grupo in df.groupby("resource"):
        valores = grupo["value"].values
        qtd = len(valores)

        # Fraction of rows holding a sentinel code (NaN for an empty group)
        if qtd > 0:
            frac_sent = np.isin(valores, SENT_VALUES).mean()
            frac_valid = 1 - frac_sent
        else:
            frac_sent = np.nan
            frac_valid = np.nan

        registros.append({
            "resource": nome,
            "n_registros": qtd,
            "n_valores_unicos": len(np.unique(valores)),
            "min": np.nanmin(valores),
            "max": np.nanmax(valores),
            "%_sentinela": round(frac_sent*100,2),
            "%_validos": round(frac_valid*100,2)
        })

    tabela = pd.DataFrame(registros)
    return tabela.sort_values("%_validos", ascending=False)

diag = diagnostico_resources(df_415)

# Raise the row display limit to 100 before printing the table.
pd.set_option("display.max_rows", 100)
print(diag)


In [None]:
# Sorted list of the distinct non-null resource names.
print(sorted(df_415["resource"].dropna().unique()))


In [None]:
# Raw values of the "Vibracao" resource: count, cardinality, range and a sample.
eh_vibracao = df_415["resource"] == "Vibracao"
vib = df_415.loc[eh_vibracao, "value"]

print("N registros:", len(vib))
print("Valores únicos:", vib.nunique())
print("Min:", vib.min(), "Max:", vib.max())

print("\nAmostra:")
print(vib.head(20))


In [None]:
# Raw FlexAnalogue4_1 / 4_2 readings with the two sentinel codes replaced by NaN.
eh_f41 = df_415["resource"] == "FlexAnalogue4_1"
eh_f42 = df_415["resource"] == "FlexAnalogue4_2"
f41 = df_415.loc[eh_f41, "value"].replace([2147483647, 2147483645], np.nan)
f42 = df_415.loc[eh_f42, "value"].replace([2147483647, 2147483645], np.nan)

print("Flex4_1 min:", f41.min(), "max:", f41.max(), "unique:", f41.nunique())
print("Flex4_2 min:", f42.min(), "max:", f42.max(), "unique:", f42.nunique())

# Series.corr aligns on the index. f41 and f42 come from different rows of
# df_415, so their row labels never coincide and correlating them directly
# always gives NaN. Re-key both on timestamp (last non-null value per timestamp)
# so that readings taken at the same instant are paired.
f41_por_ts = f41.groupby(df_415.loc[eh_f41, "timestamp"]).last()
f42_por_ts = f42.groupby(df_415.loc[eh_f42, "timestamp"]).last()
print("Correlação bruta entre Flex4_1 e Flex4_2:", f41_por_ts.corr(f42_por_ts))


In [None]:
import pandas as pd
import numpy as np

def to_series(df, resource, freq="15s"):
    """Resampled mean series of one resource.

    Parameters
    ----------
    df : pandas.DataFrame
        Long frame with "resource", "timestamp" and "value" columns.
    resource : str
        Resource name to select.
    freq : str, default "15s"
        Resampling frequency. The lowercase alias is used because the uppercase
        "S" alias is deprecated in recent pandas releases.

    Returns
    -------
    pandas.Series
        Mean of "value" per bin. Rows with a missing timestamp or value are
        dropped, only the first row of each timestamp is kept, and the sentinel
        codes 2147483647 / 2147483645 are treated as NaN.
    """
    s = df.loc[df["resource"] == resource, ["timestamp", "value"]].copy()
    s["timestamp"] = pd.to_datetime(s["timestamp"], utc=True, errors="coerce")
    s = s.dropna().drop_duplicates("timestamp")
    s.loc[s["value"].isin([2147483647, 2147483645]), "value"] = np.nan
    return s.set_index("timestamp")["value"].resample(freq).mean()

# Resampled series of the two FlexAnalogue4 channels, using to_series' default frequency.
f41_15s = to_series(df_415, "FlexAnalogue4_1")
f42_15s = to_series(df_415, "FlexAnalogue4_2")

# Put both series side by side (aligned on timestamp) and show their correlation matrix.
pair = pd.concat([f41_15s, f42_15s], axis=1)
print(pair.corr())

print("\nResumo estatístico:")
print(pair.describe())


In [None]:
import pandas as pd
import numpy as np

# New long frame derived from df_415 (which is left untouched):
#  - timestamp parsed as UTC datetimes, unparseable values -> NaT
#  - only the channel identified as suction is renamed to "Succao"
df_415_long = df_415.assign(
    timestamp=pd.to_datetime(df_415["timestamp"], utc=True, errors="coerce"),
    resource=df_415["resource"].replace({"FlexAnalogue4_1": "Succao"}),
)

tem_succao = "Succao" in df_415_long["resource"].unique()
print("Existe 'Succao' agora?", tem_succao)


In [None]:

# Work on a COPY so the source frame is not modified:
df = df_415_long.copy()

# --- Diagnostics before cleaning ---
SENT = {2147483647.0, 2147483645.0}

eh_sentinela = df["value"].isin(SENT)
total_counts = df.groupby("resource")["value"].size()
sent_counts  = eh_sentinela.groupby(df["resource"]).sum()
report = pd.DataFrame({
    "n_registros": total_counts,
    "n_sentinela": sent_counts.fillna(0).astype(int)
})
report["pct_sentinela"] = (100 * report["n_sentinela"] / report["n_registros"]).round(2)
print(">>> Sentinelas por resource (ANTES):")
print(report.sort_values("pct_sentinela", ascending=False).head(20))

# --- CLEANING: sentinel -> NaN for every resource EXCEPT Stop (its 0/1 values are valid data) ---
nao_eh_stop = df["resource"].str.lower() != "stop"
mask = eh_sentinela & nao_eh_stop
df.loc[mask, "value"] = np.nan

# --- Post-cleaning checks ---
valores_stop = df.loc[df["resource"].str.lower()=="stop", "value"].dropna().unique()
print("\n>>> STOP (deve continuar 0/1):",
      valores_stop)

n_rec_reais = df.loc[df["resource"]=="Recalque", "value"].dropna().size
print("Recalque - leituras válidas após limpeza (esperado 0):", n_rec_reais)

succao_nan_pct = df.loc[df["resource"]=="Succao", "value"].isna().mean()*100
print(f"Succao - % NaN após limpeza: {succao_nan_pct:.2f}%")


In [None]:
# Independent copy of df under the name df_415_keep.
df_415_keep = df.copy()


In [None]:
# Set of resource names of interest for the ITU-415 unit.
keep_415 = {
    "Succao",       # renamed from FlexAnalogue4_1
    "Eng_RPM",
    "Oil_P",
    "Cool_T",
    "Oil_L",
    "Fuel_Con",
    "Fuel_L",
    "Stop",
    "Auto", "Man",
    "Bat_V", "Char_V",
}


In [None]:
# Start from df (the cleaned COPY of the 415 frame); the resource column is used as-is.
res_col = df["resource"]

keep_415 = {
    "Succao", "Eng_RPM", "Oil_P", "Cool_T", "Oil_L",
    "Fuel_Con", "Fuel_L", "Stop", "Auto", "Man", "Bat_V", "Char_V",
}

# Keep only the rows whose resource is in the set of interest
eh_de_interesse = res_col.isin(keep_415)
df_415_keep = df.loc[eh_de_interesse].copy()

recursos_mantidos = sorted(df_415_keep["resource"].dropna().unique())
print("Resources mantidos no 415:\n", recursos_mantidos)
print("Tamanho final (long):", df_415_keep.shape)

# Check: share of non-NaN values per resource, highest first
valid_rate = (
    df_415_keep["value"].notna()
               .groupby(df_415_keep["resource"])
               .mean()
               .sort_values(ascending=False)
               .round(3)
               .rename("valid")
)
print("\n% de valores válidos por resource:\n", valid_rate)


In [None]:
# Row count of every (timestamp, resource) pair; a count above 1 means duplicates.
contagem_pares = df_415_keep.groupby(["timestamp","resource"]).size()
dups = contagem_pares.reset_index(name="cnt")

repetido = dups["cnt"]>1
print("Pares (timestamp, resource) com cnt>1:",
      repetido.sum())
dups[repetido].head()

In [None]:
# Keep exactly one row per (timestamp, resource). Rows are ordered so that a
# non-NaN value sorts after a NaN one within the same pair; keep="last" then
# prefers the valid reading. A plain keep="last" could retain a NaN row and
# discard the valid one for good, because this cell overwrites df_415_keep.
df_415_keep = (
    df_415_keep
      .assign(_is_valid=df_415_keep["value"].notna())
      .sort_values(["timestamp", "resource", "_is_valid"])
      .drop_duplicates(subset=["timestamp", "resource"], keep="last")
      .drop(columns="_is_valid")
)

In [None]:
# Recount the rows of every (timestamp, resource) pair in df_415_keep.
tamanho_por_par = df_415_keep.groupby(["timestamp","resource"]).size()
dups = tamanho_por_par.reset_index(name="cnt")

ainda_repetido = dups["cnt"]>1
print("Pares (timestamp, resource) com cnt>1:",
      ainda_repetido.sum())
dups[ainda_repetido].head()

In [None]:
# Long -> wide: one row per timestamp, one column per resource. If a
# (timestamp, resource) pair occurs more than once, its values are averaged.
df_415_wide = (
    df_415_keep
      .pivot_table(index="timestamp", columns="resource", values="value", aggfunc="mean")
      .sort_index()
)


In [None]:
# Column dtypes and non-null counts of the wide frame.
df_415_wide.info()

In [None]:
# First rows of the wide frame.
df_415_wide.head()

In [None]:
# Rebuild the wide frame from the filtered long frame, preferring valid values.
# Steps 1-3: derive a new frame with a validity flag, order it so that within
# each (timestamp, resource) pair NaN rows (False) come before valid rows (True),
# keep the last row of each pair, and drop the helper flag.
df_fix = (
    df_415_keep
      .assign(is_valid=df_415_keep["value"].notna())
      .sort_values(["timestamp", "resource", "is_valid"])
      .drop_duplicates(subset=["timestamp", "resource"], keep="last")
      .drop(columns="is_valid")
)

# Step 4: pivot again (long -> wide)
pivotado = df_fix.pivot_table(
    index="timestamp", columns="resource", values="value", aggfunc="mean"
)
df_415_wide = pivotado.sort_index()

print("Shape wide (refeito):", df_415_wide.shape)
print("Exemplo:", df_415_wide.head(5))

In [None]:

SENT = {2147483647.0, 2147483645.0}

# 1) Start from the filtered long frame
df_fix = df_415_keep.copy()

# 1a) Blank sentinel values here as well (does not rely on earlier cleaning);
#     the Stop resource is excluded from the replacement.
eh_sent = df_fix["value"].isin(SENT)
nao_stop = df_fix["resource"].str.lower() != "stop"
mask_sent = eh_sent & nao_stop
df_fix.loc[mask_sent, "value"] = np.nan

# 2) Deduplicate keeping the valid (non-NaN) row: NaN rows sort first, valid rows last
df_fix["is_valid"] = df_fix["value"].notna()
df_fix = df_fix.sort_values(["timestamp", "resource", "is_valid"])
df_fix = df_fix.drop_duplicates(subset=["timestamp", "resource"], keep="last")
df_fix = df_fix.drop(columns="is_valid")

# 3) Pivot long -> wide
df_415_wide = df_fix.pivot_table(
    index="timestamp", columns="resource", values="value", aggfunc="mean"
).sort_index()

print("Shape wide (limpo):", df_415_wide.shape)
print(df_415_wide.head(5))

# 4) Check: count the sentinel values left in every column other than Stop
leftovers = {}
for col in df_415_wide.columns:
    if col == "Stop":
        continue
    leftovers[col] = int(df_415_wide[col].isin(SENT).sum())
print("Sentinelas remanescentes por coluna (deve ser 0):", leftovers)

In [None]:
# Distinct non-NaN Eng_RPM values: how many there are and the first 20 of them.
rpm_vals = df_415_wide["Eng_RPM"].dropna().unique()
print("Nº de valores únicos de Eng_RPM:", len(rpm_vals))
print("Exemplos:", rpm_vals[:20])


In [None]:
# Keep only the rows where Eng_RPM is exactly 0
rpm_zeros = df_415_wide[df_415_wide["Eng_RPM"] == 0]

print("Qtd de registros com RPM = 0:", len(rpm_zeros))
print(rpm_zeros[["Eng_RPM", "Stop"]].head(20))


In [None]:
# Keep only the rows where Eng_RPM is exactly 2147483643.0.
# NOTE(review): the sentinel sets defined elsewhere in this notebook use
# 2147483647 and 2147483645 — confirm that ...643 is the intended value here.
rpm_sentinela = df_415_wide[df_415_wide["Eng_RPM"] == 2147483643.0]

print("Qtd de registros com RPM = 2.147484e+09:", len(rpm_sentinela))
print(rpm_sentinela[["Eng_RPM", "Stop"]].head(20))


In [None]:
import matplotlib.pyplot as plt

# Eng_RPM without NaN and without values of 1e6 or more (drops sentinel-sized readings)
rpm_sem_nan = df_415_wide["Eng_RPM"].dropna()
rpm_valid = rpm_sem_nan[rpm_sem_nan < 1e6]

fig, ax = plt.subplots(figsize=(8,5))
rpm_valid.hist(bins=50, ax=ax)
ax.set_title("Distribuição de Eng_RPM (sem sentinelas)")
ax.set_xlabel("RPM")
ax.set_ylabel("Frequência")
plt.show()


In [None]:
import numpy as np

SENT = [2147483647.0, 2147483645.0]
TOL = 1  # numeric tolerance around each sentinel code

# True where Eng_RPM lies within TOL of any sentinel code. The comparison is
# vectorized over the whole column instead of a row-by-row Python lambda;
# NaN entries compare as False, so they are never flagged.
rpm = df_415_wide["Eng_RPM"]
mask_rpm_sentinel = rpm.isin([])  # all-False mask with the same index and name
for codigo in SENT:
    mask_rpm_sentinel = mask_rpm_sentinel | ((rpm - codigo).abs() <= TOL)

print("Qtd de registros Eng_RPM = sentinela:", mask_rpm_sentinel.sum())


In [None]:
# List every distinct non-NaN Eng_RPM value, in ascending order
rpm_vals = sorted(df_415_wide["Eng_RPM"].dropna().unique())
print("Nº de valores únicos:", len(rpm_vals))
print("Todos os valores:")
for v in rpm_vals:
    print(v)


In [None]:
# Distinct values found in the Stop column (NaN included, since it is not dropped).
stop_vals = df_415_wide["Stop"].unique()
print("Valores únicos em Stop:", stop_vals)

In [None]:
# Frequency of each Stop value, counting NaN as its own category.
print(df_415_wide["Stop"].value_counts(dropna=False))


In [None]:
# (rows, columns) of the wide frame.
print(df_415_wide.shape)

In [None]:
# Rows where Stop == 1.0, with the min / mean / max of every column over those rows.
stopped = df_415_wide[df_415_wide["Stop"] == 1.0]
print("Qtd de registros com Stop=1:", len(stopped))
print(stopped.describe().T[["min","mean","max"]])


In [None]:
# Rows where Stop == 0.0, with the min / mean / max of every column over those rows.
working = df_415_wide[df_415_wide["Stop"] == 0.0]
print("Qtd de registros com Stop=0:", len(working))
print(working.describe().T[["min","mean","max"]])

In [None]:
# Add Stop_ffill: Stop with gaps filled by carrying the last known value forward.
# Series.ffill() replaces the deprecated fillna(method="ffill") spelling.
df_415_wide["Stop_ffill"] = df_415_wide["Stop"].ffill()

# Count how many rows fall under each status
print(df_415_wide["Stop_ffill"].value_counts())

In [None]:
# Forward-fill Stop, then back-fill whatever is still empty at the start of the
# series. ffill()/bfill() replace the deprecated fillna(method=...) spelling.
df_415_wide["Stop_ffill"] = (
    df_415_wide["Stop"]
    .ffill()
    .bfill()
)

print("Total linhas:", len(df_415_wide))
print(df_415_wide["Stop_ffill"].value_counts())


In [None]:
# List every distinct non-NaN Eng_RPM value, in ascending order
rpm_vals = sorted(df_415_wide["Eng_RPM"].dropna().unique())
print("Nº de valores únicos:", len(rpm_vals))
print("Todos os valores:")
for v in rpm_vals:
    print(v)

In [None]:
import matplotlib.pyplot as plt

# Make sure Stop_ffill has no gaps: forward-fill, then back-fill the leading NaNs.
# ffill()/bfill() replace the deprecated fillna(method=...) spelling.
df_415_wide["Stop_ffill"] = (
    df_415_wide["Stop"]
    .ffill()
    .bfill()
)

# Plot only a short stretch: the first 5000 rows of the wide frame
subset = df_415_wide.iloc[:5000]

fig, ax1 = plt.subplots(figsize=(12,6))

# Left Y axis -> RPM
ax1.plot(subset.index, subset["Eng_RPM"], color="tab:blue", label="Eng_RPM")
ax1.set_ylabel("RPM", color="tab:blue")
ax1.tick_params(axis="y", labelcolor="tab:blue")

# Right Y axis -> Stop_ffill
ax2 = ax1.twinx()
ax2.plot(subset.index, subset["Stop_ffill"], color="tab:red", linestyle="--", label="Stop_ffill")
ax2.set_ylabel("Stop_ffill (0=ligado, 1=desligado)", color="tab:red")
ax2.tick_params(axis="y", labelcolor="tab:red")

# Title and layout
plt.title("Eng_RPM vs Stop_ffill (primeiro intervalo)")
fig.tight_layout()
plt.show()
