In questo notebook si analizzano le misurazioni del BactoSense precedenti alla disinfezione (analisi che serviva a Marco), per capire quanto sia adeguata la frequenza di campionamento di 6 ore ai fini delle informazioni che si possono ricavare.

In [None]:
import os
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from prophet import Prophet
from prophet.plot import plot_components_plotly, plot_plotly

import statsmodels.api as sm

from sklearn.metrics import mean_squared_error

# Paths

In [None]:
# Data locations, all expressed relative to the directory the notebook runs from.
data_folder = os.path.join("..", "..", "data")

# First-level sub-folders of the data directory.
raw_data_folder = os.path.join(data_folder, "Raw Data")
intermediate_data_folder = os.path.join(data_folder, "Intermediate Data")

# Folder named after the 24-04-2024 meeting and its "feltre" sub-folder.
reunion_folder = os.path.join(raw_data_folder, "Riunione 24-04-2024")
feltre_folder = os.path.join(reunion_folder, "feltre")

# Load Data

In [None]:
# Read the two BactoSense spreadsheets and register them under the labels
# ("2H" and "6H") that the rest of the notebook iterates over.
# NOTE(review): the labels presumably denote the sampling interval of each
# export (2 h vs 6 h) -- confirm against the instrument settings.
bacteria_2H_path = os.path.join(feltre_folder, "Bactosense_dati_Luglio2024.xlsx")
bactosense_6H_path = os.path.join(raw_data_folder, "Batosense_data_6h_september.xlsx")

bacteria_2H_df = pd.read_excel(bacteria_2H_path)
bactosense_6H_df = pd.read_excel(bactosense_6H_path)

bacteria_dict = {"2H": bacteria_2H_df, "6H": bactosense_6H_df}

# Inspection

* BactoSense: misurazioni di:
    * ICC [1/mL]: concentrazione di cellule intatte
    * HNAC [1/mL]: concentrazione di cellule ad alto contenuto di acido nucleico
    * LNAC [1/mL]: concentrazione di cellule a basso contenuto di acido nucleico
    * HNAP [%]: frazione di ICC costituita da cellule ad alto contenuto di acido nucleico
    * TCC [1/mL] (no valori)
    * GATE+ [1/mL] (no valori)
    * ACC [1/mL] (no valori)
    * HACC [1/mL] (no valori)
    * LACC [1/mL] (no valori)
    * HACP [%] (no valori)


Secondo questo [sito](https://amf.ch/application/microfluidic-flow-cytometry-quality-water-analysis/) il TCC si potrebbe ricavare dall'HNAP, perché lì HNAP = HNAC/TCC; dalla mail di Marco, però, risulta che HNAP = HNAC/ICC.

In [None]:
# Columns removed from every export: timestamp/instrument/run metadata plus
# the measurement channels that the inspection notes list as having no values.
columns_to_drop = [
    "Timestamp",
    "Date [local]",
    "Date [GMT]",
    "Instrument Name",
    "Instrument SN",
    "Mode",
    "Name",
    "Protocol",
    "Warnings",
    "Alarms",
    "Cartridge Fill",
    "TCC [1/mL]",
    "GATE+ [1/mL]",
    "ACC [1/mL]",
    "HACC [1/mL]",
    "LACC [1/mL]",
    "HACP [%]",
]

# Clean each dataset and index it by sampling time. The operations work in
# place, so the frames that were loaded into `bacteria_dict` are modified too.
for key, bacteria_df in bacteria_dict.items():

    bacteria_df.drop(columns=columns_to_drop, inplace=True)

    # The local sampling date becomes the "DateTime" index.
    bacteria_df.rename(columns={"Sampling Date [local]": "DateTime"}, inplace=True)
    bacteria_df.set_index("DateTime", inplace=True)

    # The first row of the 2H export is discarded (reason not documented here).
    if key == "2H":
        bacteria_df = bacteria_df.iloc[1:]

    # Replacing the value of an existing key is safe while iterating the dict.
    bacteria_dict[key] = bacteria_df

## Timeseries

In [None]:
# Show every column of every dataset as its own line chart.
for key, bacteria_df in bacteria_dict.items():

    for col in bacteria_df.columns:
        # Date span reported in the title: first and last entries of the index.
        first_date = bacteria_df.index[0].date()
        last_date = bacteria_df.index[-1].date()

        series_trace = go.Scatter(
            x=bacteria_df.index,
            y=bacteria_df[col],
            mode="lines",
            name=col,
        )

        fig = go.Figure(data=[series_trace])
        fig.update_layout(
            title=f"{key} - {col} - {first_date} - {last_date}",
            xaxis_title="Date",
            yaxis_title=col,
            showlegend=True,
        )

        fig.show()

#### HNAC+LNAC dovrebbe risultare uguale a ICC

In [None]:
# Consistency check: overlay ICC and the sum HNAC + LNAC for each dataset,
# so that any gap between the two curves is visible.
for key, bacteria_df in bacteria_dict.items():

    hnac_plus_lnac = bacteria_df["HNAC [1/mL]"] + bacteria_df["LNAC [1/mL]"]

    icc_trace = go.Scatter(
        x=bacteria_df.index,
        y=bacteria_df["ICC [1/mL]"],
        mode="lines",
        name="ICC [1/mL]",
    )
    sum_trace = go.Scatter(
        x=bacteria_df.index,
        y=hnac_plus_lnac,
        mode="lines",
        name="HNAC [1/mL] + LNAC [1/mL]",
    )

    fig = go.Figure(data=[icc_trace, sum_trace])
    fig.update_layout(
        title=f"{key} - ICC [1/mL] vs HNAC [1/mL] + LNAC [1/mL]",
        xaxis_title="Date",
        yaxis_title="Concentration [1/mL]",
        showlegend=True,
    )

    fig.show()

#### HNAP dovrebbe essere uguale a HNAC/ICC

In [None]:
# Consistency check: overlay the reported HNAP with the ratio HNAC / ICC
# computed from the other two columns, both scaled by 100.
# NOTE(review): the stored HNAP values are multiplied by 100 here, which
# presumes the column holds a fraction despite its "[%]" label -- confirm.
for key, bacteria_df in bacteria_dict.items():

    reported_hnap = bacteria_df["HNAP [%]"] * 100
    computed_hnap = bacteria_df["HNAC [1/mL]"] / bacteria_df["ICC [1/mL]"] * 100

    reported_trace = go.Scatter(
        x=bacteria_df.index,
        y=reported_hnap,
        mode="lines",
        name="HNAP [%]",
    )
    computed_trace = go.Scatter(
        x=bacteria_df.index,
        y=computed_hnap,
        mode="lines",
        name="HNAC [1/mL] / ICC [1/mL]",
    )

    fig = go.Figure(data=[reported_trace, computed_trace])
    fig.update_layout(
        title=f"{key} - HNAP [%] vs HNAC [1/mL] / ICC [1/mL]",
        xaxis_title="Date",
        yaxis_title="%",
        showlegend=True,
    )

    fig.show()

## Decomposition

### Prophet

In [None]:
# Fit one Prophet model per dataset and per measured column, then export the
# component plot of each fit as a PNG file.

# Destination of the exported figures. It is resolved from the current user's
# home directory (instead of one specific user's absolute path) and created
# on demand, so the cell also runs on other machines.
plots_folder = os.path.join(os.path.expanduser("~"), "Downloads", "grafici")
os.makedirs(plots_folder, exist_ok=True)

for key, bacteria_df in bacteria_dict.items():

    # Iterate over this dataset's own columns instead of relying on whatever
    # `bacteria_df` an earlier cell happened to leave behind. "DateTime" is
    # the index at this point, so no column has to be excluded.
    for col in bacteria_df.columns:

        # Prophet expects the timestamps in "ds" and the values in "y".
        df = bacteria_df[[col]].reset_index()
        df.columns = ["ds", "y"]

        model = Prophet(weekly_seasonality=False)
        model.fit(df)

        # periods=0: no forecasting horizon is added to the history; the
        # model is only evaluated to obtain the components that are plotted.
        future = model.make_future_dataframe(periods=0)
        forecast = model.predict(future)

        fig = plot_components_plotly(model, forecast)
        fig.update_layout(title=f"{key} - {col}")

        # "/" (as in "[1/mL]") would act as a path separator in the file name.
        col_ = col.replace("/", "_")
        fig.write_image(os.path.join(plots_folder, f"Prophet_{key}_{col_}.png"))
  


### Decomposizione stagionale classica (seasonal_decompose)

In [None]:
# Decompose every column of every dataset into trend, seasonal and residual
# components and export the three-panel figure as a PNG file.
# Note: `sm.tsa.seasonal_decompose` is the classical moving-average
# decomposition, not STL, even though the exported files keep the "STL_" prefix.

# Destination of the exported figures. It is resolved from the current user's
# home directory (instead of one specific user's absolute path) and created
# on demand, so the cell also runs on other machines.
plots_folder = os.path.join(os.path.expanduser("~"), "Downloads", "grafici")
os.makedirs(plots_folder, exist_ok=True)

for key, bacteria_df in bacteria_dict.items():

    # Regular grid used for resampling: 2 h for the "2H" dataset, 6 h otherwise.
    freq = "2h" if key == "2H" else "6h"

    # Number of samples in one day, i.e. the length of the daily cycle.
    # Without an explicit period statsmodels derives it from the frequency's
    # rule code only ("h" -> 24) and ignores the 2/6 multiplier, so the
    # "seasonal" component would span 2 days (2 h grid) or 6 days (6 h grid).
    period = pd.Timedelta(days=1) // pd.Timedelta(freq)

    # Iterate over this dataset's own columns instead of relying on whatever
    # `bacteria_df` an earlier cell happened to leave behind.
    for col in bacteria_df.columns:

        # Put the series on the regular grid and fill gaps by interpolating
        # over time, since the decomposition needs evenly spaced values.
        df = bacteria_df[[col]].resample(freq).mean().interpolate(method="time")

        res = sm.tsa.seasonal_decompose(df, period=period)

        # One subplot row per component, top to bottom.
        components = [
            ("Trend", res.trend),
            ("Seasonal", res.seasonal),
            ("Residual", res.resid),
        ]

        fig = make_subplots(rows=len(components), cols=1)

        for row, (component_name, component_values) in enumerate(components, start=1):
            fig.add_trace(
                go.Scatter(
                    x=df.index,
                    y=component_values,
                    mode="lines",
                    name=component_name,
                ),
                row=row,
                col=1,
            )

        fig.update_layout(
            title=f"{key} - {col}",
            showlegend=True,
        )

        # "/" (as in "[1/mL]") would act as a path separator in the file name.
        col_ = col.replace("/", "_")
        fig.write_image(os.path.join(plots_folder, f"STL_{key}_{col_}.png"))

## Boxplot

In [None]:
# For every dataset and column, draw one box per hour of the day (outliers
# removed) and export the figure as a PNG file.

# Destination of the exported figures. It is resolved from the current user's
# home directory (instead of one specific user's absolute path) and created
# on demand, so the cell also runs on other machines.
plots_folder = os.path.join(os.path.expanduser("~"), "Downloads", "grafici")
os.makedirs(plots_folder, exist_ok=True)

for key, bacteria_df in bacteria_dict.items():

    # Hour of day of every sample, computed once per dataset.
    sample_hours = bacteria_df.index.hour

    # Iterate over this dataset's own columns instead of relying on whatever
    # `bacteria_df` an earlier cell happened to leave behind.
    for col in bacteria_df.columns:

        fig = go.Figure()

        for hour in range(24):

            values = bacteria_df.loc[sample_hours == hour, col].dropna()

            # Hours in which this dataset has no samples would only add empty
            # boxes and legend entries.
            if values.empty:
                continue

            # Remove outliers with the 1.5 * IQR rule. The fences are
            # inclusive: with strict comparisons an hour whose values are all
            # identical (IQR == 0) would lose every sample.
            q1 = values.quantile(0.25)
            q3 = values.quantile(0.75)
            iqr = q3 - q1
            values = values[values.between(q1 - 1.5 * iqr, q3 + 1.5 * iqr)]

            fig.add_trace(
                go.Box(
                    y=values,
                    name=f"Hour {hour}",
                )
            )

        fig.update_layout(
            title=f"{key} - {col}",
            showlegend=True,
        )

        # "/" (as in "[1/mL]") would act as a path separator in the file name.
        col_ = col.replace("/", "_")
        fig.write_image(os.path.join(plots_folder, f"Boxplot_{key}_{col_}.png"))