In [None]:
import os
import json
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import mean_squared_error

# Paths

In [None]:
# Data directory, addressed relative to the current working directory.
data_folder = os.path.join("..", "..", "data")
raw_data_folder = os.path.join(data_folder, "Raw Data")

# Sub-folders of the raw data that hold the Feltre files used below.
reunion_folder = os.path.join(raw_data_folder, "Riunione 24-04-2024")
feltre_folder = os.path.join(reunion_folder, "feltre")


# Load Data

In [None]:
# Locations of the three Excel exports inside the Feltre folder.
spectra_path = os.path.join(feltre_folder, "Spectra_ISOIL_21.06.2024-06.08.2024.xlsx")
data_path = os.path.join(feltre_folder, "Data_ISOIL_21.06.2024-06.08.2024.xlsx")
bacteria_path = os.path.join(feltre_folder, "Bactosense_dati_Luglio2024.xlsx")

# header=1: for the two ISOIL sheets the column names are read from the second row.
spectra_df = pd.read_excel(spectra_path, header=1)
data_df = pd.read_excel(data_path, header=1)
bacteria_df = pd.read_excel(bacteria_path)

# Inspection

## Spectra

In [None]:
# Display the spectra table (on a top-to-bottom run this is the frame as loaded).
spectra_df

In [None]:
# Tidy the spectra table:
#   1. give the timestamp column the short name "DateTime",
#   2. keep only the rows whose status text contains "Ok" (case-sensitive),
#   3. drop the status column,
#   4. index the table by "DateTime".
status_column = 'Status (Source:0)'

spectra_df = spectra_df.rename(
    columns={
        'Measurement interval=0[sec] (Export-Aggregation disabled)': 'DateTime',
    }
)

# na=False counts a missing status as "not Ok". Without it a NaN in the mask
# makes the boolean indexing below raise a ValueError.
status_is_ok = spectra_df[status_column].str.contains("Ok", na=False)

# Chained, non in-place calls: nothing is mutated on a filtered slice.
spectra_df = (
    spectra_df.loc[status_is_ok]
    .drop(columns=[status_column])
    .set_index('DateTime')
)

## Other Params

In [None]:
# Display the parameter table (on a top-to-bottom run this is the frame as loaded).
data_df

In [None]:
# Tidy the parameter table in a single chain (no in-place mutation of a slice):
#   1. discard every column whose name contains the word "Status",
#   2. give the timestamp column the short name "DateTime",
#   3. index the table by "DateTime".
# The "Status" filter is applied once; the renamed column cannot reintroduce a match.
data_df = (
    data_df.loc[:, ~data_df.columns.str.contains('Status')]
    .rename(
        columns={
            'Measurement interval=0[sec] (Export-Aggregation disabled)': 'DateTime',
        }
    )
    .set_index('DateTime')
)

## Bacteria

Dato isolato al 16/07, poi dal 22/07 ogni due ore.

• ICC [1/mL]: concentrazione di cellule intatte;

• HNAC [1/mL]: concentrazione di cellule ad alto contenuto di acido nucleico;

• LNAC [1/mL]: concentrazione di cellule a basso contenuto di acido nucleico;

• HNAP [%]: frazione di ICC costituita da cellule ad alto contenuto di acido nucleico.

Tendenzialmente, HNAC+LNAC dovrebbe risultare in ICC.

In [None]:
# Display the bacteria table (on a top-to-bottom run this is the frame as loaded).
bacteria_df

In [None]:
# Column names of the bacteria table as a plain Python list.
list(bacteria_df.columns)

In [None]:
# Columns removed from the bacteria table before plotting; what remains are
# the sampling date and the measurement columns.
dropped_bacteria_columns = [
    'Timestamp',
    'Date [local]',
    'Date [GMT]',
    'Instrument Name',
    'Instrument SN',
    'Mode',
    'Name',
    'Protocol',
    'Warnings',
    'Alarms',
    'Cartridge Fill',
]

# Single non in-place chain: drop the columns above, rename the sampling date
# to "DateTime", use it as the index, then discard the first row.
# NOTE(review): the first row is presumably the isolated 16/07 sample mentioned
# in the section notes - confirm.
bacteria_df = (
    bacteria_df.drop(columns=dropped_bacteria_columns)
    .rename(columns={'Sampling Date [local]': 'DateTime'})
    .set_index('DateTime')
    .iloc[1:]
)

# Compare UV254

In [None]:
# Overlay the UV254 column of data_df (red) and the "254 nm" column of
# spectra_df (blue), each against its own DateTime index.
uv254_traces = [
    go.Scatter(
        x=data_df.index,
        y=data_df["UV254 - Result (Limit:-Infinity-+Infinity)"],
        mode="lines",
        name="UV254 - Data",
        line={"color": "red"},
    ),
    go.Scatter(
        x=spectra_df.index,
        y=spectra_df["254 nm"],
        mode="lines",
        name="UV254 - Spectra",
        line={"color": "blue"},
    ),
]

fig = go.Figure()
for uv254_trace in uv254_traces:
    fig.add_trace(uv254_trace)

fig.show()

In [None]:
# RMSE between the UV254 values in data_df and the "254 nm" column of spectra_df.
#
# The two series are paired by timestamp, not by position: rows with a non-"Ok"
# status were removed from spectra_df only, so the tables are not guaranteed to
# have the same length or row order. Pairs with a missing value on either side
# are dropped because mean_squared_error rejects NaN.
uv254_pairs = (
    data_df[["UV254 - Result (Limit:-Infinity-+Infinity)"]]
    .join(spectra_df[["254 nm"]], how="inner")
    .dropna()
)

if uv254_pairs.empty:
    raise ValueError(
        "No common timestamps with valid UV254 values in data_df and spectra_df"
    )

rmse = np.sqrt(
    mean_squared_error(
        uv254_pairs["UV254 - Result (Limit:-Infinity-+Infinity)"],
        uv254_pairs["254 nm"],
    )
)
rmse

# Plots

## Time-series

### Spectra

In [None]:
# Spectra columns to plot; one line chart is produced per entry.
columns = ['254 nm']

for col in columns:
    fig = go.Figure(
        data=[
            go.Scatter(x=spectra_df.index, y=spectra_df[col], mode="lines", name=col)
        ]
    )
    fig.update_layout(showlegend=True)
    fig.show()

### Other Params

In [None]:
# Group key of each data_df column = the text preceding the first '-' in its
# name. Duplicates are removed and the keys are sorted alphabetically.
columns = sorted({column.split('-')[0] for column in data_df.columns})

In [None]:
# One figure per parameter group: every data_df column whose name matches the
# group key is drawn as its own line. (plotly's `go` comes from the import cell
# at the top of the notebook; it is not re-imported here.)
for col in columns:
    df = data_df.filter(regex=col, axis=1)

    fig = go.Figure()

    for column in df.columns:
        # The legend entry is the column name itself. Rebuilding it by splitting
        # on ']' gave the same text for names with one bracketed unit, but added
        # a stray ']' to names without one, such as the UV254 "Result" column.
        trace_name = column

        fig.add_trace(
            go.Scatter(
                x=df.index,
                y=df[column],
                mode="lines",
                name=trace_name,
            )
        )

    # Horizontal legend placed below the plot area so long names do not cover it.
    fig.update_layout(
        showlegend=True,
        legend=dict(
            orientation="h",
            yanchor="top",
            y=-0.12,
            font=dict(size=10)
        )
    )

    fig.show()

### Bacteria

In [None]:
# One line chart per bacteria_df column, plotted against the table index.
for col in bacteria_df.columns:
    fig = go.Figure(
        data=[
            go.Scatter(x=bacteria_df.index, y=bacteria_df[col], mode="lines", name=col)
        ]
    )
    fig.update_layout(showlegend=True)
    fig.show()

#### HNAC+LNAC dovrebbe risultare uguale a ICC

In [None]:
# Show the column labels still present in bacteria_df.
bacteria_df.columns

In [None]:
# Overlay ICC and the sum HNAC + LNAC on one chart so the two curves can be
# compared visually.
icc_series = bacteria_df['ICC [1/mL]']
hnac_plus_lnac = bacteria_df['HNAC [1/mL]'] + bacteria_df['LNAC [1/mL]']

fig = go.Figure()

for label, series in (
    ('ICC [1/mL]', icc_series),
    ('HNAC [1/mL] + LNAC [1/mL]', hnac_plus_lnac),
):
    fig.add_trace(
        go.Scatter(x=bacteria_df.index, y=series, mode="lines", name=label)
    )

fig.show()

### Before and After 4th July

#### Before

##### Spectra

In [None]:
# Spectra columns to plot for the period before 4th July 2024.
columns = ['254 nm']

for col in columns:
    # Keep the rows whose timestamp is strictly earlier than the cut-off.
    is_before = spectra_df.index < "2024-07-04 00:00:00"
    before_df = spectra_df[is_before]

    fig = go.Figure(
        data=[
            go.Scatter(x=before_df.index, y=before_df[col], mode="lines", name=col)
        ]
    )
    fig.update_layout(showlegend=True)
    fig.show()

##### Other Params

In [186]:
# Group key of each data_df column = the text preceding the first '-' in its
# name. Duplicates are removed and the keys are sorted alphabetically.
columns = sorted({column.split('-')[0] for column in data_df.columns})

In [None]:
# Line chart of each parameter group for the period before 4th July 2024.
# Only the first column of the group that matches the regex below is drawn.
for col in columns:

    # regex that matches the column name and contains the word "Measured" or "Result"
    regex = col + ".*(?=Measured|Result)"

    df = data_df.filter(regex=regex, axis=1)

    # A group may have no "Measured"/"Result" column; taking element [0] of an
    # empty list would raise IndexError, so such groups are skipped.
    if df.shape[1] == 0:
        continue

    column = df.columns.to_list()[0]

    # Text before the first '-' is the parameter name. The unit, when present,
    # is searched for in the text between the first and the second '-'.
    name_parts = column.split('-')
    name = name_parts[0]
    descriptor = name_parts[1] if len(name_parts) > 1 else ''

    # take the unit of measurement between the square brackets
    unit = descriptor.split('[')[1].split(']')[0] if '[' in descriptor else ''
    unit = '[' + unit + ']' if unit else ""

    before_df = df[df.index < '2024-07-04 00:00:00']

    fig = go.Figure()

    # line trace of the values recorded before the cut-off date
    fig.add_trace(
        go.Scatter(
            x=before_df.index,
            y=before_df[column],
            mode="lines",
            name=name + unit,
        )
    )

    fig.update_layout(
        showlegend=True,
        title=dict(
            text=f"{name + unit} before 4th July 2024",
            font=dict(size=20),
        ),
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.01,
            xanchor="right",
            x=1,
            font=dict(size=20)  # Adjust font size for legend
        ),
        yaxis_title=name + unit,
        # change x-axis font
        xaxis=dict(
            tickfont=dict(size=20)
        )
    )

    fig.show()

#### After

##### Spectra

In [191]:
# Spectra columns to plot for the period after 4th July 2024.
columns = ['254 nm']

for col in columns:
    # Keep the rows whose timestamp is strictly later than the cut-off.
    is_after = spectra_df.index > "2024-07-04 23:59:59"
    after_df = spectra_df[is_after]

    fig = go.Figure(
        data=[
            go.Scatter(x=after_df.index, y=after_df[col], mode="lines", name=col)
        ]
    )
    fig.update_layout(showlegend=True)
    fig.show()

##### Other Params

In [192]:
# Group key of each data_df column = the text preceding the first '-' in its
# name. Duplicates are removed and the keys are sorted alphabetically.
columns = sorted({column.split('-')[0] for column in data_df.columns})

In [193]:
# Line chart of each parameter group for the period after 4th July 2024.
# Only the first column of the group that matches the regex below is drawn.
for col in columns:

    # regex that matches the column name and contains the word "Measured" or "Result"
    regex = col + ".*(?=Measured|Result)"

    df = data_df.filter(regex=regex, axis=1)

    # A group may have no "Measured"/"Result" column; taking element [0] of an
    # empty list would raise IndexError, so such groups are skipped.
    if df.shape[1] == 0:
        continue

    column = df.columns.to_list()[0]

    # Text before the first '-' is the parameter name. The unit, when present,
    # is searched for in the text between the first and the second '-'.
    name_parts = column.split('-')
    name = name_parts[0]
    descriptor = name_parts[1] if len(name_parts) > 1 else ''

    # take the unit of measurement between the square brackets
    unit = descriptor.split('[')[1].split(']')[0] if '[' in descriptor else ''
    unit = '[' + unit + ']' if unit else ""

    after_df = df[df.index > '2024-07-04 23:59:59']

    fig = go.Figure()

    # line trace of the values recorded after the cut-off date
    fig.add_trace(
        go.Scatter(
            x=after_df.index,
            y=after_df[column],
            mode="lines",
            name=name + unit,
        )
    )

    fig.update_layout(
        showlegend=True,
        title=dict(
            text=f"{name + unit} after 4th July 2024",
            font=dict(size=20),
        ),
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.01,
            xanchor="right",
            x=1,
            font=dict(size=20)  # Adjust font size for legend
        ),
        yaxis_title=name + unit,
        # change x-axis font
        xaxis=dict(
            tickfont=dict(size=20)
        )
    )

    fig.show()

## Boxplots

It seems that something happened on 4th July, so we draw two separate boxplots: one for the data before that date and one for the data after it.

### Spectra

In [None]:
# Spectra columns to summarise; each gets one figure with two boxes.
columns = ['254 nm']

for col in columns:
    df = spectra_df[col]

    # Values strictly before / strictly after the two cut-off timestamps.
    before_df = df[df.index < '2024-07-04 00:00:00']
    after_df = df[df.index > '2024-07-04 23:59:59']

    fig = go.Figure()

    # One box per period: "Before" in blue, "After" in red.
    for label, values, colour in (
        ("Before", before_df, "blue"),
        ("After", after_df, "red"),
    ):
        fig.add_trace(go.Box(y=values, name=label, marker_color=colour))

    fig.update_layout(
        title=f"Box plot of {col} before and after 4th July 2024",
        yaxis_title=col,
    )

    fig.show()

### Other Params

In [None]:
# Group key of each data_df column = the text preceding the first '-' in its
# name. Duplicates are removed and the keys are sorted alphabetically.
columns = sorted({column.split('-')[0] for column in data_df.columns})

In [None]:
# Names of the data_df columns that mention "Temperature", as a plain list.
list(data_df.filter(regex='Temperature', axis=1).columns)

In [None]:
# Temperature columns regarded as redundant for the box plots below.
redundant_temperature_columns = [
    'Temperature - Measured value [C] (Limit:-5.00-100.00_Coefs:-0.40 1.00 0.00 0.00)',
    'Temperature - Clean value [C] (Limit:-5.00-100.00)',
    'Temperature - Measured value [°C] (Limit:0.00-45.00_Coefs:0.00 0.00 0.00 0.00)',
    'Temperature - Clean value [°C] (Limit:0.00-45.00)',
]

# Rebind data_df to the frame without those columns.
data_df = data_df.drop(columns=redundant_temperature_columns)

In [None]:
# Box plots of each parameter group, split into the periods before and after
# 4th July 2024. Only the first column of the group that matches the regex
# below is used.
for col in columns:

    # regex that matches the column name and contains the word "Measured" or "Result"
    regex = col + ".*(?=Measured|Result)"

    df = data_df.filter(regex=regex, axis=1)

    # `columns` may list a group that has no "Measured"/"Result" column left in
    # data_df (for example after columns have been dropped); taking element [0]
    # of an empty list would raise IndexError, so such groups are skipped.
    if df.shape[1] == 0:
        continue

    column = df.columns.to_list()[0]

    # Text before the first '-' is the parameter name. The unit, when present,
    # is searched for in the text between the first and the second '-'.
    name_parts = column.split('-')
    name = name_parts[0]
    descriptor = name_parts[1] if len(name_parts) > 1 else ''

    # take the unit of measurement between the square brackets
    unit = descriptor.split('[')[1].split(']')[0] if '[' in descriptor else ''
    unit = '[' + unit + ']' if unit else ""

    before_df = df[df.index < '2024-07-04 00:00:00']
    after_df = df[df.index > '2024-07-04 23:59:59']

    fig = go.Figure()

    # add box plot before and after the change
    fig.add_trace(
        go.Box(
            y=before_df[column],
            name="Before",
        )
    )

    fig.add_trace(
        go.Box(
            y=after_df[column],
            name="After",
        )
    )

    fig.update_layout(
        showlegend=True,
        title=dict(
            text=f"{name + unit} before and after 4th July 2024",
            font=dict(size=20),
        ),
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.01,
            xanchor="right",
            x=1,
            font=dict(size=20)  # Adjust font size for legend
        ),
        yaxis_title=name + unit,
        # change x-axis font
        xaxis=dict(
            tickfont=dict(size=20)
        )
    )

    fig.show()