<a href="https://colab.research.google.com/github/SIMBL742/PBR_FERM/blob/main/PBR_FERM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Import Packages

In [5]:

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score

# Load and Preprocess Data
def load_and_preprocess_data(filepath):
    """Load fermenter readings from CSV, sort them, and forward-fill gaps.

    Args:
        filepath: Path or file-like object accepted by ``pd.read_csv``. The
            data must contain ``fermenter_id`` and ``timestamp`` columns.

    Returns:
        DataFrame sorted by ``fermenter_id`` and then ``timestamp``. Missing
        values are forward-filled within each fermenter only; gaps at the
        very start of a fermenter's history have nothing to copy from and
        stay NaN.
    """
    df = pd.read_csv(filepath)
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df.sort_values(by=['fermenter_id', 'timestamp'], inplace=True)

    # Fill per fermenter: a frame-wide ffill on the sorted data would copy the
    # last reading of one fermenter into the leading gaps of the next one.
    # dropna=False keeps rows whose fermenter_id is missing in their own group
    # instead of blanking them out.
    value_cols = [col for col in df.columns if col != 'fermenter_id']
    df[value_cols] = (
        df.groupby('fermenter_id', dropna=False)[value_cols].ffill()
    )
    return df

# Feature Engineering
def engineer_features(df, features, rolling_window=6):
    """Add per-fermenter rolling-mean and first-difference columns.

    For every name in ``features`` two columns are added to ``df`` in place:
    ``<feature>_rolling_mean`` and ``<feature>_delta``. Both are computed
    separately inside each ``fermenter_id`` group.

    Args:
        df: DataFrame containing ``fermenter_id`` and the feature columns.
        features: Iterable of column names to derive features from.
        rolling_window: Number of rows in the rolling-mean window.

    Returns:
        The same ``df`` object with the new columns added. The first
        ``rolling_window - 1`` rolling means and the first delta of each
        group are NaN.
    """
    for feature in features:
        rolling_mean = (
            df.groupby('fermenter_id')[feature]
              .rolling(window=rolling_window)
              .mean()
        )
        # The grouped rolling result is indexed by (fermenter_id, row label).
        # Drop only the group level so the assignment aligns on the original
        # row labels; resetting the whole index would pair values with the
        # wrong rows whenever df's index is not 0..n-1 in group order.
        df[f'{feature}_rolling_mean'] = rolling_mean.reset_index(
            level=0, drop=True
        )
        df[f'{feature}_delta'] = df.groupby('fermenter_id')[feature].diff()
    return df

# Normalize Features
def normalize_features(df, features):
    """Standardize the given columns of ``df`` in place.

    Args:
        df: DataFrame holding the columns to scale.
        features: Column names to pass through ``StandardScaler``.

    Returns:
        Tuple ``(df, scaler)``: the same DataFrame with the selected columns
        overwritten by their scaled values, and the fitted scaler.
    """
    scaler = StandardScaler()
    scaled_values = scaler.fit_transform(df[features])
    df[features] = scaled_values
    return df, scaler

# Label Infections
def label_infections(df, infection_events, lead_hours=6):
    """Mark the readings that fall in the window just before each infection.

    A ``label`` column is (re)created on ``df``: 1 for rows of the infected
    fermenter whose timestamp lies in
    ``[infection_time - lead_hours, infection_time)``, otherwise 0.

    Args:
        df: DataFrame with ``fermenter_id`` and datetime ``timestamp`` columns.
        infection_events: DataFrame with ``fermenter_id`` and
            ``infection_time`` columns. ``infection_time`` may hold datetimes
            or parseable strings. An empty frame leaves every label at 0.
        lead_hours: Length of the pre-infection window in hours.

    Returns:
        The same ``df`` object with the ``label`` column set.
    """
    df['label'] = 0
    if infection_events.empty:
        return df

    lead = pd.Timedelta(hours=lead_hours)
    # Coerce once up front so string timestamps (e.g. from an unparsed CSV)
    # support the datetime arithmetic below.
    infection_times = pd.to_datetime(infection_events['infection_time'])
    for f_id, inf_time in zip(infection_events['fermenter_id'], infection_times):
        mask = (
            (df['fermenter_id'] == f_id)
            & (df['timestamp'] >= inf_time - lead)
            & (df['timestamp'] < inf_time)
        )
        df.loc[mask, 'label'] = 1
    return df

# Create Sequences
def create_sequences(df, features, window_size=12):
    """Cut each fermenter's rows into overlapping fixed-length windows.

    Windows never span two fermenters. Each window is labelled with the
    ``label`` value of its own last row.

    Args:
        df: DataFrame with ``fermenter_id``, ``label`` and the feature columns.
        features: List-like of feature column names to include per time step.
        window_size: Number of consecutive rows per window.

    Returns:
        Tuple ``(sequences, labels)`` of NumPy arrays. ``sequences`` has shape
        ``(n_windows, window_size, n_features)`` and ``labels`` has shape
        ``(n_windows,)``. If no group has at least ``window_size`` rows, the
        arrays are empty but keep those ranks.
    """
    sequences, labels = [], []
    for _, group in df.groupby('fermenter_id'):
        # Convert once per group instead of slicing the DataFrame per window.
        values = group[features].to_numpy()
        group_labels = group['label'].to_numpy()
        # `end` is exclusive and runs through len(group), so the final full
        # window is included; a group of exactly window_size rows yields one.
        for end in range(window_size, len(group) + 1):
            sequences.append(values[end - window_size:end])
            labels.append(group_labels[end - 1])

    if not sequences:
        # Keep the 3-D layout so downstream code can rely on the rank.
        return np.empty((0, window_size, len(features))), np.empty((0,))
    return np.array(sequences), np.array(labels)

# Split Data and Convert to Tensors
def split_and_tensorize(sequences, labels, test_size=0.2):
    """Split windows and labels in order, then convert them to tensors.

    Args:
        sequences: Array of input windows.
        labels: Array of labels, one per window.
        test_size: Passed through to ``train_test_split``.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)`` of ``torch.float32``
        tensors. The split is made with ``shuffle=False``, so the original
        ordering is kept.
    """
    # train_test_split returns X_train, X_test, y_train, y_test in that order.
    parts = train_test_split(
        sequences, labels, test_size=test_size, shuffle=False
    )
    return tuple(torch.tensor(part, dtype=torch.float32) for part in parts)

# LSTM Model
class FermenterLSTM(nn.Module):
    """LSTM followed by a linear layer and sigmoid, one output per sequence."""

    def __init__(self, input_size, hidden_size, num_layers):
        """Build the recurrent stack and the single-unit output layer.

        Args:
            input_size: Number of features per time step.
            hidden_size: Size of the LSTM hidden state.
            num_layers: Number of stacked LSTM layers.
        """
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=1)

    def forward(self, x):
        """Return a sigmoid score of shape ``(batch, 1)`` for input ``x``.

        ``x`` is consumed batch-first, i.e. ``(batch, seq_len, input_size)``.
        """
        per_step_output, _state = self.lstm(x)
        # Only the output at the final time step feeds the classifier head.
        final_step = per_step_output[:, -1, :]
        logits = self.fc(final_step)
        return torch.sigmoid(logits)

# Example Usage
# Runs the full pipeline at import time: load -> engineer -> normalize ->
# label -> window -> split.
filepath = 'PBR_DATA.csv'  # Update this to your actual file path
df = load_and_preprocess_data(filepath)
# The [2:-1] slice skips the first two columns and the last one.
# Assumes the first two columns are fermenter_id and timestamp.
# NOTE(review): the last column is excluded as well — confirm what it holds.
features = df.columns[2:-1]

# NOTE(review): `features` was captured before engineer_features ran, so the
# `_rolling_mean` / `_delta` columns it adds are neither normalized nor
# included in the sequences below — confirm whether that is intended.
df = engineer_features(df, features)
df, scaler = normalize_features(df, features)
# Placeholder: with an empty frame no rows get labelled, so every label is 0.
# This should be loaded or defined with actual data (columns read by
# label_infections: fermenter_id, infection_time).
infection_events = pd.DataFrame()
df = label_infections(df, infection_events)
sequences, labels = create_sequences(df, features)

# Unshuffled split: the test set is the tail of `sequences`.
X_train, X_test, y_train, y_test = split_and_tensorize(sequences, labels)
