<a href="https://colab.research.google.com/github/SIMBL742/PBR_FERM/blob/main/PBR_FERM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Import Packages

In [4]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Load and Preprocess Data
def load_and_preprocess_data(filepath):
    """Load fermenter readings from a CSV and return them time-indexed and gap-filled.

    The CSV must contain at least a ``timestamp`` column (parseable by
    ``pd.to_datetime``) and a ``fermenter_id`` column.

    Args:
        filepath: Anything ``pd.read_csv`` accepts (path, URL, or file-like object).

    Returns:
        A DataFrame sorted by ``fermenter_id`` then ``timestamp``, indexed by
        ``timestamp``, with missing values forward-filled within each fermenter.
        A gap at the very start of a fermenter's series stays NaN because there
        is no earlier reading from that fermenter to copy.
    """
    df = pd.read_csv(filepath)
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df.sort_values(by=['fermenter_id', 'timestamp'], inplace=True)
    df.set_index('timestamp', inplace=True)

    # Forward-fill per fermenter. A frame-wide ffill would copy the last reading
    # of one fermenter into the leading gaps of the next one, because the rows
    # are laid out fermenter after fermenter.
    value_cols = [col for col in df.columns if col != 'fermenter_id']
    if value_cols:
        # dropna=False keeps rows with a missing fermenter_id as their own group
        # instead of blanking their values.
        df[value_cols] = (
            df.groupby('fermenter_id', dropna=False)[value_cols].ffill()
        )
    return df

# Feature Engineering
def engineer_features(df, features, rolling_window=6):
    """Add per-fermenter rolling-mean and first-difference columns to ``df``.

    For every name in ``features`` two columns are added in place:

    * ``<feature>_rolling_mean`` - mean over the last ``rolling_window`` rows of
      the same fermenter (NaN until a full window is available).
    * ``<feature>_delta`` - difference from the previous row of the same
      fermenter (NaN on each fermenter's first row).

    Args:
        df: DataFrame containing ``fermenter_id`` and every column in ``features``.
        features: Iterable of column names to derive features from.
        rolling_window: Number of rows in the rolling-mean window.

    Returns:
        The same ``df`` object, with the new columns added.
    """
    for feature in features:
        per_fermenter = df.groupby('fermenter_id')[feature]
        # transform() returns a result carrying df's own index, so the assignment
        # lines up row for row. Rolling on the groupby and then resetting the
        # index yields a 0..n-1 index that cannot align with df's index, which
        # leaves the whole column NaN.
        df[f'{feature}_rolling_mean'] = per_fermenter.transform(
            lambda series: series.rolling(window=rolling_window).mean()
        )
        df[f'{feature}_delta'] = per_fermenter.diff()
    return df

# Normalize Features
def normalize_features(df, features):
    """Standardize the ``features`` columns of ``df`` in place.

    A fresh ``StandardScaler`` is fitted on the given columns and its
    transformed values are written back into those same columns.

    NOTE(review): the scaler is fitted on every row passed in; if ``df`` also
    holds rows later used for evaluation, their statistics influence the scaling.

    Args:
        df: DataFrame holding the columns to scale.
        features: Column labels to standardize.

    Returns:
        A ``(df, scaler)`` tuple: the mutated frame and the fitted scaler, so
        the same scaling can be applied to new data.
    """
    feature_scaler = StandardScaler().fit(df[features])
    df[features] = feature_scaler.transform(df[features])
    return df, feature_scaler

# Create Sequences
def create_sequences(df, features, window_size=12):
    """Slice each fermenter's rows into overlapping fixed-length windows.

    Windows never span two fermenters. Each window is labelled with the
    ``infection_label`` of its last row.

    Args:
        df: DataFrame with ``fermenter_id``, ``infection_label`` and every
            column in ``features``; rows of each fermenter are used in the
            order they appear in ``df``.
        features: Column labels that make up one time step.
        window_size: Number of consecutive rows per window (must be >= 1).

    Returns:
        ``(sequences, labels)`` where ``sequences`` has shape
        ``(n_windows, window_size, n_features)`` and ``labels`` has shape
        ``(n_windows,)``. When no fermenter has at least ``window_size`` rows,
        both arrays are empty but keep those ranks.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be at least 1")

    feature_cols = list(features)
    sequences, labels = [], []
    for _, group in df.groupby('fermenter_id'):
        # Convert once per group; slicing NumPy arrays is far cheaper than
        # building a DataFrame slice for every window.
        values = group[feature_cols].to_numpy()
        targets = group['infection_label'].to_numpy()
        # `end` is exclusive and runs up to len(group), so the window ending on
        # the fermenter's final row is included (its label is that final row).
        for end in range(window_size, len(group) + 1):
            sequences.append(values[end - window_size:end])
            labels.append(targets[end - 1])

    if not sequences:
        # Keep a predictable 3-D / 1-D shape for downstream tensor code.
        return np.empty((0, window_size, len(feature_cols))), np.empty((0,))
    return np.array(sequences), np.array(labels)

# Split Data and Convert to Tensors
def split_and_tensorize(sequences, labels, test_size=0.2):
    """Split windows and labels into train/test parts and convert to tensors.

    The split is done by ``train_test_split`` with ``shuffle=False``, so the
    input order is preserved rather than randomized.

    Args:
        sequences: Array of input windows.
        labels: Array of labels, one per window.
        test_size: Passed through to ``train_test_split``.

    Returns:
        ``(X_train, X_test, y_train, y_test)`` as ``torch.float32`` tensors.
    """
    # train_test_split yields X_train, X_test, y_train, y_test in that order.
    parts = train_test_split(
        sequences, labels, test_size=test_size, shuffle=False
    )
    return tuple(torch.tensor(part, dtype=torch.float32) for part in parts)

# LSTM Model
class FermenterLSTM(nn.Module):
    """LSTM followed by a single-unit linear head with a sigmoid output.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size, hidden_size, num_layers):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=1)

    def forward(self, x):
        """Return one sigmoid score per sequence in ``x``.

        ``x`` is batch-first (the LSTM is built with ``batch_first=True``);
        only the LSTM output at the last time step feeds the linear head, so
        the result has one column per batch row.
        """
        hidden_states, _ = self.lstm(x)
        final_step = hidden_states[:, -1, :]
        logits = self.fc(final_step)
        return logits.sigmoid()

# Prediction Function
def predict_fermenter_status(model, latest_window_data):
    """Score a single window of readings and print the result.

    The model is switched to eval mode, the window gets a leading batch
    dimension of 1, and the model's single output value is read as a
    probability.

    Args:
        model: A ``torch.nn.Module`` that returns exactly one value for a
            batch of one window.
        latest_window_data: Array-like or tensor holding one window (no batch
            dimension); it is converted to ``float32``.

    Returns:
        The model output as a Python float. A warning line is also printed
        when it exceeds 0.5.
    """
    model.eval()
    # Build the input on the model's own device; a CPU tensor fed to a model
    # living on another device raises at call time.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')
    with torch.no_grad():
        # as_tensor accepts arrays and existing tensors alike without the
        # copy-construct warning torch.tensor emits for tensor inputs.
        input_tensor = torch.as_tensor(
            latest_window_data, dtype=torch.float32, device=device
        ).unsqueeze(0)
        pred_prob = model(input_tensor).item()
    print(f"🧪 Prediction probability: {pred_prob:.4f}")
    if pred_prob > 0.5:
        print("⚠️ Infection likely in next few hours!")
    return pred_prob

# Example Usage
# NOTE: these statements run at module level; the path below is a URL, so loading
# the data requires network access.
filepath = 'https://raw.githubusercontent.com/SIMBL742/PBR_FERM/refs/heads/main/fake_fermenter_data_.csv'  # Update this to your actual file path
df = load_and_preprocess_data(filepath)
# Positional selection of the feature columns: drops the first two and the last
# three columns of the loaded frame ('timestamp' is already the index by now).
# NOTE(review): this depends on the CSV's column order - confirm the slice keeps
# only numeric sensor columns and excludes 'fermenter_id' and 'infection_label'.
features = df.columns[2:-3]  # Adjust the slice according to the feature columns and exclude labels

df = engineer_features(df, features)
df, scaler = normalize_features(df, features)
# `features` was captured before engineer_features ran, so the *_rolling_mean and
# *_delta columns it added are not part of the sequences built here.
sequences, labels = create_sequences(df, features)

X_train, X_test, y_train, y_test = split_and_tensorize(sequences, labels)
# TODO: clarify what "dummy data" should be replaced with for a real prediction.
# The model below is freshly initialised and is not trained anywhere in this
# script (the train/test tensors above are unused), and the input window is random
# noise, so the printed probability only demonstrates the call path.
model = FermenterLSTM(input_size=len(features), hidden_size=50, num_layers=2)
latest_window_data = np.random.randn(12, len(features))  # Dummy data, replace with actual
predicted_probability = predict_fermenter_status(model, latest_window_data)
print(f"Predicted probability of infection: {predicted_probability}")


🧪 Prediction probability: 0.4906
Predicted probability of infection: 0.4906059503555298
