In [1]:
from cognite.client import CogniteClient
from retrieve_data import RetrieveData
import numpy as np
import matplotlib as plt
import pandas as pd

# Instantiate Cognite SDK client:
client = CogniteClient()

In [2]:
#1. Initialize a DataRetriever Class.
retriever = RetrieveData(client)

In [3]:
#2. Assign the desired pi tag to a variable
pitag1 = "KASAWARI_PI_PC.SKA.KSCPP.DCS.SW.22PIA-0028.PV"
start_time = "365d-ago"
end_time = "now"
agg = "interpolation"
interval = "10m"




In [4]:
#3. Call out retrieve_data method from the initialized class
df1 = retriever.retrieve_data(ex_id=pitag1, start_time_str=start_time, end_time_str=end_time, agg=agg, interval=interval)

In [5]:
#4. display top 5 of the dataframe and info
df1.head()

Unnamed: 0,Timestamp,value
0,2024-07-31 17:09:00,78.960214
1,2024-07-31 17:19:00,78.15077
2,2024-07-31 17:29:00,77.485237
3,2024-07-31 17:39:00,77.185356
4,2024-07-31 17:49:00,76.919119


In [6]:
df1.describe()

Unnamed: 0,Timestamp,value
count,35885,35885.0
mean,2024-12-10 12:08:24.235752960,67.776147
min,2024-07-31 17:09:00,-0.664977
25%,2024-10-02 21:19:00,72.932143
50%,2024-12-04 12:49:00,74.414554
75%,2025-02-15 08:09:00,75.580904
max,2025-05-27 01:49:00,86.887728
std,,21.413453


In [7]:
#function to label anomaly and alarms
def assign_status(value, mean, std):
    try:
        if pd.isna(value):
            return "Bad Data"
        elif value < 65.8:
            return "Low Alarm"
        elif value > 86:
            return "High Alarm"
        elif abs(value-mean) > std:
            return "Anomaly"
        else:
            return "Normal"
    except:
        return "Bad Data"


In [8]:
# Function to label TTF from anomaly to next alarm
def calculate_ttf(df, time_col="Timestamp"):
    df["TTF"] = None
    
    # Create a new DataFrame that filters only alarm rows (Low/High Alarm)
    alarm_rows = df[df["status"].isin(["Low Alarm", "High Alarm"])].reset_index(drop=True)

    # Calculate TTF for "Anomaly" rows that precede an Alarm
    for i in df.index:
        if df.at[i, "status"] == "Anomaly":
            current_time = df.at[i, time_col]
            future_alarm = alarm_rows[alarm_rows[time_col] > current_time]
            if not future_alarm.empty:
                next_alarm_time = future_alarm.iloc[0][time_col]
                ttf_hours = (next_alarm_time - current_time).total_seconds() / 3600
                df.at[i, "TTF"] = round(ttf_hours, 2)
    
    return df

In [9]:
#Function to label status pre alarm
def label_status_pre_alarm(df, value_col="value", time_col="Timestamp", pre_alarm_window=3):
    df = df.copy()
    df["status"] = "Normal"

    # Find indexes of alarm rows
    alarm_indexes = df[df["value"] < 65.8].index.tolist() + df[df["value"] > 86].index.tolist()

    for idx in alarm_indexes:
        df.at[idx, "status"] = "Low Alarm" if df.at[idx, value_col] < 65.8 else "High Alarm"

        # Flag N rows before the alarm as Anomaly
        for i in range(1, pre_alarm_window + 1):
            if idx - i >= 0 and df.at[idx - i, "status"] == "Normal":
                df.at[idx - i, "status"] = "Anomaly"

    return df


In [10]:
# mean = df1["value"].mean()
# std = df1["value"].std()
df1["Timestamp"] = pd.to_datetime(df1["Timestamp"])

In [11]:
#Step 1: Assign Status
df1 = label_status_pre_alarm(df1, value_col="value", pre_alarm_window=3)

In [12]:
#Step 2: Caclulate TTF
df1 = calculate_ttf(df1)

In [13]:
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 35885 entries, 0 to 35884
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Timestamp  35885 non-null  datetime64[ns]
 1   value      35885 non-null  float64       
 2   status     35885 non-null  object        
 3   TTF        76 non-null     object        
dtypes: datetime64[ns](1), float64(1), object(2)
memory usage: 841.1+ KB


In [14]:
retriever.save_to_csv(df1)

Saved to PV.csv


In [15]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score

# STEP 1: Prepare training data
# Keep only rows where TTF is available (i.e., anomaly rows before alarm)
train_df = df1[df1["TTF"].notna()].copy()

# STEP 2: Define feature(s) and target
X = train_df[["value"]]  # You can add more features later
y = train_df["TTF"]

# STEP 3: Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# STEP 4: Train the Linear Regression model
model = LinearRegression()
model.fit(X_train, y_train)

# STEP 5: Evaluate the model
y_pred = model.predict(X_test)

print("📊 Model Evaluation")
print("----------------------------")
print("Mean Absolute Error:", round(mean_absolute_error(y_test, y_pred), 2))
print("R² Score:", round(r2_score(y_test, y_pred), 2))

# STEP 6: Predict TTF for a new process value
new_value = [[75.0]]  # Replace with current or test value
predicted_ttf = model.predict(new_value)
print(f"\n🕒 Predicted TTF for value {new_value[0][0]}: {predicted_ttf[0]:.2f} hours")


📊 Model Evaluation
----------------------------
Mean Absolute Error: 2.08
R² Score: -465.16





🕒 Predicted TTF for value 75.0: 3.07 hours
