In [1]:
def count_values(arr):
  unique_values, counts = np.unique(arr, return_counts=True)

  # Print the unique values and their frequencies
  for value, count in zip(unique_values, counts):
      print(f"{value}: {count}")

In [2]:
def split_data_balanced(X, y, test_size=0.2, random_state=None):
    # Find unique labels and their counts
    unique_labels, label_counts = np.unique(y, return_counts=True)

    # Find the minimum label count
    min_label_count = np.min(label_counts)

    # Split the data for each label, ensuring balanced classes in the test set
    X_train, X_test, y_train, y_test = [], [], [], []
    for label in unique_labels:
        # Split the data for the current label
        X_label = X[y == label]
        y_label = y[y == label]
        X_label_train, X_label_test, y_label_train, y_label_test = train_test_split(
            X_label, y_label, test_size=test_size, random_state=random_state
        )

        # Add the split data to the overall train and test sets
        X_train.append(X_label_train)
        X_test.append(X_label_test)
        y_train.append(y_label_train)
        y_test.append(y_label_test)

    # Concatenate the data from all labels
    X_train = np.concatenate(X_train)
    X_test = np.concatenate(X_test)
    y_train = np.concatenate(y_train)
    y_test = np.concatenate(y_test)

    return X_train, X_test, y_train, y_test


In [3]:
def segmantation(X,y,window_length=36 ,step_size=1):
      # Define sliding window parameters
      window_length = 36  # Length of each segment
      step_size = 1  # Amount of overlap between segments
      # Segment the time series data with sliding window
      segments = []
      labels = []

      for i in range(0, len(X) - window_length, step_size):
          segment = X[i:i+window_length]
          segments.append(segment)
          
          # Assign label to the segment based on the presence of anomalies
          segment_labels = y[i:i+window_length]
          if np.any(segment_labels == 1):
              label = 1  # Anomaly present
          else:
              label = 0  # No anomaly
          labels.append(label)

      # Convert segments and labels to numpy arrays
      segments = np.array(segments)
      labels = np.array(labels)
      return segments,labels

In [4]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

file_path =r"C:\Users\hp\Desktop\M2\PFE\Code\code pfe\Coud source\Code\machine learnig algorithme test\data_kick.xlsx"
df=pd.read_excel(file_path)

In [5]:
X = df[[ 'TVA (m3)', 'SPPA (kPa)', 'MFOP ((m3/s)/(m3/s))', 'GASA (mol/mol)']]
y = df['STATUS']

In [6]:
df[df['STATUS']==1].index

Int64Index([14571, 14572, 14573, 14574, 14575, 14576, 14577, 14578, 14579,
            14580,
            ...
            53240, 53241, 53242, 53243, 53244, 53245, 53246, 53247, 53248,
            53249],
           dtype='int64', length=1238)

In [7]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
# Select the columns to normalize
# Perform the min-max normalization
X= scaler.fit_transform(X)

In [8]:
window=36
segments,labels= segmantation(X,y,window_length=window ,step_size=1)

In [9]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = split_data_balanced(segments, labels, test_size=0.2)
print(X_train.shape,X_test.shape)
# Reshape the feature matrices for SVM
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)
print(X_train.shape,X_test.shape)

(42570, 36, 4) (10644, 36, 4)
(42570, 144) (10644, 144)


In [10]:
count_values(y_test)
count_values(y_train)

0: 10354
1: 290
0: 41413
1: 1157


In [11]:
from tslearn.metrics import dtw
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
knn = KNeighborsClassifier(n_neighbors=3,metric=dtw)
knn.fit(X_train, y_train)


In [20]:

y_prd=[]

In [21]:
for i in range(2000,3000): 
    y_pred = knn.predict(X_test[i-1:i])
    y_prd.append(y_pred[0])
    # print(i,i-1)

In [22]:
count_values(y_prd)

0: 1000


In [None]:
y_actual=y_test[2000:3000]
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, accuracy_score,roc_auc_score

print(confusion_matrix(y_actual,y_prd))
print("Accuracy:", accuracy_score(y_actual,y_prd))
print("Precision:", precision_score(y_actual,y_prd))
print("Recall:", recall_score(y_actual,y_prd))
print("F1 Score:",f1_score(y_actual,y_prd))

In [19]:
y_actual=y_test[7000:8000]
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, accuracy_score,roc_auc_score

print(confusion_matrix(y_actual,y_prd))
print("Accuracy:", accuracy_score(y_actual,y_prd))
print("Precision:", precision_score(y_actual,y_prd))
print("Recall:", recall_score(y_actual,y_prd))
print("F1 Score:",f1_score(y_actual,y_prd))

[[1000]]
Accuracy: 1.0
Precision: 0.0
Recall: 0.0
F1 Score: 0.0


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


In [16]:
count_values(y_actual)

0: 1000


In [17]:
y_pred = knn.predict(X_test)

KeyboardInterrupt: 

In [None]:
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, accuracy_score,roc_auc_score

print(confusion_matrix(y_test,y_pred))
print("Accuracy:", accuracy_score(y_test,y_pred))
print("Precision:", precision_score(y_test,y_pred))
print("Recall:", recall_score(y_test,y_pred))
print("F1 Score:",f1_score(y_test,y_pred))


[[10342    12]
 [   95   195]]
Accuracy: 0.9899473881999249
Precision: 0.9420289855072463
Recall: 0.6724137931034483
F1 Score: 0.7847082494969819


In [23]:
import psutil

# CPU information
cpu_count = psutil.cpu_count(logical=False)  # Number of physical CPU cores
logical_cpu_count = psutil.cpu_count(logical=True)  # Number of logical CPU cores

# Memory information
memory_info = psutil.virtual_memory()
total_memory = memory_info.total / (1024**3)  # Total memory in GB

print(f"Physical CPU cores: {cpu_count}")
print(f"Logical CPU cores: {logical_cpu_count}")
print(f"Total memory: {total_memory:.2f} GB")


Physical CPU cores: 4
Logical CPU cores: 4
Total memory: 7.20 GB
