<a href="https://colab.research.google.com/github/TheTuanub244/frontEnd/blob/main/oneRf.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import torch
import numpy as np
import os
import matplotlib.pyplot as plt
import csv
import joblib
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.utils import shuffle
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_curve, auc, precision_recall_curve, PrecisionRecallDisplay, average_precision_score

In [75]:
def load_X(path):
  list_file = os.listdir(path)
  list_file.sort(key=str.lower)
  X_signals = []
  for index in list_file:
    fileName = os.path.join(path, index)
    file = open(fileName, 'r')
    X_signals.append(
            [np.array(cell, dtype=np.float32) for cell in [
                row.strip().split(' ') for row in file
            ]]
    )
    file.close
  return np.transpose(np.array(X_signals), (1, 2, 0))

def load_y(y_path):
    file = open(y_path, 'r')
    y_ = np.array(
        [elem for elem in [
            row.replace('  ', ' ').strip().split(' ') for row in file
        ]],
        dtype=np.int32
    )
    file.close()
    y_ = y_ - 1
    y_ = y_.reshape(len(y_))
    n_values = int(np.max(y_)) + 1
    return np.eye(n_values)[np.array(y_, dtype=np.int32)]


In [76]:
X_train = load_X('Data/LAR_txt/train')
X_test = load_X('Data/LAR_txt/test')
train_label = load_y('Data/LAR_txt/label/trainLabel_2.txt')
test_label = load_y('Data/LAR_txt/label/testLabel_2.txt')

print(X_train.shape)
print(X_test.shape)
print(train_label.shape)
print(test_label.shape)

(8980, 608, 1)
(3842, 608, 1)
(8980, 2)
(3842, 2)


In [77]:
path = 'Data/OU_center_txt/train/label/train_label2.txt'
def read_csv_file(path):
  unique_values = {}
  with open(path, 'r') as file:
    reader = csv.reader(file)
    for row in reader:
      for value in row:
        if value in unique_values:
          unique_values[value] += 1
        else:
          unique_values[value] = 1
  return unique_values
unique_values = read_csv_file(path)
print(unique_values)

{'1': 25701, '2': 78424}


In [106]:
label_train_single = np.argmax(train_label, axis=1)
label_test_single = np.argmax(test_label, axis=1)

X_train_flatten = X_train.reshape((X_train.shape[0], -1))
X_test_flatten = X_test.reshape((X_test.shape[0], -1))


num_label_1 = np.sum(label_train_single == 0)
num_label_2 = np.sum(label_train_single == 1)
num_label_3 = np.sum(label_test_single == 0)
num_label_4 = np.sum(label_test_single == 1)
weight = num_label_2 / num_label_1
class_weight = {0: 1, 1: weight}





In [107]:
rf = RandomForestClassifier(n_estimators=100, class_weight=class_weight)
rf.fit(X_train_flatten, label_train_single)

In [108]:
test_label_probabilities = rf.predict_proba(X_test_flatten)
precision, recall, thresholds = precision_recall_curve(label_test_single, test_label_probabilities[:, 1])
average_precision = average_precision_score(label_test_single, test_label_probabilities[:, 1])

test_label_predictions = (test_label_probabilities[:, 1] > 0.5).astype(int)
test_label_predictions

array([1, 1, 1, ..., 1, 1, 1])

In [109]:
accuracy = accuracy_score(label_test_single, test_label_predictions)
precision = precision_score(label_test_single, test_label_predictions, average='macro')
recall = recall_score(label_test_single, test_label_predictions, average='macro')
f1 = f1_score(label_test_single, test_label_predictions, average='macro')
conf_matrix = confusion_matrix(label_test_single, test_label_predictions)
print("Test Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
print("Confusion Matrix:")
print(conf_matrix)

Test Accuracy: 0.9362311296199896
Precision: 0.9283063274039484
Recall: 0.9336073561749783
F1 Score: 0.9308364992889229
Confusion Matrix:
[[1262  103]
 [ 142 2335]]


In [115]:
accuracy = accuracy_score(label_test_single, test_label_predictions)
precision = precision_score(label_test_single, test_label_predictions, average='binary')
recall = recall_score(label_test_single, test_label_predictions, average='binary')
f1 = f1_score(label_test_single, test_label_predictions, average='binary')
conf_matrix = confusion_matrix(label_test_single, test_label_predictions)
print("Test Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
print("Confusion Matrix:")
print(conf_matrix)

Test Accuracy: 0.9362311296199896
Precision: 0.9577522559474979
Recall: 0.942672587807832
F1 Score: 0.9501525940996948
Confusion Matrix:
[[1262  103]
 [ 142 2335]]


In [113]:
with open ('results/LAR/2/KQ/RfBinary.csv', mode='a', newline='') as file:
  writer = csv.writer(file)
  writer.writerow(['metric', 'Rf'])
  writer.writerow(['accuracy', accuracy])
  writer.writerow(['precision', precision])
  writer.writerow(['recall', recall])
  writer.writerow(['f1', f1])
  writer.writerow(['confusion_matrix', conf_matrix])