In [19]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score,precision_score, recall_score, f1_score, auc, roc_curve
import pickle

class RFClassifier:
  def __init__(self,models_path,estimators_n,jobs_n,):
    self.models_path = models_path
    self.rf_classifier = RandomForestClassifier(n_estimators=estimators_n,  n_jobs=jobs_n)

  def multiple_training(self,estimators_n,jobs_n, x_train, y_train, x_test, y_test):
    test_score_RFC=[]
    RFCs=[]
    for n in estimators_n:
        clf = RandomForestClassifier(n_estimators= int(n), n_jobs= jobs_n)
        clf.fit(x_train, np.ravel(y_train))
        y_pred = clf.predict(x_test)
        scores = self.scores(np.ravel(y_pred), np.ravel(y_test))
        test_score_RFC.append(scores)  
        RFCs.append(clf)
    for neighbor, tr_sc in zip((estimators_n),test_score_RFC): 
        print(f"Estimator = {neighbor}")
        print('Accuracy: {:.4f}, Precision: {:.4f}, Recall: {:.4f}, F1: {:.4f}'.format(tr_sc[0],tr_sc[1],tr_sc[2],tr_sc[3]))
    return RFCs

  def tain(self, x_train, y_train):
    # Train a Random Forest classifier on the training set
    self.rf_classifier.fit(x_train, np.ravel(y_train))
     
  def predict(self, x_test):
    # Evaluate the performance of the classifier on the testing set
    y_pred = self.rf_classifier.predict(x_test)
    return y_pred

  def scores(self, y_test, y_pred):     
    prec = precision_score(y_test, y_pred)
    rec = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    accuracy = accuracy_score(y_test, y_pred)
    return [accuracy,prec,rec,f1]

  def print_scores(self,acc,prec,rec,f1):
    print("Accuracy: {}".format(acc))
    print("Precision: {}".format(prec))
    print("Recall: {}".format(rec))
    print("F1: {}".format(f1))
     
  def save_model(self, file_name):
    # save the model
    with open(self.models_path+file_name, 'wb') as f:
        pickle.dump(self.rf_classifier, f)

  def load_model(self,file_name):
    # Load the model                                                                                                                                                                                                       
    with open(self.models_path+file_name) as f:
        self.rf_classifier = pickle.load(f)
    return self.rf_classifier

## Example

In [2]:
import sys
sys.path.insert(0,"/content/drive/MyDrive/ITMO-Master's/Thesis/3rd_semester")

In [3]:
import DataClass 
from DataClass import Data
parquet_path = "/content/drive/MyDrive/ITMO-Master's/Thesis/3rd_semester/Data/parquet_files/"
# read prepare data
data = Data( 0, "", "",parquet_path)

In [5]:
from sklearn.model_selection import train_test_split
d=data.return_array_df(data.scaled_right_stances,'new_y',['r_swings_time','max_peak','fwhm_value'])
y = data.scaled_right_stances[[ 'y']]
X_train, X_test, y_train, y_test = train_test_split(d, y, random_state=42)

In [20]:
rfc = RFClassifier("",200,2)
rfc.tain(X_train,y_train)
y_pred=rfc.test(X_test)
scores=rfc.scores(y_test, y_pred)

In [11]:
rfc.print_scores(scores[0],scores[1],scores[2],scores[3])

Accuracy: 0.9260879072818718
Precision: 0.9195919591959196
Recall: 0.9776714513556619
F1: 0.9477427334570192


In [21]:
ccs=rfc.multiple_training([5, 10, 20, 40, 100, 200],2,X_train, y_train, X_test, y_test)

Estimator = 5
Accuracy: 0.8767, Precision: 0.9346, Recall: 0.8908, F1: 0.9122
Estimator = 10
Accuracy: 0.8972, Precision: 0.9368, Recall: 0.9152, F1: 0.9259
Estimator = 20
Accuracy: 0.9152, Precision: 0.9525, Recall: 0.9259, F1: 0.9390
Estimator = 40
Accuracy: 0.9176, Precision: 0.9659, Recall: 0.9181, F1: 0.9414
Estimator = 100
Accuracy: 0.9263, Precision: 0.9780, Recall: 0.9196, F1: 0.9479
Estimator = 200
Accuracy: 0.9261, Precision: 0.9764, Recall: 0.9206, F1: 0.9477
