## 1. RandomForestClassifierCustom

In [1]:
from sklearn.datasets import make_classification
from custom_random_forest import RandomForestClassifierCustom
import time
import numpy as np

# Synthetic benchmark dataset. The generator is seeded so that every
# Restart & Run All trains and predicts on exactly the same data.
X, y = make_classification(n_samples=100_000, random_state=42)

# Two identically configured forests (same hyper-parameters, same seed):
# one is fitted with a single job, the other with two, so their timings
# and predictions can be compared directly.
random_forest_single = RandomForestClassifierCustom(max_depth=30, n_estimators=10, max_features=2, random_state=42)
random_forest_multi = RandomForestClassifierCustom(max_depth=30, n_estimators=10, max_features=2, random_state=42)


#### Measure fit time

In [2]:
# Time the training process for single-threaded execution.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed intervals; time.time() can jump if the system clock is adjusted.
start_time = time.perf_counter()
random_forest_single.fit(X, y, n_jobs=1)
single_thread_fit_time = time.perf_counter() - start_time

In [3]:
# Time the training process for multi-threaded execution.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed intervals; time.time() can jump if the system clock is adjusted.
start_time = time.perf_counter()
random_forest_multi.fit(X, y, n_jobs=2)
multi_thread_fit_time = time.perf_counter() - start_time

#### Measure predict time

In [4]:
# Time the prediction process for single-threaded execution.
# n_jobs=1 is passed explicitly so the "1 thread" measurement does not depend
# on whatever default predict() uses for n_jobs.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed intervals; time.time() can jump if the system clock is adjusted.
start_time = time.perf_counter()
predictions_single = random_forest_single.predict(X, n_jobs=1)
single_thread_predict_time = time.perf_counter() - start_time

In [5]:
# Time the prediction process for multi-threaded execution.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed intervals; time.time() can jump if the system clock is adjusted.
start_time = time.perf_counter()
predictions_multi = random_forest_multi.predict(X, n_jobs=2)
multi_thread_predict_time = time.perf_counter() - start_time

In [6]:
# Compare the single-job and two-job predictions element by element;
# the result is True only when both arrays have the same shape and values.
predictions_match = np.array_equal(
    predictions_single,
    predictions_multi,
)

In [7]:
# Summary of the benchmark: each entry is printed as "<label> <value>",
# in the same order as the measurements were taken.
benchmark_report = (
    ("Fit time with 1 thread:", single_thread_fit_time),
    ("Fit time with 2 threads:", multi_thread_fit_time),
    ("Predict time with 1 thread:", single_thread_predict_time),
    ("Predict time with 2 threads:", multi_thread_predict_time),
    ("Predictions match:", predictions_match),
)
for report_label, report_value in benchmark_report:
    print(report_label, report_value)


Fit time with 1 thread: 10.378403425216675
Fit time with 2 threads: 6.107331991195679
Predict time with 1 thread: 1.3444983959197998
Predict time with 2 threads: 1.2403366565704346
Predictions match: True


## 2. OpenFasta usage example

In [8]:
from bio_files_processor import OpenFasta

In [9]:
# Relative path to the example FASTA file used in this demonstration.
fasta_file_path = 'data/example_fasta.fasta'
# OpenFasta is used as a context manager and iterated record by record;
# every record is printed through its string representation.
# NOTE(review): presumably the file handle is released when the `with`
# block exits — confirm against the OpenFasta implementation.
with OpenFasta(fasta_file_path) as fasta:
    for record in fasta:
        print(record)

id = GTD323452 
 description = 5S_rRNA NODE_272_length_223_cov_0.720238:18-129(+) 
 sequence = ACGGCCATAGGACTTTGAAAGCACCGCATCCCGTCCGATCTGCGAAGTTAACCAAGATGCCGCCTGGTTAGTACCATGGTGGGGGACCACATGGGAATCCCTGGTGCTGTG
id = GTD678345 
 description = 16S_rRNA NODE_80_length_720_cov_1.094737:313-719(+) 
 sequence = TTGGCTTCTTAGAGGGACTTTTGATGTTTAATCAAAGGAAGTTTGAGGCAATAACAGGTCTGTGATGCCCTTAGATGTTCTGGGCCGCACGCGCGCTACACTGAGCCCTTGGGAGTGGTCCATTTGAGCCGGCAACGGCACGTTTGGACTGCAAACTTGGGCAAACTTGGTCATTTAGAGGAAGTAAAAGTCGTAACAAGGT
id = GTD174893 
 description = 16S_rRNA NODE_1_length_2558431_cov_75.185164:2153860-2155398(+) 
 sequence = TTGAAGAGTTTGATCATGGCTCAGATTGAACGCTGGCGGCAGGCCTAACACATGCAAGTCGAACGGTAACAGGAAACAGCTTGCTGTTTCGCTGACGAGTGGGAAGTAGGTAGCTTAACCTTCGGGAGGGCGCTTACCACTTTGTGATTCATGACTGGGGTGAAGTCGTAACAAGGTAACCGTAGGGGAACCTGCGGTTGGATCACCTCCTT
id = GTD906783 
 description = 16S_rRNA NODE_1_length_2558431_cov_75.185164:793941-795479(-) 
 sequence = TTGAAGAGTTTGATCATGGCTCAGATTGAACGCTGGCGGCAGGCCTAACACATGCAAGTCGAACGGT

## 3. DNA/RNA/AminoAcidSequence usage examples

In [11]:
from biotools import DNASequence, RNASequence, AminoAcidSequence

In [21]:
# Build a DNA sequence and demonstrate complement() on it.
dna = DNASequence("ATGCGT")
print(
    "1 method that works with DNA: "
    f"the complementary sequence to {dna} is {dna.complement()}"
)

1 method that works with DNA: the complementary sequence to ATGCGT is TACGCA


In [22]:
# Transcribe the DNA sequence created in the previous cell and report the
# GC content of the resulting RNA.
# NOTE(review): the label reads "%GC" while the recorded output is 0.5,
# i.e. a fraction rather than a percentage — confirm the intended units.
rna = dna.transcribe()
print(
    "1 method that works with RNA: "
    f"%GC in {rna} is {rna.gc_content()}"
)

1 method that works with RNA: %GC in AUGCGU is 0.5


In [23]:
# Build a short peptide and show its three-letter-code rendering.
peptide = AminoAcidSequence("KRKRA")
print(
    "1 method that works with proteins: "
    f"{peptide} in three-letter code is {peptide.one_to_three_letter_code()}"
)

1 method that works with proteins: KRKRA in three-letter code is Lys-Arg-Lys-Arg-Ala
