In [11]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score
from sklearn.feature_extraction.text import CountVectorizer

import matplotlib.pyplot as plt
from collections import Counter

from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio import SeqIO

import os
import pandas as pd

import numpy as np
from imblearn.over_sampling import SMOTE

In [None]:
files = []
folder = "proba2/direct"

for file in os.listdir(folder):
    if file.startswith("."):
        continue
    files.append(folder + "/" + file)

labels = []
repeats = []

# Read data from each file
for file in files:
    with open(file, "r") as f:
        for line in f:
            parts = line.strip().split(",")
            if len(parts) >= 2:
                label = parts[0]
                repeat = parts[1]
                labels.append(label)
                repeats.append(repeat)

files = []
folder = "trainSet"

for file in os.listdir(folder):
    if file.startswith("."):
        continue
    files.append(folder + "/" + file)

labelTrain = []
sequencesTrain = []

# Read data from each file
for file in files:
    records = SeqIO.parse(file, "fasta")
    for record in records:
        sequencesTrain.append(record.seq)
        labelTrain.append(file.replace("trainSet/", "").replace(".fasta", ""))

labelTest = []
sequencesTest = []

files = []
folder = "testSet"

for file in os.listdir(folder):
    if file.startswith("."):
        continue
    files.append(folder + "/" + file)


# Read data from each file
for file in files:
    records = SeqIO.parse(file, "fasta")
    for record in records:
        sequencesTest.append(record.seq)
        labelTest.append(file.replace("testSet/", "").replace(".fasta", ""))

known_repeats = list(set(repeats))
print(f"Number of unique repeats: {len(known_repeats)}")

def extract_repeat_counts(sequence, repeat_list):
    return [sequence.count(r) for r in repeat_list]

X_train = [extract_repeat_counts(seq, known_repeats) for seq in sequencesTrain]
y_train = labelTrain

X_test = [extract_repeat_counts(seq, known_repeats) for seq in sequencesTest]
y_test = labelTest

encoder = LabelEncoder()
y_train_enc = encoder.fit_transform(y_train)
y_test_enc = encoder.transform(y_test)

X_train_np = np.array(X_train)

smote = SMOTE(random_state=42, k_neighbors=2)
X_train_res, y_train_enc_res = smote.fit_resample(X_train_np, y_train_enc)

nb = MultinomialNB()
nb.fit(X_train_res, y_train_enc_res)

# Predict
y_pred_enc = nb.predict(X_test)
y_pred = encoder.inverse_transform(y_pred_enc)

# Report
print("Classification report: ")
print(classification_report(y_test, y_pred))

print("Accuracy score: " + str(accuracy_score(y_test, y_pred)))

In [None]:
# rezultati:
"""
Classification report: 
              precision    recall  f1-score   support

   HCoV-229E       0.99      0.99      0.99       153
   HCoV-HKU1       0.86      1.00      0.93        99
   HCoV-NL63       1.00      0.99      0.99       214
   HCoV-OC43       1.00      0.95      0.97       366
         IBV       1.00      1.00      1.00      3153
    MERS-CoV       1.00      1.00      1.00       395
    SARS-CoV       0.38      1.00      0.55         3
   SARS-CoV2       1.00      1.00      1.00      2133
     bat-CoV       0.72      0.81      0.76        16
  bovine-CoV       0.98      0.99      0.99       378
  canine-CoV       0.89      0.93      0.91       219
 dolphin-CoV       1.00      1.00      1.00         3
  equine-CoV       1.00      0.86      0.92         7
  feline-CoV       0.97      0.95      0.96       536
  ferret-CoV       1.00      1.00      1.00        13
hedgehog-CoV       1.00      1.00      1.00         6
 porcine-CoV       1.00      1.00      1.00       132
  rabbit-CoV       1.00      1.00      1.00         6
     rat-CoV       0.89      1.00      0.94         8
  turkey-CoV       0.82      1.00      0.90        27

    accuracy                           0.99      7867
   macro avg       0.92      0.97      0.94      7867
weighted avg       0.99      0.99      0.99      7867

Accuracy score: 0.989195373077412
"""

In [None]:
files = []
folder = "proba2/indirect"

for file in os.listdir(folder):
    if file.startswith("."):
        continue
    files.append(folder + "/" + file)

labels = []
repeats = []

# Read data from each file
for file in files:
    with open(file, "r") as f:
        for line in f:
            parts = line.strip().split(",")
            if len(parts) >= 2:
                label = parts[0]
                repeat = parts[1]
                labels.append(label)
                repeats.append(repeat)

files = []
folder = "trainSet"

for file in os.listdir(folder):
    if file.startswith("."):
        continue
    files.append(folder + "/" + file)

labelTrain = []
sequencesTrain = []

# Read data from each file
for file in files:
    records = SeqIO.parse(file, "fasta")
    for record in records:
        sequencesTrain.append(record.seq)
        labelTrain.append(file.replace("trainSet/", "").replace(".fasta", ""))

labelTest = []
sequencesTest = []

files = []
folder = "testSet"

for file in os.listdir(folder):
    if file.startswith("."):
        continue
    files.append(folder + "/" + file)


# Read data from each file
for file in files:
    records = SeqIO.parse(file, "fasta")
    for record in records:
        sequencesTest.append(record.seq)
        labelTest.append(file.replace("testSet/", "").replace(".fasta", ""))

known_repeats = list(set(repeats))
print(f"Number of unique repeats: {len(known_repeats)}")

def extract_repeat_counts(sequence, repeat_list):
    return [sequence.count(r) for r in repeat_list]

X_train = [extract_repeat_counts(seq, known_repeats) for seq in sequencesTrain]
y_train = labelTrain

X_test = [extract_repeat_counts(seq, known_repeats) for seq in sequencesTest]
y_test = labelTest

encoder = LabelEncoder()
y_train_enc = encoder.fit_transform(y_train)
y_test_enc = encoder.transform(y_test)

X_train_np = np.array(X_train)

smote = SMOTE(random_state=42, k_neighbors=2)
X_train_res, y_train_enc_res = smote.fit_resample(X_train_np, y_train_enc)

nb = MultinomialNB()
nb.fit(X_train_res, y_train_enc_res)

# Predict
y_pred_enc = nb.predict(X_test)
y_pred = encoder.inverse_transform(y_pred_enc)

# Report
print("Classification report: ")
print(classification_report(y_test, y_pred))

print("Accuracy score: " + str(accuracy_score(y_test, y_pred)))

In [None]:
# rezultat
"""
Classification report: 
              precision    recall  f1-score   support

   HCoV-229E       0.70      0.98      0.82       153
   HCoV-HKU1       0.84      1.00      0.91        99
   HCoV-NL63       1.00      0.98      0.99       214
   HCoV-OC43       1.00      0.91      0.95       366
         IBV       1.00      1.00      1.00      3153
    MERS-CoV       1.00      0.99      0.99       395
    SARS-CoV       0.75      1.00      0.86         3
   SARS-CoV2       1.00      1.00      1.00      2133
     bat-CoV       0.67      0.88      0.76        16
  bovine-CoV       0.95      0.99      0.97       378
  canine-CoV       0.91      0.93      0.92       219
 dolphin-CoV       0.75      1.00      0.86         3
  equine-CoV       0.86      0.86      0.86         7
  feline-CoV       0.97      0.84      0.90       536
  ferret-CoV       0.93      1.00      0.96        13
hedgehog-CoV       1.00      1.00      1.00         6
 porcine-CoV       0.92      1.00      0.96       132
  rabbit-CoV       0.56      0.83      0.67         6
     rat-CoV       0.89      1.00      0.94         8
  turkey-CoV       0.84      1.00      0.92        27

    accuracy                           0.98      7867
   macro avg       0.88      0.96      0.91      7867
weighted avg       0.98      0.98      0.98      7867

Accuracy score: 0.97775517986526
"""

In [12]:
files = []
folder = "../proba2/direct"

for file in os.listdir(folder):
    if file.startswith("."):
        continue
    files.append(folder + "/" + file)

labels = []
repeats = []

# Read data from each file
for file in files:
    with open(file, "r") as f:
        for line in f:
            parts = line.strip().split(",")
            if len(parts) >= 2:
                label = parts[0]
                repeat = parts[1]
                labels.append(label)
                repeats.append(repeat)

files = []
folder = "../trainSet"

for file in os.listdir(folder):
    if file.startswith("."):
        continue
    files.append(folder + "/" + file)

labelTrain = []
sequencesTrain = []

# Read data from each file
for file in files:
    records = SeqIO.parse(file, "fasta")
    for record in records:
        sequencesTrain.append(record.seq)
        labelTrain.append(file.replace("../trainSet/", "").replace(".fasta", ""))

labelTest = []
sequencesTest = []

files = []
folder = "../testSet"

for file in os.listdir(folder):
    if file.startswith("."):
        continue
    files.append(folder + "/" + file)


# Read data from each file
for file in files:
    records = SeqIO.parse(file, "fasta")
    for record in records:
        sequencesTest.append(record.seq)
        labelTest.append(file.replace("../testSet/", "").replace(".fasta", ""))

known_repeats = list(set(repeats))
print(f"Number of unique repeats: {len(known_repeats)}")

def extract_repeat_counts(sequence, repeat_list):
    return [sequence.count(r) for r in repeat_list]

X_train = [extract_repeat_counts(seq, known_repeats) for seq in sequencesTrain]
y_train = labelTrain

X_test = [extract_repeat_counts(seq, known_repeats) for seq in sequencesTest]
y_test = labelTest

encoder = LabelEncoder()
y_train_enc = encoder.fit_transform(y_train)
y_test_enc = encoder.transform(y_test)

# Train Multinomial Naive Bayes
nb = MultinomialNB()
nb.fit(X_train, y_train_enc)

# Predict
y_pred_enc = nb.predict(X_test)
y_pred = encoder.inverse_transform(y_pred_enc)

# Report
print("Classification report: ")
print(classification_report(y_test, y_pred))

print("Accuracy score: " + str(accuracy_score(y_test, y_pred)))

Number of unique repeats: 798
Classification report: 
              precision    recall  f1-score   support

    MERS-CoV       1.00      0.98      0.99       395
    SARS-CoV       0.00      0.00      0.00         3
     bat-CoV       0.67      1.00      0.80        16
  bovine-CoV       1.00      0.99      1.00       378
  canine-CoV       0.82      0.89      0.86       219
  feline-CoV       0.95      0.92      0.94       536
hedgehog-CoV       0.75      1.00      0.86         6
 porcine-CoV       1.00      1.00      1.00       132

    accuracy                           0.95      1685
   macro avg       0.77      0.85      0.80      1685
weighted avg       0.96      0.95      0.95      1685

Accuracy score: 0.9543026706231454


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [13]:
files = []
folder = "../proba2/indirect"

for file in os.listdir(folder):
    if file.startswith("."):
        continue
    files.append(folder + "/" + file)

labels = []
repeats = []

# Read data from each file
for file in files:
    with open(file, "r") as f:
        for line in f:
            parts = line.strip().split(",")
            if len(parts) >= 2:
                label = parts[0]
                repeat = parts[1]
                labels.append(label)
                repeats.append(repeat)

files = []
folder = "../trainSet"

for file in os.listdir(folder):
    if file.startswith("."):
        continue
    files.append(folder + "/" + file)

labelTrain = []
sequencesTrain = []

# Read data from each file
for file in files:
    records = SeqIO.parse(file, "fasta")
    for record in records:
        sequencesTrain.append(record.seq)
        labelTrain.append(file.replace("../trainSet/", "").replace(".fasta", ""))

labelTest = []
sequencesTest = []

files = []
folder = "../testSet"

for file in os.listdir(folder):
    if file.startswith("."):
        continue
    files.append(folder + "/" + file)


# Read data from each file
for file in files:
    records = SeqIO.parse(file, "fasta")
    for record in records:
        sequencesTest.append(record.seq)
        labelTest.append(file.replace("../testSet/", "").replace(".fasta", ""))

known_repeats = list(set(repeats))
print(f"Number of unique repeats: {len(known_repeats)}")

def extract_repeat_counts(sequence, repeat_list):
    return [sequence.count(r) for r in repeat_list]

X_train = [extract_repeat_counts(seq, known_repeats) for seq in sequencesTrain]
y_train = labelTrain

X_test = [extract_repeat_counts(seq, known_repeats) for seq in sequencesTest]
y_test = labelTest

encoder = LabelEncoder()
y_train_enc = encoder.fit_transform(y_train)
y_test_enc = encoder.transform(y_test)

# Train Multinomial Naive Bayes
nb = MultinomialNB()
nb.fit(X_train, y_train_enc)

# Predict
y_pred_enc = nb.predict(X_test)
y_pred = encoder.inverse_transform(y_pred_enc)

# Report
print("Classification report: ")
print(classification_report(y_test, y_pred))

print("Accuracy score: " + str(accuracy_score(y_test, y_pred)))

Number of unique repeats: 1007
Classification report: 
              precision    recall  f1-score   support

    MERS-CoV       0.98      0.98      0.98       395
    SARS-CoV       1.00      1.00      1.00         3
     bat-CoV       0.76      1.00      0.86        16
  bovine-CoV       0.99      0.99      0.99       378
  canine-CoV       0.90      0.88      0.89       219
  feline-CoV       0.96      0.95      0.96       536
hedgehog-CoV       0.62      0.83      0.71         6
 porcine-CoV       1.00      1.00      1.00       132

    accuracy                           0.96      1685
   macro avg       0.90      0.96      0.93      1685
weighted avg       0.96      0.96      0.96      1685

Accuracy score: 0.9632047477744807


In [14]:
files = []
folder = "../proba3/DC"

for file in os.listdir(folder):
    if file.startswith("."):
        continue
    files.append(folder + "/" + file)

labels = []
repeats = []

# Read data from each file
for file in files:
    with open(file, "r") as f:
        for line in f:
            parts = line.strip().split(",")
            if len(parts) >= 2:
                label = parts[0]
                repeat = parts[1]
                labels.append(label)
                repeats.append(repeat)

files = []
folder = "../trainSetNucl"

for file in os.listdir(folder):
    if file.startswith("."):
        continue
    files.append(folder + "/" + file)

labelTrain = []
sequencesTrain = []

# Read data from each file
for file in files:
    records = SeqIO.parse(file, "fasta")
    for record in records:
        sequencesTrain.append(record.seq)
        labelTrain.append(file.replace("../trainSetNucl/", "").replace(".fasta", ""))

labelTest = []
sequencesTest = []

files = []
folder = "../testSetNucl"

for file in os.listdir(folder):
    if file.startswith("."):
        continue
    files.append(folder + "/" + file)


# Read data from each file
for file in files:
    records = SeqIO.parse(file, "fasta")
    for record in records:
        sequencesTest.append(record.seq)
        labelTest.append(file.replace("../testSetNucl/", "").replace(".fasta", ""))

known_repeats = list(set(repeats))
print(f"Number of unique repeats: {len(known_repeats)}")

def extract_repeat_counts(sequence, repeat_list):
    return [sequence.count(r) for r in repeat_list]

X_train = [extract_repeat_counts(seq, known_repeats) for seq in sequencesTrain]
y_train = labelTrain

X_test = [extract_repeat_counts(seq, known_repeats) for seq in sequencesTest]
y_test = labelTest

encoder = LabelEncoder()
y_train_enc = encoder.fit_transform(y_train)
y_test_enc = encoder.transform(y_test)

# Train Multinomial Naive Bayes
nb = MultinomialNB()
nb.fit(X_train, y_train_enc)

# Predict
y_pred_enc = nb.predict(X_test)
y_pred = encoder.inverse_transform(y_pred_enc)

# Report
print("Classification report: ")
print(classification_report(y_test, y_pred))

print("Accuracy score: " + str(accuracy_score(y_test, y_pred)))

Number of unique repeats: 4335
Classification report: 
              precision    recall  f1-score   support

    MERS-CoV       0.99      0.99      0.99       395
    SARS-CoV       0.60      1.00      0.75         3
     bat-CoV       1.00      0.93      0.97        15
  bovine-CoV       1.00      1.00      1.00       375
  canine-CoV       0.91      0.98      0.94       218
  feline-CoV       0.99      0.93      0.96       536
hedgehog-CoV       0.86      1.00      0.92         6
 porcine-CoV       0.90      1.00      0.95       132

    accuracy                           0.97      1680
   macro avg       0.91      0.98      0.93      1680
weighted avg       0.97      0.97      0.97      1680

Accuracy score: 0.9714285714285714


In [15]:
files = []
folder = "../proba3/DN"

for file in os.listdir(folder):
    if file.startswith("."):
        continue
    files.append(folder + "/" + file)

labels = []
repeats = []

# Read data from each file
for file in files:
    with open(file, "r") as f:
        for line in f:
            parts = line.strip().split(",")
            if len(parts) >= 2:
                label = parts[0]
                repeat = parts[1]
                labels.append(label)
                repeats.append(repeat)

files = []
folder = "../trainSetNucl"

for file in os.listdir(folder):
    if file.startswith("."):
        continue
    files.append(folder + "/" + file)

labelTrain = []
sequencesTrain = []

# Read data from each file
for file in files:
    records = SeqIO.parse(file, "fasta")
    for record in records:
        sequencesTrain.append(record.seq)
        labelTrain.append(file.replace("../trainSetNucl/", "").replace(".fasta", ""))

labelTest = []
sequencesTest = []

files = []
folder = "../testSetNucl"

for file in os.listdir(folder):
    if file.startswith("."):
        continue
    files.append(folder + "/" + file)


# Read data from each file
for file in files:
    records = SeqIO.parse(file, "fasta")
    for record in records:
        sequencesTest.append(record.seq)
        labelTest.append(file.replace("../testSetNucl/", "").replace(".fasta", ""))

known_repeats = list(set(repeats))
print(f"Number of unique repeats: {len(known_repeats)}")

def extract_repeat_counts(sequence, repeat_list):
    return [sequence.count(r) for r in repeat_list]

X_train = [extract_repeat_counts(seq, known_repeats) for seq in sequencesTrain]
y_train = labelTrain

X_test = [extract_repeat_counts(seq, known_repeats) for seq in sequencesTest]
y_test = labelTest

encoder = LabelEncoder()
y_train_enc = encoder.fit_transform(y_train)
y_test_enc = encoder.transform(y_test)

# Train Multinomial Naive Bayes
nb = MultinomialNB()
nb.fit(X_train, y_train_enc)

# Predict
y_pred_enc = nb.predict(X_test)
y_pred = encoder.inverse_transform(y_pred_enc)

# Report
print("Classification report: ")
print(classification_report(y_test, y_pred))

print("Accuracy score: " + str(accuracy_score(y_test, y_pred)))

Number of unique repeats: 6309
Classification report: 
              precision    recall  f1-score   support

    MERS-CoV       1.00      0.99      1.00       395
    SARS-CoV       0.27      1.00      0.43         3
     bat-CoV       1.00      0.93      0.97        15
  bovine-CoV       1.00      1.00      1.00       375
  canine-CoV       0.90      0.94      0.92       218
  feline-CoV       0.99      0.96      0.97       536
hedgehog-CoV       0.86      1.00      0.92         6
 porcine-CoV       1.00      1.00      1.00       132

    accuracy                           0.98      1680
   macro avg       0.88      0.98      0.90      1680
weighted avg       0.98      0.98      0.98      1680

Accuracy score: 0.9773809523809524


In [16]:
files = []
folder = "../proba3/IN"

for file in os.listdir(folder):
    if file.startswith("."):
        continue
    files.append(folder + "/" + file)

labels = []
repeats = []

# Read data from each file
for file in files:
    with open(file, "r") as f:
        for line in f:
            parts = line.strip().split(",")
            if len(parts) >= 2:
                label = parts[0]
                repeat = parts[1]
                labels.append(label)
                repeats.append(repeat)

files = []
folder = "../trainSetNucl"

for file in os.listdir(folder):
    if file.startswith("."):
        continue
    files.append(folder + "/" + file)

labelTrain = []
sequencesTrain = []

# Read data from each file
for file in files:
    records = SeqIO.parse(file, "fasta")
    for record in records:
        sequencesTrain.append(record.seq)
        labelTrain.append(file.replace("../trainSetNucl/", "").replace(".fasta", ""))

labelTest = []
sequencesTest = []

files = []
folder = "../testSetNucl"

for file in os.listdir(folder):
    if file.startswith("."):
        continue
    files.append(folder + "/" + file)


# Read data from each file
for file in files:
    records = SeqIO.parse(file, "fasta")
    for record in records:
        sequencesTest.append(record.seq)
        labelTest.append(file.replace("../testSetNucl/", "").replace(".fasta", ""))

known_repeats = list(set(repeats))
print(f"Number of unique repeats: {len(known_repeats)}")

def extract_repeat_counts(sequence, repeat_list):
    return [sequence.count(r) for r in repeat_list]

X_train = [extract_repeat_counts(seq, known_repeats) for seq in sequencesTrain]
y_train = labelTrain

X_test = [extract_repeat_counts(seq, known_repeats) for seq in sequencesTest]
y_test = labelTest

encoder = LabelEncoder()
y_train_enc = encoder.fit_transform(y_train)
y_test_enc = encoder.transform(y_test)

# Train Multinomial Naive Bayes
nb = MultinomialNB()
nb.fit(X_train, y_train_enc)

# Predict
y_pred_enc = nb.predict(X_test)
y_pred = encoder.inverse_transform(y_pred_enc)

# Report
print("Classification report: ")
print(classification_report(y_test, y_pred))

print("Accuracy score: " + str(accuracy_score(y_test, y_pred)))

Number of unique repeats: 4897
Classification report: 
              precision    recall  f1-score   support

    MERS-CoV       1.00      0.99      1.00       395
    SARS-CoV       0.75      1.00      0.86         3
     bat-CoV       1.00      0.93      0.97        15
  bovine-CoV       1.00      1.00      1.00       375
  canine-CoV       0.90      0.91      0.91       218
  feline-CoV       0.96      0.96      0.96       536
hedgehog-CoV       0.86      1.00      0.92         6
 porcine-CoV       1.00      1.00      1.00       132

    accuracy                           0.97      1680
   macro avg       0.93      0.98      0.95      1680
weighted avg       0.97      0.97      0.97      1680

Accuracy score: 0.9744047619047619


In [17]:
files = []
folder = "../proba3/IC"

for file in os.listdir(folder):
    if file.startswith("."):
        continue
    files.append(folder + "/" + file)

labels = []
repeats = []

# Read data from each file
for file in files:
    with open(file, "r") as f:
        for line in f:
            parts = line.strip().split(",")
            if len(parts) >= 2:
                label = parts[0]
                repeat = parts[1]
                labels.append(label)
                repeats.append(repeat)

files = []
folder = "../trainSetNucl"

for file in os.listdir(folder):
    if file.startswith("."):
        continue
    files.append(folder + "/" + file)

labelTrain = []
sequencesTrain = []

# Read data from each file
for file in files:
    records = SeqIO.parse(file, "fasta")
    for record in records:
        sequencesTrain.append(record.seq)
        labelTrain.append(file.replace("../trainSetNucl/", "").replace(".fasta", ""))

labelTest = []
sequencesTest = []

files = []
folder = "../testSetNucl"

for file in os.listdir(folder):
    if file.startswith("."):
        continue
    files.append(folder + "/" + file)


# Read data from each file
for file in files:
    records = SeqIO.parse(file, "fasta")
    for record in records:
        sequencesTest.append(record.seq)
        labelTest.append(file.replace("../testSetNucl/", "").replace(".fasta", ""))

known_repeats = list(set(repeats))
print(f"Number of unique repeats: {len(known_repeats)}")

def extract_repeat_counts(sequence, repeat_list):
    return [sequence.count(r) for r in repeat_list]

X_train = [extract_repeat_counts(seq, known_repeats) for seq in sequencesTrain]
y_train = labelTrain

X_test = [extract_repeat_counts(seq, known_repeats) for seq in sequencesTest]
y_test = labelTest

encoder = LabelEncoder()
y_train_enc = encoder.fit_transform(y_train)
y_test_enc = encoder.transform(y_test)

# Train Multinomial Naive Bayes
nb = MultinomialNB()
nb.fit(X_train, y_train_enc)

# Predict
y_pred_enc = nb.predict(X_test)
y_pred = encoder.inverse_transform(y_pred_enc)

# Report
print("Classification report: ")
print(classification_report(y_test, y_pred))

print("Accuracy score: " + str(accuracy_score(y_test, y_pred)))

Number of unique repeats: 5163
Classification report: 
              precision    recall  f1-score   support

    MERS-CoV       1.00      0.99      1.00       395
    SARS-CoV       0.75      1.00      0.86         3
     bat-CoV       1.00      0.93      0.97        15
  bovine-CoV       1.00      1.00      1.00       375
  canine-CoV       0.91      0.98      0.94       218
  feline-CoV       0.99      0.96      0.97       536
hedgehog-CoV       0.86      1.00      0.92         6
 porcine-CoV       1.00      1.00      1.00       132

    accuracy                           0.98      1680
   macro avg       0.94      0.98      0.96      1680
weighted avg       0.98      0.98      0.98      1680

Accuracy score: 0.9827380952380952
