### Importing Modules

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import matplotlib.patches as mpatches
from sklearn.multiclass import OneVsRestClassifier
from sklearn.model_selection import train_test_split
import pickle
import random

# Scores
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score

# Classifiers
from sklearn.svm import SVC

# Suppress warnings (errors are still raised)
import warnings
warnings.simplefilter('ignore')

### Estimating Taxonomic Class

In [None]:
def _fit_species_models(X_train, y_train, species_train):
    """Fit one linear SVM per taxonomic class to tell its species apart.

    Parameters
    ----------
    X_train : ndarray of shape (n_samples, n_features)
        Scaled training features.
    y_train : ndarray of shape (n_samples,)
        Taxonomic class of every training row.
    species_train : ndarray of shape (n_samples,)
        Species label of every training row.

    Returns
    -------
    dict
        Maps each taxonomic class found in ``y_train`` to an ``SVC`` trained
        only on the rows belonging to that class.
    """
    models = {}
    for taxon in np.unique(y_train):
        in_taxon = y_train == taxon
        # A linear kernel ignores ``gamma``, and ``predict_proba`` is never
        # called, so neither ``gamma`` nor ``probability=True`` is needed.
        models[taxon] = SVC(kernel="linear").fit(
            X_train[in_taxon], species_train[in_taxon])
    return models


def _predict_species(taxonomy_model, species_models, X, label_dtype):
    """Predict species in two stages: taxonomic class first, then species.

    Every row of ``X`` is routed to the species model of the taxonomic class
    predicted for it, so the result always has exactly one entry per row.
    """
    predicted_taxa = taxonomy_model.predict(X)
    predicted_species = np.empty(X.shape[0], dtype=label_dtype)
    for taxon, model in species_models.items():
        routed = predicted_taxa == taxon
        # ``predict`` rejects an empty batch, so skip classes nobody hit.
        if routed.any():
            predicted_species[routed] = model.predict(X[routed])
    return predicted_species


def find_val_score(k):
    """Cross-validate the hierarchical species classifier for one value of k.

    Reads ``combined_datas[str(k)]`` (module-level dict). Column 0 is used as
    the taxonomic class, column 1 as the species label and every remaining
    column as a numeric feature. A 10-fold ``StratifiedKFold`` (stratified on
    species, shuffled with ``random_state=0``) is run; in each fold a linear
    SVM predicts the taxonomic class and a per-class linear SVM then predicts
    the species.

    Parameters
    ----------
    k : int
        Key (after ``str()``) of the data set inside ``combined_datas``.

    Returns
    -------
    tuple of (ndarray, ndarray)
        Micro-averaged species F1 scores on the training part and on the
        held-out part of every fold, in fold order.
    """
    data = combined_datas[str(k)]
    X = np.array(data.iloc[:, 2:])       # feature columns
    species = np.ravel(data.iloc[:, 1])  # species labels
    y = np.ravel(data.iloc[:, 0])        # taxonomic classes

    kf = StratifiedKFold(10, random_state=0, shuffle=True)
    f1_scores_train, f1_scores_test = [], []

    for train_index, test_index in kf.split(X, species):
        # Fit the scaler on the training fold only; fitting it on the full
        # data set would leak held-out statistics into the model.
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X[train_index])
        X_test = scaler.transform(X[test_index])

        y_train = y[train_index]
        species_train = species[train_index]
        species_test = species[test_index]

        # Stage 1: taxonomic class. Stage 2: species within that class.
        taxonomy_model = SVC(kernel="linear").fit(X_train, y_train)
        species_models = _fit_species_models(X_train, y_train, species_train)

        predictions_train = _predict_species(
            taxonomy_model, species_models, X_train, species.dtype)
        predictions_test = _predict_species(
            taxonomy_model, species_models, X_test, species.dtype)

        f1_scores_train.append(
            f1_score(species_train, predictions_train,
                     pos_label=None, average='micro'))
        f1_scores_test.append(
            f1_score(species_test, predictions_test,
                     pos_label=None, average='micro'))

    return (np.ravel(f1_scores_train), np.ravel(f1_scores_test))

### Building the Combined Data Set

In [None]:
# Build, for every k in 1..7, one table that stacks the three per-class CSV
# files and records which file each row came from in a leading
# "taxonomic_class" column. The result is stored under the key str(k).
data_names = ['Aves', 'Chiroptera', 'Rodentia']
combined_datas = {}
for k in range(1, 8):
    before_combine_df = []
    for data in data_names:
        csv_file = pd.read_csv("{}.Cleaned.k{}.csv".format(data, k))
        # Use the `columns=` keyword: passing the axis positionally
        # (`drop(labels, 1)`) is rejected by pandas >= 2.0.
        csv_file = csv_file.drop(columns=['Unnamed: 0', 'nucleotides'])
        # Put the class name in column 0, ahead of the remaining columns.
        csv_file.insert(0, "taxonomic_class", data)
        before_combine_df.append(csv_file)

    combined_datas[str(k)] = pd.concat(before_combine_df, axis=0)
    print(k)  # progress indicator

### Running the Hierarchical Classification

In [None]:
# Score every data set (k = 1..7) and keep the (train, test) F1 arrays
# returned by find_val_score under the key str(k).
hierarchical_scores = {}
for k in range(1, 8):
    hierarchical_scores[str(k)] = find_val_score(k)
    print(k)  # progress indicator

### Storing The Scores

In [None]:
# Serialize the score dictionary to 'f1_scores.db' with pickle.
with open('f1_scores.db', 'wb') as score_file:
    pickle.dump(hierarchical_scores, score_file)

In [None]:
# Show the stored scores: one entry per k, as filled in by the scoring loop.
print(hierarchical_scores)