In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import cross_val_score, cross_val_predict, StratifiedKFold, train_test_split
from sklearn.metrics import confusion_matrix, roc_auc_score, accuracy_score, f1_score, precision_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline

In [2]:
# Load every processed feature table into a dict keyed by its file stem.
# Each table is read from data/processed/<name>.csv.
datasets = {
    name: pd.read_csv(f'data/processed/{name}.csv')
    for name in (
        'df_cds_motifs',
        'df_cds_smcog',
        'df_pfam_desc',
        'df_pfam_go',
        'df_pfam_id',
    )
}

# Quick sanity check: print the (rows, columns) shape of each loaded table.
for df in datasets.values():
    print(df.shape)

(229, 87)
(229, 243)
(229, 1558)
(229, 466)
(229, 1654)


In [3]:
# Combine all feature tables into one wide frame by left-joining each table
# onto the accumulated result using the shared identifier columns.
key_cols = ['nome', 'acc', 'label']

merged_df = pd.DataFrame()
for frame in datasets.values():
    # The first table seeds the accumulator; every later table is joined on
    # the key columns (join() defaults to a left join).
    merged_df = (
        frame
        if merged_df.empty
        else merged_df.join(frame.set_index(key_cols), on=key_cols)
    )

# Give the combined frame a clean 0..n-1 index.
merged_df = merged_df.reset_index(drop=True)

# Display the accession column as a quick visual check of the result.
merged_df['acc']

0      NZ_CP023748_01
1      NZ_CP023748_02
2      NZ_CP023748_03
3      NZ_CP023748_04
4      NZ_CP023748_05
            ...      
224    NZ_CP040514_07
225    NZ_CP040514_08
226    NZ_CP040514_09
227    NZ_CP040514_10
228    NZ_CP040514_11
Name: acc, Length: 229, dtype: object

Antifungi

In [4]:
# Standardise the features, then fit an RBF-kernel SVM. Keeping the scaler inside
# the pipeline means it is re-fitted on the training part of every CV fold.
# NOTE(review): C and gamma are fixed here; presumably they come from an earlier
# hyper-parameter search -- confirm.
steps = [('scaler', StandardScaler()), ('model', SVC(C = 10, gamma = 0.0001, kernel = 'rbf', probability = True, random_state = 0))]
model = Pipeline(steps = steps)

# Build the target and the feature matrix WITHOUT mutating the loaded table.
# The previous version aliased datasets['df_pfam_id'] and used .pop(), which
# deleted 'label', 'nome' and 'acc' from the shared frame in place: re-running
# this cell raised KeyError, and re-running the merge cell afterwards failed
# because its join keys were gone.
pfam_id_df = datasets['df_pfam_id']
y = pfam_id_df['label']
X = pfam_id_df.drop(columns = ['label', 'nome', 'acc'])

# Keep `merged_df` bound to the feature-only frame, as before, so that any later
# cell reading it sees the same contents. Note that this rebinding means the
# combined frame built in the merge cell is no longer reachable under this name.
merged_df = X

# 10-fold cross-validated accuracy; n_jobs = -1 runs the folds on all cores.
scores_acc = cross_val_score(model, X, y, cv = 10, scoring = 'accuracy', n_jobs = -1)

# Report the class balance next to the mean +- std accuracy across folds.
print(y.value_counts())
print('ACC: {:.4f} +- {:.4f}'.format(scores_acc.mean(), scores_acc.std()))

label
1    142
0     87
Name: count, dtype: int64
ACC: 0.7109 +- 0.1354
