In [None]:
# Setting seeds for reproducibility
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
import json
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from imblearn.over_sampling import ADASYN, SMOTE
import pandas as pd
# Seed the global NumPy and TensorFlow random number generators.
# Python's built-in `random` module is not seeded by these two calls.
np.random.seed(42)
tf.random.set_seed(42)

In [None]:
# Experiment selector read by complete_evaluate(). In the code shown in this
# notebook only the value 3 is handled (training/evaluation on rem_new_data).
SETTING = 3

In [None]:
# Locations of the per-protein property table and the two DrugBank ID lists.
data_file_path = "../../Features/PCFs/files_for_ml/protein_props.json"
druggable_proteins_file_path = "../../DrugBank/druggable_proteins.txt"
approved_druggable_proteins_file_path = "../../DrugBank/approved_druggable_proteins.txt"

# Parse the property JSON and report how many entries it holds.
with open(data_file_path, 'r') as props_file:
    protein_data = json.load(props_file)
print("Total number of uniprot human verified proteins:", len(protein_data))

# Extracting list of druggable and approved druggable proteins
# (each file is read as one entry per line).
id_lists = []
for ids_path in (druggable_proteins_file_path, approved_druggable_proteins_file_path):
    with open(ids_path, 'r') as ids_file:
        id_lists.append(ids_file.read().splitlines())
druggable_proteins, approved_druggable_proteins = id_lists

print("Number of druggable proteins:", len(druggable_proteins))
print("Number of approved druggable proteins:", len(approved_druggable_proteins))


# Fetching feature data for all proteins. After the transpose the frame's index
# holds the identifiers that are matched against the DrugBank lists below.
properties = pd.read_json(data_file_path).transpose()

# Membership is tested against sets rather than the raw lists: the result is the
# same, but each lookup is O(1) instead of a scan over several thousand IDs.
druggable_ids = set(druggable_proteins)
approved_druggable_ids = set(approved_druggable_proteins)
is_druggable = [1 if protein_id in druggable_ids else 0 for protein_id in properties.index]
is_approved_druggable = [1 if protein_id in approved_druggable_ids else 0 for protein_id in properties.index]

# Binary label columns (1 = listed in the corresponding DrugBank file).
properties["is_druggable"] = is_druggable
properties["is_approved_druggable"] = is_approved_druggable

PCP_properties = properties.copy()
amino_acids = 'ACDEFGHIKLMNPQRSTVWY'

# Expand the per-protein 'Amino Acid Percent' mapping into one column per residue.
# All 20 columns are built first and attached with a single concat instead of
# re-concatenating the whole frame once per amino acid.
amino_acid_percent = {
    aa: [percent[aa] for percent in PCP_properties['Amino Acid Percent']]
    for aa in amino_acids
}
amino_acid_percent_columns = pd.DataFrame(
    {f"Amino Acid Percent {aa}": amino_acid_percent[aa] for aa in amino_acids},
    index=PCP_properties.index,
)
PCP_properties = pd.concat([PCP_properties, amino_acid_percent_columns], axis=1)

# Unpack the sequence-valued columns into scalar columns:
# (new column name, source column, position within the source value).
unpacked_columns = (
    ("Molar Extinction Coefficient 1", 'Molar Extinction Coefficient', 0),
    ("Molar Extinction Coefficient 2", 'Molar Extinction Coefficient', 1),
    ("Secondary Structure helix", 'Secondary Structure', 0),
    ("Secondary Structure turn", 'Secondary Structure', 1),
    ("Secondary Structure sheet", 'Secondary Structure', 2),
)
for new_column, source_column, position in unpacked_columns:
    PCP_properties[new_column] = pd.Series(
        [value[position] for value in PCP_properties[source_column]],
        index=PCP_properties.index,
    )

# Remove the raw container/text columns now that their contents are expanded
# (or, for 'Amino Acid Count', 'Flexibility' and 'Sequence', not used as-is here).
PCP_properties = PCP_properties.drop(
    columns=['Amino Acid Count', 'Amino Acid Percent', "Molar Extinction Coefficient", "Flexibility", "Secondary Structure", 'Sequence']
)

# Cast the scalar descriptors to explicit numeric dtypes.
PCP_properties['Sequence Length'] = PCP_properties['Sequence Length'].astype(int)
float_columns = ['Molecular Weight', 'GRAVY', 'Isoelectric Point', 'Instability Index', 'Aromaticity', 'Charge at 7']
PCP_properties[float_columns] = PCP_properties[float_columns].astype(float)

# Load the GDPC encodings from JSON and transpose them into a DataFrame.
# NOTE(review): this is the only absolute path in this cell; it looks like a
# Colab Google Drive mount, while every other input is read relative to the
# notebook. Confirm the intended location; as written the cell only runs where
# this exact path exists.
# NOTE(review): the variable is spelled `gpdc_encodings` although the file is
# `gdpc_encodings.json`; presumably a typo. Renaming it also requires updating
# the concat that builds `final_data`.
with open("/content/drive/MyDrive/protein_props/features/gdpc_encodings.json", 'r') as file:
    data = json.load(file)
gpdc_encodings = pd.DataFrame(data).transpose()

# PPI features: the transposed JSON table plus the network-property CSV,
# joined side by side on their row labels.
ppi = pd.read_json("../../Features/PPIs/files_for_ml/ppi.json").transpose()
# The CSV's first (unnamed) column becomes the index and is removed from the columns.
ppi_network = pd.read_csv("../../Features/PPIs/files_for_ml/ppi_network_properties.csv").set_index('Unnamed: 0')
ppi = pd.concat([ppi, ppi_network], axis=1)

# PTM features: glycosylation table and PTM counts, each indexed by the CSV's
# first (unnamed) column, then joined side by side.
glycolisation = pd.read_csv("../../Features/PTMs/files_for_ml/glycosylation.csv").set_index('Unnamed: 0')
ptm = pd.read_csv("../../Features/PTMs/files_for_ml/PTM_counts.csv").set_index("Unnamed: 0")
ptm_counts = pd.concat([ptm, glycolisation], axis=1)

with open("../../Features/SCL/files_for_ml/subcellular_locations2.json", 'r') as file:
    data = json.load(file)

# Collect every distinct "group" value found in any entry's "general" list.
unique_groups = {
    general_entry["group"]
    for entry in data.values()
    if "general" in entry
    for general_entry in entry["general"]
    if "group" in general_entry
}

# Sort the groups so the one-hot columns come out in the same order on every
# run. Iterating a set of strings directly can give a different order after a
# kernel restart, which would reorder the feature columns fed to the models.
unique_groups_list = sorted(unique_groups, key=str)

# One row per protein in PCP_properties: 1 for each group listed for that
# protein, 0 otherwise. Proteins missing from the JSON keep an all-zero row.
rows = []
for protein_id in PCP_properties.index:
    row = dict.fromkeys(unique_groups_list, 0)
    for entry in data.get(protein_id, {}).get("general", []):
        # Every group seen here is already in unique_groups_list, because that
        # list was built from the same entries.
        if "group" in entry:
            row[entry["group"]] = 1
    row["protein_id"] = protein_id
    rows.append(row)

subcellular_data = pd.DataFrame(rows).set_index("protein_id")

# Domain features, indexed by the CSV's first (unnamed) column.
domains = pd.read_csv("../../Features/Domains/files_for_ml/data_top20.csv").set_index('Unnamed: 0')

# Flexibility features, indexed the same way.
flexibility = pd.read_csv("../../Features/PCFs/files_for_ml/flexibility_properties.csv").set_index('Unnamed: 0')

# Latent values: transpose the CSV and give the resulting columns 1-based names.
latent_data = pd.read_csv("../../Features/Latents/files_for_ml/latent_values.csv").transpose()
latent_data.columns = [f"Latent_Value_{column + 1}" for column in latent_data.columns]

# Join every feature table on the row labels; dropna() then removes any row
# that has a missing value in at least one column.
feature_tables = [PCP_properties, gpdc_encodings, ptm_counts, ppi, subcellular_data, domains, flexibility, latent_data]
final_data = pd.concat(feature_tables, axis=1).dropna()

# Model inputs are all columns except the two label columns.
label_columns = ['is_druggable', 'is_approved_druggable']
features_list = list(final_data.columns.drop(label_columns))
print(features_list)
print(len(features_list))


Total number of uniprot human verified proteins: 20434
Number of druggable proteins: 3345
Number of approved druggable proteins: 2652
['Sequence Length', 'Molecular Weight', 'GRAVY', 'Isoelectric Point', 'Instability Index', 'Aromaticity', 'Charge at 7', 'Amino Acid Percent A', 'Amino Acid Percent C', 'Amino Acid Percent D', 'Amino Acid Percent E', 'Amino Acid Percent F', 'Amino Acid Percent G', 'Amino Acid Percent H', 'Amino Acid Percent I', 'Amino Acid Percent K', 'Amino Acid Percent L', 'Amino Acid Percent M', 'Amino Acid Percent N', 'Amino Acid Percent P', 'Amino Acid Percent Q', 'Amino Acid Percent R', 'Amino Acid Percent S', 'Amino Acid Percent T', 'Amino Acid Percent V', 'Amino Acid Percent W', 'Amino Acid Percent Y', 'Molar Extinction Coefficient 1', 'Molar Extinction Coefficient 2', 'Secondary Structure helix', 'Secondary Structure turn', 'Secondary Structure sheet', 'aliphatic_aliphatic', 'aliphatic_positive', 'aliphatic_negative', 'aliphatic_uncharged', 'aliphatic_aromatic',

In [None]:
# Train Test Splitting
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from imblearn.over_sampling import ADASYN, SMOTE

def get_data(x_sample, y_sample):
  """Return the features and labels as a pair of NumPy arrays."""
  features = np.array(x_sample)
  labels = np.array(y_sample)
  return features, labels

def data_splitting(x_sample, y_sample, mode="default", scaler="none", class_size=600, random_state=123):
  """Hold out a class-balanced test set and return the rest as training data.

  `class_size` samples of each class (label 1 and label 0) are drawn for the
  test set; everything left over forms the shuffled training set. Optional
  oversampling is applied to the training set only, and an optional scaler is
  fitted on the training set and then applied to the test set.

  Parameters:
    x_sample: feature table; must be a pandas object because the pieces are
      recombined with pd.concat.
    y_sample: label Series, where 1 is druggable and 0 is non-druggable.
    mode: "default" (no resampling), "adasyn" or "smote".
    scaler: "none", "std" (StandardScaler) or "minmax" (MinMaxScaler).
    class_size: passed to train_test_split as `test_size` for each class.
    random_state: seed for the per-class splits and the training shuffle.

  Returns:
    (X_train, X_test, y_train, y_test)

  Raises:
    ValueError: if `mode` or `scaler` is not one of the supported names.
  """
  # Reject unknown options up front. A misspelled name would otherwise fall
  # through every branch and silently behave like "default" / "none".
  if mode not in ("default", "adasyn", "smote"):
    raise ValueError(f"Unknown mode {mode!r}; expected 'default', 'adasyn' or 'smote'")
  if scaler not in ("none", "std", "minmax"):
    raise ValueError(f"Unknown scaler {scaler!r}; expected 'none', 'std' or 'minmax'")

  druggable_indices = (y_sample == 1)  # Assuming 1 represents druggable
  non_druggable_indices = (y_sample == 0)  # Assuming 0 represents non-druggable

  druggable_X = x_sample[druggable_indices]
  druggable_y = y_sample[druggable_indices]

  non_druggable_X = x_sample[non_druggable_indices]
  non_druggable_y = y_sample[non_druggable_indices]

  # Split each class separately so the test set holds class_size of each.
  druggable_X_remaining, druggable_X_test, druggable_y_remaining, druggable_y_test = train_test_split(druggable_X, druggable_y, test_size=class_size, random_state=random_state)
  non_druggable_X_remaining, non_druggable_X_test, non_druggable_y_remaining, non_druggable_y_test = train_test_split(non_druggable_X, non_druggable_y, test_size=class_size, random_state=random_state)

  X_test = pd.concat((druggable_X_test, non_druggable_X_test))
  y_test = pd.concat((druggable_y_test, non_druggable_y_test))
  X_train = pd.concat((druggable_X_remaining, non_druggable_X_remaining))
  y_train = pd.concat((druggable_y_remaining, non_druggable_y_remaining))
  X_train, y_train = shuffle(X_train, y_train, random_state=random_state)

  # Oversampling touches the training data only. The samplers use a fixed seed
  # of 42 that is independent of `random_state`.
  if mode == "adasyn":
    X_train, y_train = ADASYN(random_state=42).fit_resample(X_train, y_train)
  elif mode == "smote":
    X_train, y_train = SMOTE(random_state=42).fit_resample(X_train, y_train)

  # The scaler is fitted on the training data and reused for the test data.
  # A separate name keeps the `scaler` argument (a string) from being rebound.
  feature_scaler = None
  if scaler == "std":
    feature_scaler = StandardScaler()
  elif scaler == "minmax":
    feature_scaler = MinMaxScaler()
  if feature_scaler is not None:
    X_train = feature_scaler.fit_transform(X_train)
    X_test = feature_scaler.transform(X_test)

  return X_train, X_test, y_train, y_test


In [None]:
# rem_new_data keeps only proteins that are either approved druggable or non-druggable.
# 'new_column' is the sum of the two labels, so a value of 1 marks a protein that is
# druggable but not approved; those rows are excluded.
label_sum = final_data['is_druggable'] + final_data['is_approved_druggable']
new_data = final_data.assign(new_column=label_sum)
rem_new_data = new_data.loc[new_data['new_column'] != 1]
rem_new_data.shape, np.bincount(rem_new_data['new_column'])

((19585, 186), array([16949,     0,  2636]))

### Majority Prediction of Partitions

In [None]:
import xgboost as xgb
from sklearn.metrics import accuracy_score

def _accuracy_breakdown(y_true, y_pred):
  """Return overall accuracy plus accuracy on the label-1 and label-0 subsets."""
  positive_mask = (y_true == 1)
  negative_mask = (y_true == 0)
  return {
      "accuracy_total": accuracy_score(y_true, y_pred),
      "accuracy_druggable": accuracy_score(y_true[positive_mask], y_pred[positive_mask]),
      "accuracy_non_druggable": accuracy_score(y_true[negative_mask], y_pred[negative_mask]),
  }


def complete_evaluate(random_state):
  """Train one XGBoost model per non-druggable partition and score them.

  The training non-druggable samples are split into roughly
  len(non_druggable) / len(druggable) partitions. Each partition is combined
  with all training druggable samples to fit one XGBClassifier. The models are
  scored on the held-out test set individually and as two ensembles: a vote
  over hard predictions and the rounded mean of predicted probabilities.

  Parameters:
    random_state: seed forwarded to data_splitting() for the train/test split.

  Returns:
    DataFrame with one row per "partition_<i>" followed by
    "majority_prediction" and "average_probability_prediction", and columns
    "accuracy_total", "accuracy_druggable", "accuracy_non_druggable".

  Raises:
    ValueError: if SETTING is not 3, or if no druggable training samples remain.
  """
  # Only SETTING == 3 is implemented; fail with a clear message instead of
  # hitting an unbound X_train further down.
  if SETTING != 3:
    raise ValueError(f"Unsupported SETTING {SETTING!r}: complete_evaluate only handles SETTING == 3")
  X_train, X_test, y_train, y_test = data_splitting(rem_new_data[features_list], rem_new_data['is_druggable'], random_state=random_state)
  print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
  print(np.bincount(y_train), np.bincount(y_test))
  X_train_druggable = X_train[y_train == 1]
  X_train_non_druggable = X_train[y_train == 0]
  if len(X_train_druggable) == 0:
    raise ValueError("No druggable samples left in the training set; cannot size the partitions")

  # At least one partition, even when non-druggable samples are the minority.
  n_partitions = max(1, round(len(X_train_non_druggable) / len(X_train_druggable)))
  # Split a plain array rather than the DataFrame: the partitions are turned
  # into arrays by np.concatenate below anyway, and this avoids the pandas
  # FutureWarning raised when np.array_split is given a DataFrame.
  X_train_non_druggable_partitions = np.array_split(np.asarray(X_train_non_druggable), n_partitions)
  print(f"Splitting into {len(X_train_non_druggable_partitions)} partitions")
  print("Sizes of partitions")
  for i, partition in enumerate(X_train_non_druggable_partitions):
    print(f"Partition {i}: {len(partition)}")

  # One model per partition: all druggable samples (label 1) plus that
  # partition's non-druggable samples (label 0).
  xgb_models = []
  for partition in X_train_non_druggable_partitions:
    X_combined = np.concatenate((X_train_druggable, partition))
    y_combined = np.concatenate((np.ones(len(X_train_druggable)), np.zeros(len(partition))))
    xgb_model = xgb.XGBClassifier(objective='binary:logistic', random_state=42)
    xgb_model.fit(X_combined, y_combined)
    xgb_models.append(xgb_model)

  # Hard-vote ensemble: round the mean of the 0/1 predictions.
  # NOTE(review): np.round rounds exact halves to the nearest even value, so
  # with an even number of models a tied vote resolves to class 0.
  y_preds = [model.predict(X_test) for model in xgb_models]
  majority_preds = np.round(np.mean(y_preds, axis=0))
  print(majority_preds.shape)

  # Soft ensemble: round the mean predicted probability of class 1.
  y_pred_probas = np.array([model.predict_proba(X_test)[:, 1] for model in xgb_models])
  mean_pred_probas = np.mean(y_pred_probas, axis=0)
  average_proba_preds = np.round(mean_pred_probas)
  print(y_pred_probas.shape, mean_pred_probas.shape, average_proba_preds.shape)

  # Per-model rows first, then the two ensemble rows, each computed once.
  accuracy_metrics = {
      f"partition_{i}": _accuracy_breakdown(y_test, y_pred)
      for i, y_pred in enumerate(y_preds)
  }
  accuracy_metrics["majority_prediction"] = _accuracy_breakdown(y_test, majority_preds)
  accuracy_metrics["average_probability_prediction"] = _accuracy_breakdown(y_test, average_proba_preds)

  df = pd.DataFrame(accuracy_metrics).transpose()
  return df



In [None]:
import random
# One accuracy table (DataFrame) per evaluated split seed.
scores_df = []
# Draw 20 distinct split seeds from range(100) with a dedicated, seeded
# generator. The global `random` module is never seeded in this notebook, so
# sampling from it directly would pick different seeds on every run.
seed_rng = random.Random(42)
random_states = seed_rng.sample(range(100), 20)
for random_state in random_states:
  scores_df.append(complete_evaluate(random_state))
  print(f"Completed for random state {random_state}")

(18385, 183) (1200, 183) (18385,) (1200,)
[16349  2036] [600 600]
Splitting into 8 partitions
Sizes of partitions
Partition 0: 2044
Partition 1: 2044
Partition 2: 2044
Partition 3: 2044
Partition 4: 2044
Partition 5: 2043
Partition 6: 2043
Partition 7: 2043


  return bound(*args, **kwds)


(1200,)
(8, 1200) (1200,) (1200,)
Completed for random state 54
(18385, 183) (1200, 183) (18385,) (1200,)
[16349  2036] [600 600]


  return bound(*args, **kwds)


Splitting into 8 partitions
Sizes of partitions
Partition 0: 2044
Partition 1: 2044
Partition 2: 2044
Partition 3: 2044
Partition 4: 2044
Partition 5: 2043
Partition 6: 2043
Partition 7: 2043
(1200,)
(8, 1200) (1200,) (1200,)
Completed for random state 22
(18385, 183) (1200, 183) (18385,) (1200,)
[16349  2036] [600 600]
Splitting into 8 partitions
Sizes of partitions
Partition 0: 2044
Partition 1: 2044
Partition 2: 2044
Partition 3: 2044
Partition 4: 2044
Partition 5: 2043
Partition 6: 2043
Partition 7: 2043


  return bound(*args, **kwds)


(1200,)
(8, 1200) (1200,) (1200,)
Completed for random state 59
(18385, 183) (1200, 183) (18385,) (1200,)
[16349  2036] [600 600]
Splitting into 8 partitions
Sizes of partitions
Partition 0: 2044
Partition 1: 2044
Partition 2: 2044
Partition 3: 2044
Partition 4: 2044
Partition 5: 2043
Partition 6: 2043
Partition 7: 2043


  return bound(*args, **kwds)


(1200,)
(8, 1200) (1200,) (1200,)
Completed for random state 3
(18385, 183) (1200, 183) (18385,) (1200,)
[16349  2036] [600 600]


  return bound(*args, **kwds)


Splitting into 8 partitions
Sizes of partitions
Partition 0: 2044
Partition 1: 2044
Partition 2: 2044
Partition 3: 2044
Partition 4: 2044
Partition 5: 2043
Partition 6: 2043
Partition 7: 2043
(1200,)
(8, 1200) (1200,) (1200,)
Completed for random state 86
(18385, 183) (1200, 183) (18385,) (1200,)
[16349  2036] [600 600]
Splitting into 8 partitions
Sizes of partitions
Partition 0: 2044
Partition 1: 2044
Partition 2: 2044
Partition 3: 2044
Partition 4: 2044
Partition 5: 2043
Partition 6: 2043
Partition 7: 2043


  return bound(*args, **kwds)


(1200,)
(8, 1200) (1200,) (1200,)
Completed for random state 40
(18385, 183) (1200, 183) (18385,) (1200,)
[16349  2036] [600 600]
Splitting into 8 partitions
Sizes of partitions
Partition 0: 2044
Partition 1: 2044
Partition 2: 2044
Partition 3: 2044
Partition 4: 2044
Partition 5: 2043
Partition 6: 2043
Partition 7: 2043


  return bound(*args, **kwds)


(1200,)
(8, 1200) (1200,) (1200,)
Completed for random state 0
(18385, 183) (1200, 183) (18385,) (1200,)
[16349  2036] [600 600]
Splitting into 8 partitions
Sizes of partitions
Partition 0: 2044
Partition 1: 2044
Partition 2: 2044
Partition 3: 2044
Partition 4: 2044
Partition 5: 2043
Partition 6: 2043
Partition 7: 2043


  return bound(*args, **kwds)


(1200,)
(8, 1200) (1200,) (1200,)
Completed for random state 81
(18385, 183) (1200, 183) (18385,) (1200,)
[16349  2036] [600 600]
Splitting into 8 partitions
Sizes of partitions
Partition 0: 2044
Partition 1: 2044
Partition 2: 2044
Partition 3: 2044
Partition 4: 2044
Partition 5: 2043
Partition 6: 2043
Partition 7: 2043


  return bound(*args, **kwds)


(1200,)
(8, 1200) (1200,) (1200,)
Completed for random state 45
(18385, 183) (1200, 183) (18385,) (1200,)
[16349  2036] [600 600]


  return bound(*args, **kwds)


Splitting into 8 partitions
Sizes of partitions
Partition 0: 2044
Partition 1: 2044
Partition 2: 2044
Partition 3: 2044
Partition 4: 2044
Partition 5: 2043
Partition 6: 2043
Partition 7: 2043
(1200,)
(8, 1200) (1200,) (1200,)
Completed for random state 43
(18385, 183) (1200, 183) (18385,) (1200,)
[16349  2036] [600 600]
Splitting into 8 partitions
Sizes of partitions
Partition 0: 2044
Partition 1: 2044
Partition 2: 2044
Partition 3: 2044
Partition 4: 2044
Partition 5: 2043
Partition 6: 2043
Partition 7: 2043


  return bound(*args, **kwds)


(1200,)
(8, 1200) (1200,) (1200,)
Completed for random state 24
(18385, 183) (1200, 183) (18385,) (1200,)
[16349  2036] [600 600]
Splitting into 8 partitions
Sizes of partitions
Partition 0: 2044
Partition 1: 2044
Partition 2: 2044
Partition 3: 2044
Partition 4: 2044
Partition 5: 2043
Partition 6: 2043
Partition 7: 2043


  return bound(*args, **kwds)


(1200,)
(8, 1200) (1200,) (1200,)
Completed for random state 35
(18385, 183) (1200, 183) (18385,) (1200,)
[16349  2036] [600 600]
Splitting into 8 partitions
Sizes of partitions
Partition 0: 2044
Partition 1: 2044
Partition 2: 2044
Partition 3: 2044
Partition 4: 2044
Partition 5: 2043
Partition 6: 2043
Partition 7: 2043


  return bound(*args, **kwds)


(1200,)
(8, 1200) (1200,) (1200,)
Completed for random state 88
(18385, 183) (1200, 183) (18385,) (1200,)
[16349  2036] [600 600]
Splitting into 8 partitions
Sizes of partitions
Partition 0: 2044
Partition 1: 2044
Partition 2: 2044
Partition 3: 2044
Partition 4: 2044
Partition 5: 2043
Partition 6: 2043
Partition 7: 2043


  return bound(*args, **kwds)


(1200,)
(8, 1200) (1200,) (1200,)
Completed for random state 17
(18385, 183) (1200, 183) (18385,) (1200,)
[16349  2036] [600 600]
Splitting into 8 partitions
Sizes of partitions
Partition 0: 2044
Partition 1: 2044
Partition 2: 2044
Partition 3: 2044
Partition 4: 2044
Partition 5: 2043
Partition 6: 2043
Partition 7: 2043


  return bound(*args, **kwds)


(1200,)
(8, 1200) (1200,) (1200,)
Completed for random state 70
(18385, 183) (1200, 183) (18385,) (1200,)
[16349  2036] [600 600]
Splitting into 8 partitions
Sizes of partitions
Partition 0: 2044
Partition 1: 2044
Partition 2: 2044
Partition 3: 2044
Partition 4: 2044
Partition 5: 2043
Partition 6: 2043
Partition 7: 2043


  return bound(*args, **kwds)


(1200,)
(8, 1200) (1200,) (1200,)
Completed for random state 91
(18385, 183) (1200, 183) (18385,) (1200,)
[16349  2036] [600 600]
Splitting into 8 partitions
Sizes of partitions
Partition 0: 2044
Partition 1: 2044
Partition 2: 2044
Partition 3: 2044
Partition 4: 2044
Partition 5: 2043
Partition 6: 2043
Partition 7: 2043


  return bound(*args, **kwds)


(1200,)
(8, 1200) (1200,) (1200,)
Completed for random state 61
(18385, 183) (1200, 183) (18385,) (1200,)
[16349  2036] [600 600]
Splitting into 8 partitions
Sizes of partitions
Partition 0: 2044
Partition 1: 2044
Partition 2: 2044
Partition 3: 2044
Partition 4: 2044
Partition 5: 2043
Partition 6: 2043
Partition 7: 2043


  return bound(*args, **kwds)


(1200,)
(8, 1200) (1200,) (1200,)
Completed for random state 48
(18385, 183) (1200, 183) (18385,) (1200,)
[16349  2036] [600 600]


  return bound(*args, **kwds)


Splitting into 8 partitions
Sizes of partitions
Partition 0: 2044
Partition 1: 2044
Partition 2: 2044
Partition 3: 2044
Partition 4: 2044
Partition 5: 2043
Partition 6: 2043
Partition 7: 2043
(1200,)
(8, 1200) (1200,) (1200,)
Completed for random state 42
(18385, 183) (1200, 183) (18385,) (1200,)
[16349  2036] [600 600]


  return bound(*args, **kwds)


Splitting into 8 partitions
Sizes of partitions
Partition 0: 2044
Partition 1: 2044
Partition 2: 2044
Partition 3: 2044
Partition 4: 2044
Partition 5: 2043
Partition 6: 2043
Partition 7: 2043
(1200,)
(8, 1200) (1200,) (1200,)
Completed for random state 57


In [None]:
# Number of accuracy tables collected (one per evaluated random state).
len(scores_df)

20

In [None]:
# Element-wise mean of the per-run accuracy tables. Dividing by the actual
# number of runs keeps the average correct if the run count changes.
avg_scores = sum(scores_df) / len(scores_df)

In [None]:
# Display the averaged accuracy table.
avg_scores

Unnamed: 0,accuracy_total,accuracy_druggable,accuracy_non_druggable
partition_0,0.762292,0.771333,0.75325
majority_prediction,0.782208,0.7705,0.793917
average_probability_prediction,0.782417,0.791333,0.7735
partition_1,0.759542,0.7635,0.755583
partition_2,0.7615,0.767833,0.755167
partition_3,0.75975,0.767917,0.751583
partition_4,0.764542,0.768667,0.760417
partition_5,0.764,0.766917,0.761083
partition_6,0.76175,0.76275,0.76075
partition_7,0.762208,0.7665,0.757917


In [None]:
# Pull each accuracy column out of every per-run table. Iterating over
# scores_df directly avoids relying on there being exactly 20 runs.
df_accuracy_totals = [run_scores["accuracy_total"] for run_scores in scores_df]
df_accuracy_druggables = [run_scores["accuracy_druggable"] for run_scores in scores_df]
df_accuracy_non_druggables = [run_scores["accuracy_non_druggable"] for run_scores in scores_df]

In [None]:
# Stack the per-run columns into 2-D arrays: one row per run, one column per
# row of the accuracy tables (stacked by position, not by label).
df_accuracy_totals, df_accuracy_druggables, df_accuracy_non_druggables = (
    np.array(per_run_values)
    for per_run_values in (df_accuracy_totals, df_accuracy_druggables, df_accuracy_non_druggables)
)
df_accuracy_totals.shape, df_accuracy_druggables.shape, df_accuracy_non_druggables.shape

((20, 10), (20, 10), (20, 10))

In [None]:
# Mean and standard deviation of total accuracy across runs; positions follow
# the row order of the tables returned by complete_evaluate().
df_accuracy_totals.mean(axis=0), df_accuracy_totals.std(axis=0)

(array([0.76229167, 0.78220833, 0.78241667, 0.75954167, 0.7615    ,
        0.75975   , 0.76454167, 0.764     , 0.76175   , 0.76220833]),
 array([0.01083614, 0.00874117, 0.0084816 , 0.00893835, 0.01082564,
        0.00886668, 0.00798643, 0.00927062, 0.01265158, 0.01026143]))

In [None]:
# Mean and standard deviation of druggable-class accuracy across runs; positions
# follow the row order of the tables returned by complete_evaluate().
df_accuracy_druggables.mean(axis=0), df_accuracy_druggables.std(axis=0)

(array([0.77133333, 0.7705    , 0.79133333, 0.7635    , 0.76783333,
        0.76791667, 0.76866667, 0.76691667, 0.76275   , 0.7665    ]),
 array([0.01551523, 0.01454017, 0.01418724, 0.01759814, 0.01480522,
        0.01377271, 0.01238951, 0.01409566, 0.0197222 , 0.01680691]))

In [None]:
# Mean and standard deviation of non-druggable-class accuracy across runs;
# positions follow the row order of the tables returned by complete_evaluate().
df_accuracy_non_druggables.mean(axis=0), df_accuracy_non_druggables.std(axis=0)

(array([0.75325   , 0.79391667, 0.7735    , 0.75558333, 0.75516667,
        0.75158333, 0.76041667, 0.76108333, 0.76075   , 0.75791667]),
 array([0.01857025, 0.01697445, 0.01561605, 0.01635776, 0.01525615,
        0.01645258, 0.01466169, 0.01577863, 0.01881544, 0.01787981]))