In [None]:
# Setting seeds for reproducibility
import json
import os

import numpy as np
import pandas as pd
import tensorflow as tf
from imblearn.over_sampling import ADASYN, SMOTE
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.utils import shuffle
np.random.seed(42)
tf.random.set_seed(42)

In [None]:
# Experiment selector read by the splitting / evaluation / saving cells below:
#   1 -> label `is_druggable` on all of `final_data`
#   2 -> label `is_approved_druggable` on all of `final_data`
#   3 -> label `is_druggable` on `rem_new_data` (proteins that are druggable but not approved are excluded)
SETTING = 3

In [None]:
# Input locations, relative to the notebook's working directory.
data_file_path = "../../Features/PCFs/files_for_ml/protein_props.json"
druggable_proteins_file_path = "../../DrugBank/druggable_proteins.txt"
approved_druggable_proteins_file_path = "../../DrugBank/approved_druggable_proteins.txt"

# Parsed property JSON; in this cell it is only used to report how many entries it holds.
with open(data_file_path) as f:
    protein_data = json.loads(f.read())

print("Total number of uniprot human verified proteins:", len(protein_data))

# Each text file is read whole and split into one list entry per line.
with open(druggable_proteins_file_path) as f:
    druggable_proteins = f.read().splitlines()

with open(approved_druggable_proteins_file_path) as f:
    approved_druggable_proteins = f.read().splitlines()

print("Number of druggable proteins:", len(druggable_proteins))
print("Number of approved druggable proteins:", len(approved_druggable_proteins))


# Fetching feature data for all proteins.
# Transposing turns the JSON's top-level keys into the row index; those index
# values are what get looked up in the two DrugBank lists below.
properties = pd.read_json(data_file_path).transpose()

# Set membership is O(1); testing against the lists would rescan them for every protein.
druggable_lookup = set(druggable_proteins)
approved_druggable_lookup = set(approved_druggable_proteins)
is_druggable = [1 if i in druggable_lookup else 0 for i in properties.index]
is_approved_druggable = [1 if i in approved_druggable_lookup else 0 for i in properties.index]

# Binary label columns: 1 when the row's index value appears in the corresponding list.
properties["is_druggable"] = is_druggable
properties["is_approved_druggable"] = is_approved_druggable

PCP_properties = properties.copy()
amino_acids = 'ACDEFGHIKLMNPQRSTVWY'

# Flatten the per-row 'Amino Acid Percent' mapping into one column per residue.
# All 20 columns are built first and attached with a single concat instead of
# re-concatenating (and re-copying) the whole frame once per residue.
amino_acid_percent = {
    aa: [record[aa] for record in PCP_properties['Amino Acid Percent']]
    for aa in amino_acids
}
amino_acid_percent_frame = pd.DataFrame(
    amino_acid_percent, index=PCP_properties.index
).add_prefix("Amino Acid Percent ")
PCP_properties = pd.concat([PCP_properties, amino_acid_percent_frame], axis=1)

# 'Molar Extinction Coefficient' holds a pair per row; split it into two scalar columns.
PCP_properties["Molar Extinction Coefficient 1"] = [x[0] for x in PCP_properties['Molar Extinction Coefficient']]
PCP_properties["Molar Extinction Coefficient 2"] = [x[1] for x in PCP_properties['Molar Extinction Coefficient']]

# 'Secondary Structure' holds a triple per row, stored here as helix / turn / sheet (positions 0 / 1 / 2).
PCP_properties["Secondary Structure helix"] = [x[0] for x in PCP_properties['Secondary Structure']]
PCP_properties["Secondary Structure turn"] = [x[1] for x in PCP_properties['Secondary Structure']]
PCP_properties["Secondary Structure sheet"] = [x[2] for x in PCP_properties['Secondary Structure']]

# Remove the container-valued / raw columns now that their contents are expanded (or unused).
PCP_properties = PCP_properties.drop(
    columns=['Amino Acid Count', 'Amino Acid Percent', "Molar Extinction Coefficient", "Flexibility", "Secondary Structure", 'Sequence']
)

# Give the scalar columns explicit numeric dtypes.
PCP_properties['Sequence Length'] = PCP_properties['Sequence Length'].astype(int)
float_columns = ['Molecular Weight', 'GRAVY', 'Isoelectric Point', 'Instability Index', 'Aromaticity', 'Charge at 7']
PCP_properties[float_columns] = PCP_properties[float_columns].astype(float)

# Load the GDPC encodings JSON and transpose so its top-level keys become the row index
# (presumably protein ids, since the frame is later concatenated column-wise with the
# other per-protein tables — verify against the file).
# NOTE(review): this is an absolute Colab / Google Drive path, unlike the relative
# "../../Features/..." paths used for every other input in this cell; the notebook will
# not run outside that environment until the file is relocated — confirm the intended path.
with open("/content/drive/MyDrive/protein_props/features/gdpc_encodings.json", 'r') as file:
    data = json.load(file)
gpdc_encodings = pd.DataFrame(data).transpose()

# PPI features: the JSON table (transposed) joined column-wise with the network-property CSV.
# The CSV's first column ('Unnamed: 0') becomes the index and is removed from the columns.
ppi = pd.read_json("../../Features/PPIs/files_for_ml/ppi.json").transpose()
ppi_network = pd.read_csv(
    "../../Features/PPIs/files_for_ml/ppi_network_properties.csv"
).set_index('Unnamed: 0')
ppi = pd.concat([ppi, ppi_network], axis=1)

# PTM features: glycosylation table and PTM counts, both indexed by their
# 'Unnamed: 0' column and then joined column-wise.
glycolisation = pd.read_csv(
    "../../Features/PTMs/files_for_ml/glycosylation.csv"
).set_index('Unnamed: 0')
ptm = pd.read_csv(
    "../../Features/PTMs/files_for_ml/PTM_counts.csv"
).set_index("Unnamed: 0")
ptm_counts = pd.concat([ptm, glycolisation], axis=1)

with open("../../Features/SCL/files_for_ml/subcellular_locations2.json", 'r') as file:
    data = json.load(file)

# Collect every "group" value that appears under any entry's "general" list.
unique_groups = set()
for entry in data.values():
    for general_entry in entry.get("general", []):
        if "group" in general_entry:
            unique_groups.add(general_entry["group"])

# Sort before fixing the column order: iterating a set of strings depends on
# per-process hash randomization, so an unsorted list would give a different
# column order (and therefore a different `features_list` order) on each kernel start.
unique_groups_list = sorted(unique_groups, key=str)

# One indicator row per protein in PCP_properties: 1 for each group listed for
# that protein, 0 otherwise (including proteins absent from the JSON).
rows = []
for protein_id in PCP_properties.index:
    row = dict.fromkeys(unique_groups_list, 0)
    for entry in data.get(protein_id, {}).get("general", []):
        if "group" in entry:
            row[entry["group"]] = 1
    row["protein_id"] = protein_id
    rows.append(row)

subcellular_data = pd.DataFrame(rows).set_index("protein_id")

# Domain features, indexed by the CSV's first ('Unnamed: 0') column.
domains = pd.read_csv(
    "../../Features/Domains/files_for_ml/data_top20.csv"
).set_index('Unnamed: 0')

# Flexibility features, indexed the same way.
flexibility = pd.read_csv(
    "../../Features/PCFs/files_for_ml/flexibility_properties.csv"
).set_index('Unnamed: 0')

# Latent values: transpose the CSV and rename the resulting columns to Latent_Value_1, Latent_Value_2, ...
latent_data = pd.read_csv("../../Features/Latents/files_for_ml/latent_values.csv").T
latent_data.columns = [f"Latent_Value_{col+1}" for col in latent_data.columns]

# Join every feature table column-wise on the index; dropna() then discards any
# row that has a missing value in at least one column.
feature_blocks = [PCP_properties, gpdc_encodings, ptm_counts, ppi, subcellular_data, domains, flexibility, latent_data]
final_data = pd.concat(feature_blocks, axis=1).dropna()

# Everything except the two label columns is a model feature.
features_list = final_data.columns.drop(['is_druggable', 'is_approved_druggable']).tolist()
print(features_list)
print(len(features_list))


Total number of uniprot human verified proteins: 20434
Number of druggable proteins: 3345
Number of approved druggable proteins: 2652
['Sequence Length', 'Molecular Weight', 'GRAVY', 'Isoelectric Point', 'Instability Index', 'Aromaticity', 'Charge at 7', 'Amino Acid Percent A', 'Amino Acid Percent C', 'Amino Acid Percent D', 'Amino Acid Percent E', 'Amino Acid Percent F', 'Amino Acid Percent G', 'Amino Acid Percent H', 'Amino Acid Percent I', 'Amino Acid Percent K', 'Amino Acid Percent L', 'Amino Acid Percent M', 'Amino Acid Percent N', 'Amino Acid Percent P', 'Amino Acid Percent Q', 'Amino Acid Percent R', 'Amino Acid Percent S', 'Amino Acid Percent T', 'Amino Acid Percent V', 'Amino Acid Percent W', 'Amino Acid Percent Y', 'Molar Extinction Coefficient 1', 'Molar Extinction Coefficient 2', 'Secondary Structure helix', 'Secondary Structure turn', 'Secondary Structure sheet', 'aliphatic_aliphatic', 'aliphatic_positive', 'aliphatic_negative', 'aliphatic_uncharged', 'aliphatic_aromatic',

In [None]:
# Train Test Splitting
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from imblearn.over_sampling import ADASYN, SMOTE

def get_data(x_sample, y_sample):
  """Return the features and the labels as a pair of new NumPy arrays."""
  features = np.array(x_sample)
  labels = np.array(y_sample)
  return features, labels

def data_splitting(x_sample, y_sample, mode="default", scaler="none", class_size=600):
  """Hold out a class-balanced test set and return the train/test split.

  The label-1 ("druggable") and label-0 ("non-druggable") rows are split
  separately, each contributing ``class_size`` rows to the test set. All
  remaining rows form the training set, which is shuffled, optionally
  oversampled, and optionally scaled. Rows whose label is neither 0 nor 1 end
  up in neither split.

  Args:
    x_sample: Feature table. It is indexed with a boolean mask and the pieces
      are re-joined with ``pd.concat``, so a pandas DataFrame is expected.
    y_sample: Labels for the rows of ``x_sample`` (pandas Series of 0/1).
    mode: Training-set resampling: "default" (none), "adasyn" or "smote".
    scaler: Feature scaling: "none", "std" (StandardScaler) or "minmax"
      (MinMaxScaler). The scaler is fitted on the (possibly resampled)
      training features only and then applied to the test features.
    class_size: Passed as ``test_size`` to ``train_test_split`` for each class.

  Returns:
    ``(X_train, X_test, y_train, y_test)``.

  Raises:
    ValueError: If ``mode`` or ``scaler`` is not one of the supported values.
  """
  # Fail fast on typos instead of silently returning unresampled / unscaled data.
  valid_modes = ("default", "adasyn", "smote")
  valid_scalers = ("none", "std", "minmax")
  if mode not in valid_modes:
    raise ValueError(f"Unknown mode {mode!r}; expected one of {valid_modes}")
  if scaler not in valid_scalers:
    raise ValueError(f"Unknown scaler {scaler!r}; expected one of {valid_scalers}")

  druggable_indices = (y_sample == 1)  # Assuming 1 represents druggable
  non_druggable_indices = (y_sample == 0)  # Assuming 0 represents non-druggable

  druggable_X = x_sample[druggable_indices]
  druggable_y = y_sample[druggable_indices]

  non_druggable_X = x_sample[non_druggable_indices]
  non_druggable_y = y_sample[non_druggable_indices]

  # Split each class on its own so the test set gets exactly class_size rows per class.
  druggable_X_remaining, druggable_X_test, druggable_y_remaining, druggable_y_test = train_test_split(druggable_X, druggable_y, test_size=class_size, random_state=123)
  non_druggable_X_remaining, non_druggable_X_test, non_druggable_y_remaining, non_druggable_y_test = train_test_split(non_druggable_X, non_druggable_y, test_size=class_size, random_state=123)

  X_test = pd.concat((druggable_X_test, non_druggable_X_test))
  y_test = pd.concat((druggable_y_test, non_druggable_y_test))
  X_train = pd.concat((druggable_X_remaining, non_druggable_X_remaining))
  y_train = pd.concat((druggable_y_remaining, non_druggable_y_remaining))
  # Only the training rows are shuffled; the test rows stay grouped by class.
  X_train, y_train = shuffle(X_train, y_train, random_state=123)

  # Optional oversampling, applied to the training set only.
  if mode == "adasyn":
    X_train, y_train = ADASYN(random_state=42).fit_resample(X_train, y_train)
  elif mode == "smote":
    X_train, y_train = SMOTE(random_state=42).fit_resample(X_train, y_train)

  # Keep the estimator in its own name so the `scaler` argument is not overwritten.
  if scaler == "std":
    feature_scaler = StandardScaler()
  elif scaler == "minmax":
    feature_scaler = MinMaxScaler()
  else:
    feature_scaler = None

  if feature_scaler is not None:
    X_train = feature_scaler.fit_transform(X_train)
    X_test = feature_scaler.transform(X_test)

  return X_train, X_test, y_train, y_test


In [None]:
# rem_new_data keeps only proteins that are either approved druggable or non-druggable,
# i.e. it leaves out proteins that are druggable but not approved.
new_data = final_data.copy()
# Sum of the two 0/1 flags: 0 = neither flag set, 1 = exactly one flag set, 2 = both set.
new_data['new_column'] = new_data['is_druggable'].add(new_data['is_approved_druggable'])
keep_mask = new_data['new_column'].ne(1)
rem_new_data = new_data.loc[keep_mask]
(rem_new_data.shape, np.bincount(rem_new_data['new_column']))

((19585, 186), array([16949,     0,  2636]))

In [None]:
# Each experiment setting maps to (source frame, label column).
split_inputs = {
    1: (final_data, 'is_druggable'),
    2: (final_data, 'is_approved_druggable'),
    3: (rem_new_data, 'is_druggable'),
}
# An unsupported value would otherwise only surface as a NameError on X_train below.
if SETTING not in split_inputs:
  raise ValueError(f"Unsupported SETTING {SETTING!r}; expected one of {sorted(split_inputs)}")

source_frame, label_column = split_inputs[SETTING]
X_train, X_test, y_train, y_test = data_splitting(source_frame[features_list], source_frame[label_column])

X_train.shape, X_test.shape, y_train.shape, y_test.shape

((18385, 183), (1200, 183), (18385,), (1200,))

In [None]:
import numpy as np
np.bincount(y_train), np.bincount(y_test)

(array([16349,  2036]), array([600, 600]))

### Feature Selection Scores using the partition Method

In [None]:
# Convert all four splits to plain NumPy arrays for the array-based cells that follow.
X_train, X_test, y_train, y_test = (
    np.array(split) for split in (X_train, X_test, y_train, y_test)
)

In [None]:
# On entire data: stack the train and test rows back into a single set.
X_combined = np.concatenate([X_train, X_test], axis=0)
y_combined = np.concatenate([y_train, y_test], axis=0)

# Shuffle features and labels together with a fixed seed.
X_combined, y_combined = shuffle(X_combined, y_combined, random_state=123)
(X_combined.shape, y_combined.shape)

((19585, 183), (19585,))

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
np.bincount(y_combined)

array([16949,  2636])

### Feature Scores using the Partition Method on the Combined (Train + Test) Data with Several RF Models

In [None]:
# Separate the two classes of the combined data.
X_combined_druggable = X_combined[y_combined == 1]
X_combined_non_druggable = X_combined[y_combined == 0]

# Number of partitions = how many whole druggable-sized chunks fit in the
# non-druggable set; array_split then yields near-equal partitions.
number_partitions = len(X_combined_non_druggable) // len(X_combined_druggable)
X_combined_non_druggable_partitions = np.array_split(X_combined_non_druggable, number_partitions)

print("Number of partitions:", len(X_combined_non_druggable_partitions))
for partition in X_combined_non_druggable_partitions:
  print(partition.shape)

Number of partitions: 6
(2825, 183)
(2825, 183)
(2825, 183)
(2825, 183)
(2825, 183)
(2824, 183)


In [None]:
# Fit one random forest per non-druggable partition, each time against the full
# druggable set, and record its accuracy on the data it was trained on.
rf_models = []
training_metrics = {}
for i, partition in enumerate(X_combined_non_druggable_partitions):
  X_train_new = np.concatenate([X_combined_druggable, partition], axis=0)
  # Labels follow the stacking order: 1.0 for the druggable rows, then 0.0 for the partition rows.
  y_train_new = np.repeat([1.0, 0.0], [X_combined_druggable.shape[0], partition.shape[0]])
  X_train_new, y_train_new = shuffle(X_train_new, y_train_new, random_state=123)

  rf_model = RandomForestClassifier(random_state=27)
  rf_model.fit(X_train_new, y_train_new)
  rf_models.append(rf_model)

  druggable_mask = (y_train_new == 1)
  non_druggable_mask = (y_train_new == 0)
  training_metrics[f"partition_{i}"] = {
      "accuracy_total": rf_model.score(X_train_new, y_train_new),
      "accuracy_druggable": rf_model.score(X_train_new[druggable_mask], y_train_new[druggable_mask]),
      "accuracy_non-druggable": rf_model.score(X_train_new[non_druggable_mask], y_train_new[non_druggable_mask]),
  }

In [None]:
# Score each model on the non-druggable partitions it was NOT trained on;
# every row there has true label 0, so the score is the non-druggable accuracy.
test_metrics = {}
for i in range(number_partitions):
  model = rf_models[i]
  remaining_non_druggable_partitions = np.concatenate(
      [X_combined_non_druggable_partitions[j] for j in range(number_partitions) if j != i]
  )
  expected_labels = np.zeros(remaining_non_druggable_partitions.shape[0])
  test_metrics[f"partition_{i}"] = {
      "accuracy_non_druggable": model.score(remaining_non_druggable_partitions, expected_labels)
  }


In [None]:
training_metrics, test_metrics

({'partition_0': {'accuracy_total': 1.0,
   'accuracy_druggable': 1.0,
   'accuracy_non-druggable': 1.0},
  'partition_1': {'accuracy_total': 1.0,
   'accuracy_druggable': 1.0,
   'accuracy_non-druggable': 1.0},
  'partition_2': {'accuracy_total': 1.0,
   'accuracy_druggable': 1.0,
   'accuracy_non-druggable': 1.0},
  'partition_3': {'accuracy_total': 1.0,
   'accuracy_druggable': 1.0,
   'accuracy_non-druggable': 1.0},
  'partition_4': {'accuracy_total': 1.0,
   'accuracy_druggable': 1.0,
   'accuracy_non-druggable': 1.0},
  'partition_5': {'accuracy_total': 1.0,
   'accuracy_druggable': 1.0,
   'accuracy_non-druggable': 1.0}},
 {'partition_0': {'accuracy_non_druggable': 0.7785329934862645},
  'partition_1': {'accuracy_non_druggable': 0.7689039932030586},
  'partition_2': {'accuracy_non_druggable': 0.7769753610875106},
  'partition_3': {'accuracy_non_druggable': 0.7748513169073916},
  'partition_4': {'accuracy_non_druggable': 0.764231096006797},
  'partition_5': {'accuracy_non_druggab

In [None]:
def get_dict_form(feature_scores, feature_names=None):
  """Pair each score with its feature name.

  Args:
    feature_scores: Sequence of scores, one per feature, in feature order.
    feature_names: Names to pair the scores with. Defaults to the notebook-level
      ``features_list`` when omitted.

  Returns:
    dict mapping feature name -> score, in the order of ``feature_names``.

  Raises:
    ValueError: If the number of scores differs from the number of names
      (``zip`` alone would silently drop the surplus).
  """
  if feature_names is None:
    feature_names = features_list
  feature_names = list(feature_names)
  feature_scores = list(feature_scores)
  if len(feature_scores) != len(feature_names):
    raise ValueError(
        f"Got {len(feature_scores)} scores for {len(feature_names)} features"
    )
  return dict(zip(feature_names, feature_scores))

In [None]:
# Element-wise mean of the per-model importance vectors: one averaged score per feature.
mean_feature_importances = np.asarray(
    [model.feature_importances_ for model in rf_models]
).mean(axis=0)

In [None]:
# One {feature: importance} mapping per partition model (1-based keys), plus the average.
model_fs_scores = {
    f"Partition_{i+1}": get_dict_form(rf_models[i].feature_importances_)
    for i in range(number_partitions)
}
model_fs_scores["Partition_Average"] = get_dict_form(mean_feature_importances)

In [None]:
# Rows are features, columns are Partition_1..N and Partition_Average.
df = pd.DataFrame(model_fs_scores)
if SETTING == 3:
  # Write under RF/ — the feature-ranking cell reads "RF/partition_avg_feature_scores.csv",
  # so saving to the working directory would leave that cell reading a missing or stale file.
  os.makedirs("RF", exist_ok=True)
  df.to_csv("RF/partition_avg_feature_scores.csv")

In [None]:
df.head()

Unnamed: 0,Partition_1,Partition_2,Partition_3,Partition_4,Partition_5,Partition_6,Partition_Average
Sequence Length,0.011226,0.010312,0.011767,0.01032,0.011286,0.011629,0.01109
Molecular Weight,0.011913,0.010879,0.012067,0.01396,0.012021,0.010183,0.011837
GRAVY,0.016238,0.01656,0.016283,0.01814,0.015923,0.015817,0.016494
Isoelectric Point,0.007493,0.008046,0.008086,0.008382,0.007919,0.007414,0.00789
Instability Index,0.020372,0.02042,0.018369,0.020457,0.022025,0.018885,0.020088


### Experimenting with Increasing Number of Top Features based on Feature Selection scores from Partition Average

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

def evaluate_partition_method_model(features):
  """Train a partition ensemble on the given feature subset and score it on the test split.

  The non-druggable training rows are split into roughly druggable-sized
  partitions; one random forest is fitted per partition (all druggable rows +
  that partition). Test predictions are the rounded mean of the models'
  class-1 probabilities.

  Args:
    features: List of column names to select from the data for this run.

  Returns:
    dict with "test_accuracy", "test_accuracy_druggable" and
    "test_accuracy_non-druggable".

  Raises:
    ValueError: If the notebook-level SETTING is not 1, 2 or 3.
  """
  if SETTING == 1:
    # NOTE(review): the unpacking order here is (X_test, X_train, y_test, y_train), the reverse
    # of what data_splitting returns, so the balanced class_size-per-class sample becomes the
    # TRAINING set and the remainder the test set. Kept as-is in case it is deliberate — confirm.
    X_test, X_train, y_test, y_train = data_splitting(final_data[features], final_data["is_druggable"], mode="default", scaler="none", class_size=2700)
  elif SETTING == 2:
    # NOTE(review): same reversed unpacking as SETTING 1 — confirm intent.
    X_test, X_train, y_test, y_train = data_splitting(final_data[features], final_data["is_approved_druggable"], mode="default", scaler="none", class_size=2000)
  elif SETTING == 3:
    X_train, X_test, y_train, y_test = data_splitting(rem_new_data[features], rem_new_data['is_druggable'], class_size = 600)
  else:
    raise ValueError(f"Unsupported SETTING {SETTING!r}; expected 1, 2 or 3")

  # Work on plain arrays from here on. np.array_split on a DataFrame emits a pandas
  # deprecation warning on every call, and the forests below are fitted on arrays,
  # so they should be scored on arrays as well.
  X_train, X_test = np.asarray(X_train), np.asarray(X_test)
  y_train, y_test = np.asarray(y_train), np.asarray(y_test)

  # Debugging purposes
  if len(features) == 1:
    print(f"X_train shape is {X_train.shape}")
    print(f"X_test shape is {X_test.shape}")
    print(f"y_train distribution is {np.bincount(y_train)}")
    print(f"y_test distribution is {np.bincount(y_test)}")

  X_train_druggable = X_train[y_train == 1]
  X_train_non_druggable = X_train[y_train == 0]
  # Always at least one partition: array_split rejects 0 sections, which the ratio
  # rounds to when non-druggable rows are fewer than half the druggable rows.
  n_partitions = max(1, round(len(X_train_non_druggable) / len(X_train_druggable)))
  X_train_non_druggable_partitions = np.array_split(X_train_non_druggable, n_partitions)

  # Debugging purposes
  if len(features) == 1:
    print(f"Splitting into {len(X_train_non_druggable_partitions)} partitions")
    print("Sizes of partitions:")
    for i, partition in enumerate(X_train_non_druggable_partitions):
      print(f"Partition {i+1}: {partition.shape}")

  # One forest per partition: all druggable rows (label 1) + that partition (label 0).
  rf_models = []
  for partition in X_train_non_druggable_partitions:
    X_train_combined = np.concatenate((X_train_druggable, partition))
    y_train_combined = np.concatenate((np.ones(X_train_druggable.shape[0]), np.zeros(partition.shape[0])))
    rf_model = RandomForestClassifier(random_state=27)
    rf_model.fit(X_train_combined, y_train_combined)
    rf_models.append(rf_model)

  # Soft voting: average the class-1 probability across models, then round to a 0/1 label.
  y_pred_probas = [model.predict_proba(X_test)[:, 1] for model in rf_models]
  average_pred_probas = np.mean(np.array(y_pred_probas), axis=0)
  y_pred = np.round(average_pred_probas)

  druggable_mask = (y_test == 1)
  non_druggable_mask = (y_test == 0)
  return {
      "test_accuracy": accuracy_score(y_test, y_pred),
      "test_accuracy_druggable": accuracy_score(y_test[druggable_mask], y_pred[druggable_mask]),
      "test_accuracy_non-druggable": accuracy_score(y_test[non_druggable_mask], y_pred[non_druggable_mask])
  }

In [None]:
if SETTING == 3:
  # index_col=0 restores the feature names that to_csv wrote as the first column.
  fs_scores = pd.read_csv("RF/partition_avg_feature_scores.csv", index_col=0)
else:
  # Scores are only written to disk for SETTING 3; for other settings use the
  # scores computed earlier in this session instead of failing on an undefined name.
  fs_scores = pd.DataFrame(model_fs_scores)

# Look each score up by feature NAME. Pairing the saved rows with features_list by
# position silently mislabels scores whenever the saved row order differs from the
# current features_list order.
scores_by_name = {str(name): score for name, score in fs_scores["Partition_Average"].items()}
missing_features = [feature for feature in features_list if str(feature) not in scores_by_name]
if missing_features:
  raise ValueError(f"No saved Partition_Average score for features: {missing_features[:5]} (and possibly more)")

fs_scores_partition_avg = {feature: scores_by_name[str(feature)] for feature in features_list}
# Highest-scoring features first.
fs_scores_partition_avg = dict(sorted(fs_scores_partition_avg.items(), key=lambda item: item[1], reverse=True))

# Evaluate the top-1, top-2, ..., top-N feature subsets in ranked order.
ranked_features = list(fs_scores_partition_avg)
metrics_with_fs_scores_partition_avg = {}
for i in range(1, len(ranked_features) + 1):
  features = ranked_features[:i]
  metrics_with_fs_scores_partition_avg[i] = evaluate_partition_method_model(features)
  print(f"{i} Done with accuracy {metrics_with_fs_scores_partition_avg[i]['test_accuracy']}")

X_train shape is (18385, 1)
X_test shape is (1200, 1)
y_train distribution is [16349  2036]
y_test distribution is [600 600]
Splitting into 8 partitions
Sizes of partitions:
Partition 1: (2044, 1)
Partition 2: (2044, 1)
Partition 3: (2044, 1)
Partition 4: (2044, 1)
Partition 5: (2044, 1)
Partition 6: (2043, 1)
Partition 7: (2043, 1)
Partition 8: (2043, 1)


  return bound(*args, **kwds)


1 Done with accuracy 0.5958333333333333


  return bound(*args, **kwds)


2 Done with accuracy 0.5975


  return bound(*args, **kwds)
  return bound(*args, **kwds)


3 Done with accuracy 0.6191666666666666


  return bound(*args, **kwds)


4 Done with accuracy 0.6716666666666666


  return bound(*args, **kwds)


5 Done with accuracy 0.6908333333333333


  return bound(*args, **kwds)


6 Done with accuracy 0.7033333333333334


  return bound(*args, **kwds)


7 Done with accuracy 0.7091666666666666


  return bound(*args, **kwds)


8 Done with accuracy 0.7041666666666667




9 Done with accuracy 0.7158333333333333


  return bound(*args, **kwds)
  return bound(*args, **kwds)


10 Done with accuracy 0.7125


  return bound(*args, **kwds)


11 Done with accuracy 0.7116666666666667


  return bound(*args, **kwds)


12 Done with accuracy 0.7191666666666666


  return bound(*args, **kwds)


13 Done with accuracy 0.7241666666666666


  return bound(*args, **kwds)


14 Done with accuracy 0.7216666666666667


  return bound(*args, **kwds)


15 Done with accuracy 0.7316666666666667


  return bound(*args, **kwds)


16 Done with accuracy 0.7233333333333334


  return bound(*args, **kwds)


17 Done with accuracy 0.7408333333333333


  return bound(*args, **kwds)


18 Done with accuracy 0.7333333333333333




19 Done with accuracy 0.7275


  return bound(*args, **kwds)
  return bound(*args, **kwds)


20 Done with accuracy 0.735


  return bound(*args, **kwds)


21 Done with accuracy 0.7383333333333333


  return bound(*args, **kwds)


22 Done with accuracy 0.7425


  return bound(*args, **kwds)


23 Done with accuracy 0.7408333333333333


  return bound(*args, **kwds)


24 Done with accuracy 0.7391666666666666




25 Done with accuracy 0.7433333333333333


  return bound(*args, **kwds)
  return bound(*args, **kwds)


26 Done with accuracy 0.74




27 Done with accuracy 0.7458333333333333


  return bound(*args, **kwds)
  return bound(*args, **kwds)


28 Done with accuracy 0.7433333333333333


  return bound(*args, **kwds)


29 Done with accuracy 0.7391666666666666




30 Done with accuracy 0.7375


  return bound(*args, **kwds)
  return bound(*args, **kwds)


31 Done with accuracy 0.7341666666666666


  return bound(*args, **kwds)


32 Done with accuracy 0.7466666666666667


  return bound(*args, **kwds)


33 Done with accuracy 0.74


  return bound(*args, **kwds)


34 Done with accuracy 0.745




35 Done with accuracy 0.7466666666666667


  return bound(*args, **kwds)
  return bound(*args, **kwds)


36 Done with accuracy 0.74


  return bound(*args, **kwds)


37 Done with accuracy 0.7458333333333333


  return bound(*args, **kwds)


38 Done with accuracy 0.7475


  return bound(*args, **kwds)


39 Done with accuracy 0.7483333333333333


  return bound(*args, **kwds)


40 Done with accuracy 0.7491666666666666




41 Done with accuracy 0.74


  return bound(*args, **kwds)


42 Done with accuracy 0.7483333333333333


  return bound(*args, **kwds)
  return bound(*args, **kwds)


43 Done with accuracy 0.7491666666666666


  return bound(*args, **kwds)


44 Done with accuracy 0.7416666666666667


  return bound(*args, **kwds)


45 Done with accuracy 0.7391666666666666


  return bound(*args, **kwds)


46 Done with accuracy 0.7425




47 Done with accuracy 0.7516666666666667


  return bound(*args, **kwds)


48 Done with accuracy 0.7516666666666667


  return bound(*args, **kwds)


49 Done with accuracy 0.7483333333333333


  return bound(*args, **kwds)
  return bound(*args, **kwds)


50 Done with accuracy 0.7525


  return bound(*args, **kwds)


51 Done with accuracy 0.755


  return bound(*args, **kwds)


52 Done with accuracy 0.7541666666666667


  return bound(*args, **kwds)


53 Done with accuracy 0.7541666666666667


  return bound(*args, **kwds)


54 Done with accuracy 0.7566666666666667


  return bound(*args, **kwds)


55 Done with accuracy 0.7441666666666666


  return bound(*args, **kwds)


56 Done with accuracy 0.7483333333333333




57 Done with accuracy 0.7483333333333333


  return bound(*args, **kwds)


58 Done with accuracy 0.7525


  return bound(*args, **kwds)
  return bound(*args, **kwds)


59 Done with accuracy 0.7475


  return bound(*args, **kwds)


60 Done with accuracy 0.75


  return bound(*args, **kwds)


61 Done with accuracy 0.7491666666666666




62 Done with accuracy 0.7508333333333334


  return bound(*args, **kwds)


63 Done with accuracy 0.7466666666666667


  return bound(*args, **kwds)
  return bound(*args, **kwds)


64 Done with accuracy 0.7483333333333333




65 Done with accuracy 0.7483333333333333


  return bound(*args, **kwds)
  return bound(*args, **kwds)


66 Done with accuracy 0.7541666666666667




67 Done with accuracy 0.745


  return bound(*args, **kwds)
  return bound(*args, **kwds)


68 Done with accuracy 0.7508333333333334


  return bound(*args, **kwds)


69 Done with accuracy 0.7475


  return bound(*args, **kwds)


70 Done with accuracy 0.7558333333333334


  return bound(*args, **kwds)


71 Done with accuracy 0.7408333333333333


  return bound(*args, **kwds)


72 Done with accuracy 0.7491666666666666


  return bound(*args, **kwds)


73 Done with accuracy 0.7441666666666666


  return bound(*args, **kwds)


74 Done with accuracy 0.7425


  return bound(*args, **kwds)


75 Done with accuracy 0.7483333333333333


  return bound(*args, **kwds)


76 Done with accuracy 0.7425


  return bound(*args, **kwds)


77 Done with accuracy 0.7483333333333333


  return bound(*args, **kwds)


78 Done with accuracy 0.7566666666666667


  return bound(*args, **kwds)


79 Done with accuracy 0.7491666666666666


  return bound(*args, **kwds)


80 Done with accuracy 0.7475


  return bound(*args, **kwds)


81 Done with accuracy 0.7516666666666667




82 Done with accuracy 0.75


  return bound(*args, **kwds)


83 Done with accuracy 0.7575


  return bound(*args, **kwds)
  return bound(*args, **kwds)


84 Done with accuracy 0.7433333333333333


  return bound(*args, **kwds)


85 Done with accuracy 0.7525


  return bound(*args, **kwds)


86 Done with accuracy 0.7533333333333333


  return bound(*args, **kwds)


87 Done with accuracy 0.7466666666666667




88 Done with accuracy 0.7475


  return bound(*args, **kwds)
  return bound(*args, **kwds)


89 Done with accuracy 0.7475


  return bound(*args, **kwds)


90 Done with accuracy 0.7508333333333334


  return bound(*args, **kwds)


91 Done with accuracy 0.7441666666666666


  return bound(*args, **kwds)


92 Done with accuracy 0.7475




93 Done with accuracy 0.7475


  return bound(*args, **kwds)
  return bound(*args, **kwds)


94 Done with accuracy 0.7425


  return bound(*args, **kwds)


95 Done with accuracy 0.7575


  return bound(*args, **kwds)


96 Done with accuracy 0.745




97 Done with accuracy 0.75


  return bound(*args, **kwds)
  return bound(*args, **kwds)


98 Done with accuracy 0.7483333333333333


  return bound(*args, **kwds)


99 Done with accuracy 0.7483333333333333


  return bound(*args, **kwds)


100 Done with accuracy 0.75


  return bound(*args, **kwds)


101 Done with accuracy 0.7483333333333333


  return bound(*args, **kwds)


102 Done with accuracy 0.7516666666666667


  return bound(*args, **kwds)


103 Done with accuracy 0.7441666666666666


  return bound(*args, **kwds)


104 Done with accuracy 0.755


  return bound(*args, **kwds)


105 Done with accuracy 0.7508333333333334


  return bound(*args, **kwds)


106 Done with accuracy 0.7516666666666667


  return bound(*args, **kwds)


107 Done with accuracy 0.755




108 Done with accuracy 0.7491666666666666


  return bound(*args, **kwds)
  return bound(*args, **kwds)


109 Done with accuracy 0.7491666666666666


  return bound(*args, **kwds)


110 Done with accuracy 0.7541666666666667


  return bound(*args, **kwds)


111 Done with accuracy 0.7541666666666667




112 Done with accuracy 0.7425


  return bound(*args, **kwds)


113 Done with accuracy 0.7541666666666667


  return bound(*args, **kwds)
  return bound(*args, **kwds)


114 Done with accuracy 0.7491666666666666




115 Done with accuracy 0.7508333333333334


  return bound(*args, **kwds)
  return bound(*args, **kwds)


116 Done with accuracy 0.75


  return bound(*args, **kwds)


117 Done with accuracy 0.7516666666666667


  return bound(*args, **kwds)


118 Done with accuracy 0.7508333333333334




119 Done with accuracy 0.7508333333333334


  return bound(*args, **kwds)
  return bound(*args, **kwds)


120 Done with accuracy 0.7516666666666667


  return bound(*args, **kwds)


121 Done with accuracy 0.7558333333333334


  return bound(*args, **kwds)


122 Done with accuracy 0.7533333333333333




123 Done with accuracy 0.75


  return bound(*args, **kwds)


124 Done with accuracy 0.7508333333333334


  return bound(*args, **kwds)


125 Done with accuracy 0.7525


  return bound(*args, **kwds)


126 Done with accuracy 0.7608333333333334


  return bound(*args, **kwds)
  return bound(*args, **kwds)


127 Done with accuracy 0.7583333333333333




128 Done with accuracy 0.7566666666666667


  return bound(*args, **kwds)


129 Done with accuracy 0.7516666666666667


  return bound(*args, **kwds)


130 Done with accuracy 0.7483333333333333


  return bound(*args, **kwds)
  return bound(*args, **kwds)


131 Done with accuracy 0.7583333333333333




132 Done with accuracy 0.7508333333333334


  return bound(*args, **kwds)
  return bound(*args, **kwds)


133 Done with accuracy 0.7533333333333333




134 Done with accuracy 0.7483333333333333


  return bound(*args, **kwds)


135 Done with accuracy 0.7508333333333334


  return bound(*args, **kwds)
  return bound(*args, **kwds)


136 Done with accuracy 0.7483333333333333




137 Done with accuracy 0.7508333333333334


  return bound(*args, **kwds)


138 Done with accuracy 0.7583333333333333


  return bound(*args, **kwds)


139 Done with accuracy 0.7475


  return bound(*args, **kwds)
  return bound(*args, **kwds)


140 Done with accuracy 0.7566666666666667




141 Done with accuracy 0.7516666666666667


  return bound(*args, **kwds)
  return bound(*args, **kwds)


142 Done with accuracy 0.7541666666666667




143 Done with accuracy 0.7466666666666667


  return bound(*args, **kwds)


144 Done with accuracy 0.7508333333333334


  return bound(*args, **kwds)


145 Done with accuracy 0.7558333333333334


  return bound(*args, **kwds)


146 Done with accuracy 0.7466666666666667


  return bound(*args, **kwds)


147 Done with accuracy 0.75


  return bound(*args, **kwds)
  return bound(*args, **kwds)


148 Done with accuracy 0.7491666666666666


  return bound(*args, **kwds)


149 Done with accuracy 0.745




150 Done with accuracy 0.7533333333333333


  return bound(*args, **kwds)


151 Done with accuracy 0.75


  return bound(*args, **kwds)
  return bound(*args, **kwds)


152 Done with accuracy 0.745




153 Done with accuracy 0.7566666666666667


  return bound(*args, **kwds)


154 Done with accuracy 0.7525


  return bound(*args, **kwds)


155 Done with accuracy 0.7541666666666667


  return bound(*args, **kwds)
  return bound(*args, **kwds)


156 Done with accuracy 0.7541666666666667




157 Done with accuracy 0.7525


  return bound(*args, **kwds)
  return bound(*args, **kwds)


158 Done with accuracy 0.75


  return bound(*args, **kwds)


159 Done with accuracy 0.7591666666666667


  return bound(*args, **kwds)


160 Done with accuracy 0.7458333333333333


  return bound(*args, **kwds)


161 Done with accuracy 0.7441666666666666


  return bound(*args, **kwds)


162 Done with accuracy 0.7491666666666666




163 Done with accuracy 0.755


  return bound(*args, **kwds)
  return bound(*args, **kwds)


164 Done with accuracy 0.7416666666666667




165 Done with accuracy 0.7516666666666667


  return bound(*args, **kwds)
  return bound(*args, **kwds)


166 Done with accuracy 0.7491666666666666




167 Done with accuracy 0.7516666666666667


  return bound(*args, **kwds)


168 Done with accuracy 0.7533333333333333


  return bound(*args, **kwds)


169 Done with accuracy 0.7475


  return bound(*args, **kwds)
  return bound(*args, **kwds)


170 Done with accuracy 0.755


  return bound(*args, **kwds)


171 Done with accuracy 0.7416666666666667




172 Done with accuracy 0.7458333333333333


  return bound(*args, **kwds)
  return bound(*args, **kwds)


173 Done with accuracy 0.7508333333333334


  return bound(*args, **kwds)


174 Done with accuracy 0.7558333333333334




175 Done with accuracy 0.75


  return bound(*args, **kwds)
  return bound(*args, **kwds)


176 Done with accuracy 0.7508333333333334


  return bound(*args, **kwds)


177 Done with accuracy 0.7466666666666667




178 Done with accuracy 0.7466666666666667


  return bound(*args, **kwds)


179 Done with accuracy 0.7475


  return bound(*args, **kwds)


180 Done with accuracy 0.7491666666666666


  return bound(*args, **kwds)
  return bound(*args, **kwds)


181 Done with accuracy 0.7458333333333333




182 Done with accuracy 0.7475


  return bound(*args, **kwds)


183 Done with accuracy 0.7533333333333333




In [None]:
# Transposed so each row is a feature count and each column is one of the metrics.
metrics_by_metric = pd.DataFrame(metrics_with_fs_scores_partition_avg)
df = metrics_by_metric.T
if SETTING == 3:
  df.to_csv("RF/feature_improvement_metrics.csv")

In [None]:
df.head()

Unnamed: 0,test_accuracy,test_accuracy_druggable,test_accuracy_non-druggable
1,0.595833,0.641667,0.55
2,0.5975,0.636667,0.558333
3,0.619167,0.636667,0.601667
4,0.671667,0.665,0.678333
5,0.690833,0.686667,0.695
