In [1]:
import pandas as pd
import matplotlib.pyplot as plt 

from scipy.stats import ttest_ind

# Classifiers
from sklearn.ensemble import AdaBoostClassifier
from sklearn.svm import SVC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from xgboost import XGBClassifier
from sklearn.naive_bayes import GaussianNB


from sklearn.model_selection import GridSearchCV
from sklearn.metrics import log_loss, ConfusionMatrixDisplay, average_precision_score, accuracy_score


# suppress futurewarnings
def warn(*args, **kwargs):
    pass
import warnings
warnings.warn = warn

# Processing raw data
Processing and preparation of training data from the raw data

In [3]:
# READING DATA
exprsn_data = pd.read_csv("C:/Users/TEJINDER SINGH/Downloads/New folder (13)/esets_renal_exprs.csv")
output_labels = pd.read_csv("C:/Users/TEJINDER SINGH/Downloads/kidney_label.csv")

# DATA TRANSPOSE
out_t = output_labels.set_index("Source Name").transpose()
kidney_data = exprsn_data.set_index("Unnamed: 0").transpose()

# SELECTING THE COLUMNS or SAMPLE NAMES
data_c = exprsn_data.columns
out_c = out_t.columns

# GENERATING LABELS FOR DATASET
output = pd.DataFrame()
for i in data_c:
    for j in out_c:
        if i == j:
            output[i] = out_t[j]
output = output.T
labels = output.pop("Characteristics[disease]")

# Get gene names
Saving data and get gene names using annotation package from bioconductor using R programming language

In [14]:
kidney_data["labels"] = labels
kidney_data.to_csv(r"C:\Users\TEJINDER SINGH\Downloads\New folder (13)\Project Datasets\kidney_data.csv", index = True)

# Feature engineering
Selecting and merging data from probes belonging to the same gene by computing their mean.

In [17]:
def merge_common_genes(data):
    """
    Selecting and merging data from probes belonging to the same gene by computing their mean.
    
    Args:
    Dataset containg gene symbols as column names
    
    Returns:
    Dataset containg gene symbols as column names after merging columns with same names/genes
    """
    nan_list = [item for item, frame in data["Symbol"].iteritems() if pd.isnull(frame)]        

    for i in nan_list:                                  
        data.loc[i,'Symbol'] = data.loc[i,'Unnamed..0']#Adding probe names to the "Symbols" column with missing values
    data = data.set_index("Symbol").transpose()
    data.drop("Unnamed..0", inplace = True)
    data = pd.DataFrame(data, dtype = float)
    data = data.groupby(data.columns, axis=1).mean()

    return data

In [18]:
gene_symbol_data = pd.read_csv("C:/Users/TEJINDER SINGH/Downloads/New folder (13)/kidney_data_gene_names.csv")
gene_data = merge_common_genes(gene_symbol_data)
gene_data

Symbol,1007_s_at,1294_at,1552283_s_at,1552388_at,1552401_a_at,1552411_at,1552412_a_at,1552449_a_at,1552563_a_at,1552607_at,...,ZWILCH,ZWINT,ZXDA,ZXDB,ZXDC,ZYG11A,ZYG11B,ZYX,ZZEF1,ZZZ3
GSM904985_0014N_Exp.CEL,10.759910,8.168967,6.148286,6.148655,3.526734,6.000679,3.567700,3.420196,4.406512,4.929491,...,6.492485,6.470051,6.311163,5.141446,6.531090,5.263501,8.519608,7.424008,6.131245,7.506218
GSM904986_0014T_Exp.CEL,10.627069,8.110592,6.080880,5.371208,3.637927,6.378871,3.765375,3.907632,3.826696,4.338781,...,6.286828,7.491875,6.998897,5.153848,6.105484,5.527082,8.349207,8.321408,6.346957,7.314657
GSM904988_0026N_Exp.CEL,9.912308,7.682694,6.348944,6.496141,3.910959,6.273258,3.618695,3.352324,4.026041,4.973174,...,5.748054,6.875448,6.024658,5.197408,6.384318,5.878829,9.062821,7.490204,6.189373,7.569571
GSM904989_0026T_Exp.CEL,9.928639,7.883371,5.569622,5.740533,4.167279,6.086869,3.926209,4.000820,3.821508,4.645940,...,6.031481,7.578293,6.753043,5.233582,6.783810,5.051750,8.167005,7.381080,6.078681,7.563371
GSM904990_0042N_Exp.CEL,9.774650,7.406771,5.963449,6.341914,4.242397,6.336297,4.029943,4.483494,4.035602,4.798371,...,5.964617,6.630055,5.452919,4.918226,5.866362,6.279175,8.704949,7.301923,6.059809,7.520830
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
GSM305112.CEL,9.285833,7.404473,6.081948,6.165685,3.623140,7.124676,3.767614,4.018204,4.466480,4.825818,...,6.311605,8.180971,6.332484,5.163339,6.301880,5.545485,7.943559,7.335683,6.315128,7.418185
GSM305113.CEL,10.256438,8.009185,4.694489,6.140019,3.725265,6.931847,4.055042,4.138074,4.350811,4.730193,...,6.414612,7.152420,5.885697,5.171652,6.137954,5.507664,8.010172,7.625686,5.855927,6.761258
GSM305114.CEL,11.092388,7.577346,5.547016,5.930863,3.799916,6.136671,3.736061,3.608018,3.942664,4.627653,...,6.173863,8.952424,6.582268,5.146336,6.318175,5.391035,8.517406,8.858774,5.960262,7.712653
GSM305115.CEL,10.170089,7.499742,5.888226,5.880348,4.193258,6.850084,4.166192,4.287220,3.860280,4.833088,...,6.805837,8.734431,6.018990,5.350754,6.120509,5.969984,8.396805,8.194778,6.022666,7.640047


# Feature selection
1. Differential expression analysis
2. Recursive feature elimination
3. Checking correlation

### Differential expression analysis

In [None]:
def get_DEG(p_value, dataset, log_FC):
    """
    Finds differentially expressed genes with the given p_value and fold_change
    
    Args:
    p_value - int value for specifying the p value for stastical significance
    dataset - dataset on which the differential expression analysis is to be performed
    fold_change - int value depicting fold change
    
    Returns:
    A dataset containing genes that are differentially expressed with given p value and fold change
    """
    
    data["sample_type"] = labels
    groups = data.groupby("sample_type")
    normal_sample = groups.get_group("normal")
    tumor_sample = groups.get_group("tumor")
    tumor_sample = tumor_sample.drop(["sample_type"], axis = 1)
    normal_sample = normal_sample.drop(["sample_type"], axis = 1)

    columns = normal_sample.columns
    u_data = []
    t_data = []
  
    for i in range(0,(len(columns)-1)):
        a = normal_sample[columns[i]]
        b = tumor_sample[columns[i]]
        # T-test    
        t,p = ttest_ind(a,b)
        if p<p_value:
            t_data.append(columns[i])

    common_genes = list(set(u_data).intersection(t_data))

    p_data = pd.DataFrame(kidney_data, columns = common)
    p_data["samples"] = labels

    group = p_data.groupby("samples")
    n = group.get_group("normal")
    t = group.get_group("tumor")
    t.drop("samples", axis = 1, inplace = True)
    n.drop("samples", axis = 1, inplace = True)

    fold = []
    for i in t.columns:
        f = t[i].mean() - n[i].mean()
        if f < -(log_FC) or f >(log_FC):
            fold.append(i)
    fold2 = pd.DataFrame(kidney_data, columns = fold)
  
    return fold2


### Recursive feature elimination

In [None]:
def svc_rfe_cv(dataset, label):
    """
    Performing recursive feature elimination using support vector classifier with 10 fold cross validation
    
    Args:
    dataset - training data
    label - trainig data labels
    
    Returns:
    A  list of most informative columns according to SVC_RFE
    """
    estimator = SVC(kernel="linear")
    selector = RFECV(estimator, min_features_to_select=100, step=1, cv = 10)
    selector = selector.fit(dataset, label)
    output = dataset[dataset.columns[selector.get_support()]]
    return output

Performing Differential expression analysis and recursive feature elimination

In [None]:
deg_data = get_DEG(0.0001,data, 1)
training_data = svc_rfe_cv(deg_data, labels)

### Correlation

In [None]:
corr = training_data.corr()
fig, ax = plt.subplots(figsize=(30, 30))
colormap = sns.diverging_palette(220, 10, as_cmap=True)
sns.heatmap(corr,vmin=-1, vmax=1, cmap=colormap)
plt.xticks(range(len(corr.columns)), corr.columns);
plt.yticks(range(len(corr.columns)), corr.columns)
plt.savefig('C:/Users/TEJINDER SINGH/Downloads/New folder (13)/heatmap.png')
plt.show()

# Single experiment dataset
Extracting the data of a single experiment from the meta-dataset. The extracted data include 144 samples.

In [None]:
indx = []
for i in training_data.index:
    if i.startswith("GSM130"):
        indx.append(i)
        
training_data['lbl'] = labels
single_train_set = training_data.T[indx]
single_train_set = single_train_set.T
single_train_set        

In [None]:
single_lbl = single_train_set.pop("lbl")

# Test dataset

In [None]:
def get_test_data(path):
    """
    Reads and load the test dataset, extract selected features from the test dataset
    and generate binary labels from test dataset labels
    
    Args:
    path - path to the Processed test dataset
    
    Returns:
    testing_data - Final test dataset
    test_labels - Labels for the test dataset
    test_labels_binary - labels with 0 (for normal sample) and 1 (for tumour samples)
    """
    test_data = pd.read_csv(path)
    testing_data = test_data[training_data.columns]
    testing_data = testing_data.set_index(test_data.index)
    test_labels = test_data.pop("labels")

    test_labels_binary =[]
    for i in test_labels:
        if i == "tumor":
            test_labels_binary.append(1)
        else:
            test_labels_binary.append(0)
    return testing_data, test_labels,test_labels_binary

Split data into training and testing data with 1:1 ratio 

In [None]:
X_train, X_test, y_train, y_test = train_test_split(training_data, labels, 
                                                    test_size = 0.5,  
                                                    random_state = 42) 

# Loading Test dataset
#### Test dataset was processed using bioconductor package for R

In [None]:
test_set_path = "C:/Users/TEJINDER SINGH/Downloads/New folder (13)/test_gse66272.csv"
testing_data, test_labels, test_labels_binary = get_test_data(test_set_path)

testing_data["label"] = test_labels
testing_data["binary"] = test_labels_binary
X_test["label"] = y_test
X_test["binary"] = ytest

test_df = pd.concat([testing_data, X_test])
test_binary_labels = test_df.pop("binary")
test_lab = test_df.pop("label")
test_df

## Variables

### Training data 
 training_data - Training meta-dataset (174 samples),                                                       
   labels        - Labels for meta-dataset

 single_train_set - Training data from single experiment,                                                                 
   single_lbl       - Labels for single experiment
   
### Test data
 test_df - Testing meta-dataset (228 samples),                                                                     
 test_lab - labels for test meta-dataset,                                                                              
 test_binary_labels - binary labels ("0" for normal sample and "1" for tumour sample)
 

# Hyperparameter optimization

In [None]:
def get_best_model(classifier, parameters):
    """
    Performing Grid Search with 5 fold cross validation to find the best hyperparameters
    for machine learning models
    
    Args:
    classifiers - Machine learning classifier
    parameters - dictionary of a range of hyperparameters for a given classifier
    
    Returns 
    dictionary of best parameters,
    Machine learning classifier with best score
    """
    CV = GridSearchCV(classifier, param_grid=parameters)
    CV.fit(training_data, labels)
    return CV.best_params_, CV.best_estimator_



Choosing classifiers and parameters

In [None]:
#XGB
xgb = XGBClassifier(learning_rate=0.001, objective='binary:logistic',tree_method='gpu_hist')
param_grid_xgb = {'min_child_weight': [1,3,5],
                  'gamma': [0.5, 1, 2],
                  'max_depth': [3,4,6],
                  'n_estimators': [100,300,500,700]}

#Decision Tree
dtc= DecisionTreeClassifier()
param_grid_dtc = {'max_features': ['auto', 'sqrt', 'log2'],
                 'min_samples_split': [2,3,4,5,6,7,8,9,10,11,12,13,14,15], 
                 'min_samples_leaf':[1,2,3,4,5,6,7,8,9,10,11]}

#Logistic regression
lr = LogisticRegression(penalty='l2')
param_grid_lr = {'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],
                 'tol':[0000.1,0.0001,000.1,00.1,0.1]}

#Random Forest
rfc = RandomForestClassifier()
param_grid_rfc = {'n_estimators': [100, 200, 300, 400, 500],
                 'max_features': ['auto', 'sqrt', 'log2'],
                 'max_depth' : [4,5,6,7,8],
                 'criterion' :['gini', 'entropy']}

#Support Vector Machine
svm = SVC(probability=True)
param_grid_svm = {'C': [0.001, 0.01, 0.1, 1, 10],
                  'gamma' : ['scale', 'auto']}


#ADaboost
adb = AdaBoostClassifier()
param_grid_adb = {'base_estimator': [RandomForestClassifier(),DecisionTreeClassifier()],
                  'n_estimators' : [10,20,30,40,50,60,70,80,90,100,150,200]}

#GPC
gpc = GaussianProcessClassifier(kernel=DotProduct(1.0))
param_grid_gpc = {'kernel':[DotProduct(i) for i in [0.2, 0.5, 1,2,3,5]]  + [RBF(i) for i in [0.2, 0.5, 1,2,3,5]]}



performing GridSearchCV

In [None]:
xgb_best_parameters, xgb_best_model = get_best_model(xgb, param_grid_xgb)
dtc_best_parameters, dtc_best_model = get_best_model(dtc, param_grid_dtc)
lr_best_parameters, lr_best_model   = get_best_model(lr, param_grid_lr)
rfc_best_parameters, rfc_best_model = get_best_model(rfc, param_grid_rfc)
svm_best_parameters, svm_best_model = get_best_model(svm, param_grid_svm)
adb_best_parameters, adb_best_model = get_best_model(adb, param_grid_adb)
gpc_best_parameters, gpc_best_model = get_best_model(gpc, param_grid_gpc)

# Training
## Adding the classifiers with the best parameters to a list 

In [None]:
# ML models for meta-dataset
meta_models = []
meta_models.append(('LR', LogisticRegression(C=0.1, tol=0.1,)))
meta_models.append(('LDA', LinearDiscriminantAnalysis()))
meta_models.append(('QDA', QuadraticDiscriminantAnalysis()))
meta_models.append(('DT', DecisionTreeClassifier(min_samples_leaf=4, min_samples_split=13,splitter='best')))
meta_models.append(('NB', GaussianNB()))
meta_models.append(('SVM', SVC(C=10, gamma='scale', kernel='rbf',probability=True)))
meta_models.append(('ADB', AdaBoostClassifier(n_estimators=20)))
meta_models.append(('RF', RandomForestClassifier(criterion='gini', max_depth=6, min_samples_leaf=1, min_samples_split=2,
                       n_estimators=100)))
meta_models.append(('GPC', GaussianProcessClassifier(kernel=RBF(length_scale=1))))
meta_models.append(('XGB', XGBClassifier(booster='gbtree', colsample_bylevel=1,
                    learning_rate=0.001, max_depth=6,
                    min_child_weight=5,n_estimators=700,
                    objective='binary:logistic')))

In [None]:
# ML models for single experiment dataset
single_models = []
single_models.append(('LR', LogisticRegression(C=0.1, tol=0.1,)))
single_models.append(('LDA', LinearDiscriminantAnalysis()))
single_models.append(('QDA', QuadraticDiscriminantAnalysis()))
single_models.append(('DT', DecisionTreeClassifier(min_samples_leaf=4, min_samples_split=13,splitter='best')))
single_models.append(('NB', GaussianNB()))
single_models.append(('SVM', SVC(C=10, gamma='scale', kernel='rbf',probability=True)))
single_models.append(('ADB', AdaBoostClassifier(n_estimators=20)))
single_models.append(('RF', RandomForestClassifier(criterion='gini', max_depth=6, min_samples_leaf=1, min_samples_split=2,
                       n_estimators=100)))
single_models.append(('GPC', GaussianProcessClassifier(kernel=RBF(length_scale=1))))
single_models.append(('XGB', XGBClassifier(booster='gbtree', colsample_bylevel=1,
                     learning_rate=0.001, max_depth=6,
                     min_child_weight=5,n_estimators=700,objective='binary:logistic')))

Training

In [None]:
# Converting binary labels into 0 and 1
ytest =[]
for i in y_test:
    if i == "tumor":
        ytest.append(1)
    else:
        ytest.append(0)

# meta Model_training
for name, model in meta_models:
    model.fit(X_train, y_train)

# single Model_training
for name, model in single_models:
    model.fit(single_train_set, single_lbl)
    

# After Data processing and Training machine learning classifiers the models were tested on the test dataset

In [None]:
def get_results(name,model, test_data, test_label, binary_labels, path):
    """
    This function makes prediction on the test dataset and returns the results
    
    Args:
    name - String ,name of the classifier
    model - classifier object
    test_dataset - test dataframe
    test_label - labels for the test dataframe
    binary_labels - labels for the test data set in binary form ("0" for normal sample and "1" for tumour sample)
    path - path to save confusion matrix
    
    Returns:
    clf_r - classificationreport
    accuracy - accuracy of the classifier on test dataset
    fpr1 - False positive rate
    tpr1 - True positive rate
    roc_auc - Area under the ROC curve
    precision - Precision of classifier on test dataset
    recall - Recall of the classifier on test dataset
    f1 - F1 score
    log_los - Log loss
    ap - Average precision
    """
    
    prediction = model.predict(test_data)
    clf_r = classification_report(test_label, prediction)
    accuracy = accuracy_score(test_label, prediction)
    #print(clf_r,"\n",f"Accuracy = {accuracy*100}%")
    #plot_roc(model, testing_data, binary_labels)
    
    pred_binary = []
    for i in prediction:
        if i == "tumor":
            pred_binary.append(1)
        else:
            pred_binary.append(0)
    probs = model.predict_proba(test_data)[:,1]
    #print(probs)        
    fpr1, tpr1, _ = roc_curve(binary_labels, probs)
    roc_auc = auc(fpr1, tpr1)
    log_los = log_loss(binary_labels, probs)
    
    precision, recall, _ = precision_recall_curve(binary_labels, probs)
    f1 = f1_score(binary_labels, pred_binary) 
    label = ["tumor", "normal"]
    
    cm = confusion_matrix(test_label, prediction,labels = label)
    print(cm)
    cm_display = ConfusionMatrixDisplay(cm,display_labels = label).plot(cmap = "Reds")
    plt.savefig(f'{path}{name}.png')
    plt.show()
    ap = average_precision_score(binary_labels, pred_binary)
    #wrong predictions indices
    #indices = [i for i in range(len(test_label)) if test_label[i] != prediction[i]]
    
    #try:
    #    print("Getting features")
    #    model1 = SelectFromModel(model, prefit=True)
    #    feature_idx = model1.get_support()
                #print(feature_idx)
    #except:
    #    feature_idx = 0
    #    print("No features available")
    #data.rename(columns={"Symbol":i}, inplace=True)
    #feat = pd.concat([feat,data],axis = 1)

    return clf_r, accuracy, fpr1, tpr1, roc_auc, precision, recall,  f1, log_los, ap


In [None]:
def plot_pr_curve(result, path):
    """ 
    Plots the Precision recall curve
    
    Args:
    result - Result dataframe containing the following columns
             'recall'
             'precision'
             'AP'
    path - path to save the PR plot
    
    Returns:
    None
    """

    fig = plt.figure(figsize=(8,6))

    for i in result.index:
        plt.plot(result.loc[i]['recall'], 
                 result.loc[i]['precision'], 
                 label = f"{i}, AP={result.loc[i]['AP']}")

    #plt.plot([0,1], [0,1], color='orange', linestyle='--')
    plt.xticks(np.arange(0.0, 1.1, step=0.1))
    plt.xlabel("recall", fontsize=15)

    plt.yticks(np.arange(0.0, 1.15, step=0.1))
    plt.ylabel("precision", fontsize=15)

    plt.title('Precision Recall Curve', fontweight='bold', fontsize=15)
    plt.legend(prop={'size':13}, loc='center left', bbox_to_anchor=(1, 0.5))
    ax = fig.add_subplot(111)
    lgd = ax.legend(prop={'size':13}, loc='center left', bbox_to_anchor=(1, 0.5))
    plt.savefig(path,bbox_extra_artists=(lgd,), bbox_inches='tight')
    plt.show()
    
def plot_roc_curve(result, path):
    """ 
    Plots the ROC curve
    
    Args:
    result - Result dataframe containing the following columns
             'fpr'
             'tpr'
             'AUC'
    path - path to save the ROC plot
    
    Returns:
    None
    """
    
    fig = plt.figure(figsize=(8,6))

    for i in result.index:
        plt.plot(result.loc[i]['fpr'], 
                 result.loc[i]['tpr'], 
                 label = f"{i}, AUC={result.loc[i]['auc']}" )

    #plt.plot([0,1], [0,1], color='orange', linestyle='--')
    plt.xticks(np.arange(0.0, 1.1, step=0.1))
    plt.xlabel("Flase Positive Rate", fontsize=15)

    plt.yticks(np.arange(0.0, 1.1, step=0.1))
    plt.ylabel("True Positive Rate", fontsize=15)

    plt.title('ROC Curve', fontweight='bold', fontsize=15)
    plt.legend(prop={'size':13}, loc='center left', bbox_to_anchor=(1, 0.5))
    ax = fig.add_subplot(111)
    lgd = ax.legend(prop={'size':13}, loc='center left', bbox_to_anchor=(1, 0.5))
    
    plt.savefig(path,bbox_extra_artists=(lgd,), bbox_inches='tight')
    plt.show()    

# Making predictions and result evaluations

In [1]:
# Meta dataset model evaluation
meta_clf_report_test = []
meta_result_table_test = pd.DataFrame(columns=['classifiers','accuracy', 'fpr','tpr','auc', 'precision', 'recall', 'f1 score', 'AP'])
meta_cm_path = "C:/Users/TEJINDER SINGH/Downloads/Project results/meta_"

for name, model in meta_models:
    print(name)
    clf_r, acc, fpr, tpr, roc_auc, precision, recall,  f1 ,log_los,ap = get_results(name,model,test_df,lab, binary_labels, meta_cm_path)
    meta_clf_report_test.append((name,clf_r))
    meta_result_table_test  = meta_result_table_test.append({'classifiers': name,
                                                       'accuracy' : f"{acc*100}%",
                                                       'fpr':fpr, 
                                                       'tpr':tpr, 
                                                       'auc':roc_auc,
                                                       'precision' : precision,
                                                       'recall' : recall,
                                                       'f1 score' : f1,
                                                      'log_loss' : log_los,
                                                      'AP' : ap}, ignore_index=True)

meta_result_table_test.set_index('classifiers', inplace=True)
meta_result_table_test

NameError: name 'pd' is not defined

In [None]:
#single_models evaluation
single_clf_report_test = []
single_result_table_test = pd.DataFrame(columns=['classifiers','accuracy', 'fpr','tpr','auc', 'precision', 'recall', 'f1 score', 'AP'])
single_cm_path = "C:/Users/TEJINDER SINGH/Downloads/Project results/single_"

for name, model in single_models:
    print(name)
    clf_r, acc, fpr, tpr, roc_auc, precision, recall,  f1 ,log_los,ap = get_results(name,model,test_df,lab, binary_labels, single_cm_path)
    single_clf_report_test.append((name,clf_r))
    single_result_table_test  = result_table_test.append({'classifiers': name,
                                                       'accuracy' : f"{acc*100}%",
                                                       'fpr':fpr, 
                                                       'tpr':tpr, 
                                                       'auc':roc_auc,
                                                       'precision' : precision,
                                                       'recall' : recall,
                                                       'f1 score' : f1,
                                                      'log_loss' : log_los,
                                                      'AP' : ap}, ignore_index=True)

single_result_table_test.set_index('classifiers', inplace=True)
single_result_table_test

In [None]:
# Plot Precision recall curve meta_dataset
path_pr = 'C:/Users/TEJINDER SINGH/Downloads/Project results/meta_PR_curve.png'
plot_pr_curve(meta_result_table_test)

In [None]:
# Plot Precision recall curve single_dataset
path_pr = 'C:/Users/TEJINDER SINGH/Downloads/Project results/single_PR_curve.png'
plot_pr_curve(single_result_table_test)

In [None]:
# ROC curve meta_dataset
path_roc = 'C:/Users/TEJINDER SINGH/Downloads/Project results/meta_ROC_curve.png'
plot_roc_curve(meta_result_table_test, path_roc)

In [None]:
# ROC curve single_dataset
path_roc = 'C:/Users/TEJINDER SINGH/Downloads/Project results/single_ROC_curve.png'
plot_roc_curve(single_result_table_test, path_roc)

In [None]:
# Save the result table meta_data
result_output = meta_result_table_test[['accuracy', 'auc', 'f1 score', 'AP', 'log_loss']]
result_output.to_csv('C:/Users/TEJINDER SINGH/Downloads/Project results/result_single_exp_data.csv',index = True)

In [None]:
# Save the result table single data
result_output = single_result_table_test[['accuracy', 'auc', 'f1 score', 'AP', 'log_loss']]
result_output.to_csv('C:/Users/TEJINDER SINGH/Downloads/Project results/result_single_exp_data.csv',index = True)