# Assess predictions

Evaluate how good the predictions made by the logistic regression classifier actually were.

Takes as input:
1. Classifier results file: *.predicted_summary.tsv
2. Input metadata file 'dataset_summary.tsv'
    

In [None]:
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns
from matplotlib import pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

## Setup (edit as required)

In [None]:
# Configuration: paths of the two input tables (edit as required).
# Both files are read as tab-separated text further down.
metadata_file = "dataset_summary.tsv"
classifier_results_file = "retention_group_1_2_log2.predicted_summary.tsv.gz"

## Read in results

In [None]:
# Load the two tab-separated input tables into DataFrames,
# announcing each file before it is read.
print(f"Reading in classifier results: {classifier_results_file}")
classifier_results = pd.read_csv(classifier_results_file, sep="\t")

print(f"Reading in datasets metadata: {metadata_file}")
metadata = pd.read_csv(metadata_file, sep="\t")

In [None]:
# Combine metadata and classifier output into one table, `classifier_results`.

# Metadata: keep only the columns of interest, switch to snake_case names and
# encode the experimental outcome as 1 ('succeeded') or 0 (anything else).
metadata_column_names = {
    'Accession': 'accession',
    'Cell_line': 'cell_line',
    'Jerber_model_score': 'jerber_model_score',
    'Diff_efficiency': 'experimental_diff_efficiency',
    'Result': 'experimental_differentiated',
}
metadata = (
    metadata
    .loc[:, list(metadata_column_names)]
    .rename(columns=metadata_column_names)
    .assign(experimental_differentiated=lambda df: np.where(
        df['experimental_differentiated'] == 'succeeded', 1, 0))
)

# Classifier output: rename the prediction column, add the coefficient of
# variation (standard deviation / mean of p) and put the columns in a nicer order.
classifier_results = (
    classifier_results
    .rename(columns={'differentiated': 'predicted_differentiated'})
    .assign(p_coeff_variation=lambda df: df['p_stdp'] / df['p_average'])
    .loc[:, ['accession', 'p_average', 'p_stdp', 'p_coeff_variation', 'predicted_differentiated']]
)

# Right join: every classifier row is kept, whether or not metadata exists for it.
# Rows are then ordered by cell line and average p, flagged 1/0 according to
# whether prediction and experiment agree, and re-indexed from 0.
classifier_results = (
    metadata
    .merge(classifier_results, how='right', on='accession')
    .sort_values(by=['cell_line', 'p_average'])
    .assign(correct_prediction=lambda df: np.where(
        df['experimental_differentiated'] == df['predicted_differentiated'], 1, 0))
    .reset_index(drop=True)
)

# The standalone metadata table is no longer needed
del metadata

In [None]:
# Check consistency within cell lines (concordancy of classifier predictions).
# A replicated cell line is "concordant" when all of its accessions received the
# same predicted label, and "discordant" otherwise.
consistency_data = classifier_results.loc[:, ['cell_line', 'predicted_differentiated']]

print(f"Number of accessions: {consistency_data.shape[0]}")
print(f"Number of different cell lines: {consistency_data['cell_line'].drop_duplicates().shape[0]}")

# Identify cell lines present more than once and keep only their accessions
boolean_to_select = consistency_data.duplicated(subset='cell_line', keep=False)
consistency_data = consistency_data[boolean_to_select]

print(f"Total number of accessions which are part of a replicated cell line: {consistency_data['cell_line'].shape[0]}")


replicate_groups = consistency_data['cell_line'].drop_duplicates().shape[0]
print(f'Number of replicated cell lines (i.e. replicate groups): {replicate_groups}')

# Collapse to unique (cell line, prediction) pairs: a concordant cell line is
# left with exactly one row, a discordant one with several.
consistency_data = consistency_data.drop_duplicates()

# Count directly on the boolean mask instead of indexing value_counts() with
# False, which raises KeyError whenever no concordant cell line exists
# (no replicates at all, or every replicated cell line is discordant).
has_conflicting_predictions = consistency_data.duplicated(subset='cell_line', keep=False)
concordant_cell_lines = int((~has_conflicting_predictions).sum())
discordant_cell_lines = replicate_groups - concordant_cell_lines

print(f'Concordant cell lines: {concordant_cell_lines}')
print(f'Discordant cell lines: {discordant_cell_lines}')
if replicate_groups > 0:
    print(f'%Concordancy: {round(100 * concordant_cell_lines / replicate_groups, 2)}')
else:
    # Avoid dividing by zero when the data set contains no replicated cell line
    print('%Concordancy: n/a (no replicated cell lines)')

In [None]:
# Confusion matrix of experimental (true) labels vs classifier predictions.
# labels=[0, 1] pins the matrix to 2x2 even if one class never occurs, so the
# two hard-coded tick labels below always match the matrix shape.
# NOTE(review): assumes predicted_differentiated is encoded 0/1 like the
# experimental labels - confirm against the classifier output.
cm = confusion_matrix(classifier_results['experimental_differentiated'],
                      classifier_results['predicted_differentiated'],
                      labels=[0, 1])

plt.figure(figsize=(7, 5))
ax = plt.subplot()
sns.heatmap(cm, annot=True, ax=ax, cmap=plt.cm.Blues, fmt='g')

# Labels, title and ticks (rows are true labels, columns are predictions)
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.set_title('Confusion Matrix')
ax.xaxis.set_ticklabels(['Undifferentiated', 'Differentiated'])
ax.yaxis.set_ticklabels(['Undifferentiated', 'Differentiated'])

plt.show()

In [None]:
# Summary metrics comparing the experimental outcome with the classifier prediction.
observed_labels = classifier_results['experimental_differentiated']
predicted_labels = classifier_results['predicted_differentiated']
section_break = '=================================================================================='

# Accuracy
acc = accuracy_score(observed_labels, predicted_labels)
print('Accuracy: ', round(acc, 3))
print(section_break)

# Cohen's Kappa, printed after a reminder of how to interpret its value
cka = cohen_kappa_score(observed_labels, predicted_labels)
for kappa_note in (
    "Cohen's Kappa",
    "",
    "Cohen suggested the Kappa result be interpreted as follows: ",
    "values ≤ 0 as indicating no agreement\n0.01–0.20 as none to slight\n0.21–0.40 as fair",
    "0.41– 0.60 as moderate\n0.61–0.80 as substantial\n0.81–1.00 as almost perfect agreement.\n",
):
    print(kappa_note)
print("Cohen's Kappa: ", round(cka, 3))
print(section_break)

# F1 score, printed after a short explanation of the measure
f1 = f1_score(observed_labels, predicted_labels)
for f1_note in (
    'F1 Score',
    '',
    'F1 = 2 * (precision * recall) / (precision + recall)',
    'A model will obtain a high F1 score if both Precision and Recall are high',
    'A model will obtain a low F1 score if both Precision and Recall are low',
    'A model will obtain a medium F1 score if one of Precision and Recall is low and the other is high',
    '',
):
    print(f1_note)
print(f'F1 score: {round(f1, 3)}')

In [None]:
# Plot a precision-recall curve by sweeping the decision threshold over every
# distinct average p-value reported by the classifier.
column_names = ['Precision', 'Recall']

p_value_thresholds = (classifier_results
                        .loc[:, 'p_average']
                        .dropna()      # NaN sorts last and would be dropped in place of the maximum
                        .drop_duplicates()
                        .sort_values()
                        .reset_index(drop=True)
                        .iloc[0:-1]    # Remove last value since nothing is larger than this
                     )

# Collect one [precision, recall] row per threshold and build the DataFrame
# once at the end. Growing it with pd.concat inside the loop is quadratic and
# starts from an empty object-dtype frame, which makes the result dtype fragile.
true_labels = classifier_results['experimental_differentiated']
precision_recall_rows = []
for p_value_threshold in p_value_thresholds:
    # Call an accession "differentiated" when its average p exceeds the threshold
    threshold_specific_prediction = np.where(classifier_results['p_average'] > p_value_threshold, 1, 0)

    precision_recall_rows.append([
        precision_score(true_labels, threshold_specific_prediction),
        recall_score(true_labels, threshold_specific_prediction),
    ])

precision_recall = pd.DataFrame(precision_recall_rows, columns=column_names, dtype=float)


# Plot results twice: once on fixed 0-1.05 axes, once with autoscaled axes
for plot_title, use_fixed_axes in [('Precision-Recall Curve', True),
                                   ('Precision-Recall Curve (autoscale)', False)]:
    plt.figure(figsize=(7,7))
    sns.lineplot(data=precision_recall, x='Recall', y='Precision')
    plt.title(plot_title)
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    if use_fixed_axes:
        plt.xlim(0, 1.05)
        plt.ylim(0, 1.05)
    plt.show()

In [None]:
# Determine the highest recall that is reached with 100% precision.
# The summary is printed only when such a point exists; previously the final
# print ran unconditionally and failed with NameError (or silently reused a
# stale best_recall from an earlier run) when precision never reached 1.
if precision_recall['Precision'].max() == 1:
    best_recall = (precision_recall
                       .query('Precision == 1')
                       .loc[:, 'Recall']
                       .max()
                  )
    print(f'100% Precision with Recall of {round(best_recall * 100, 1)}%')
else:
    best_recall = None   # make the absence of a result explicit
    print('Precision never reaches 1')

In [None]:
# Scatter plot: classifier average p-value against experimental differentiation
# efficiency, coloured by experimental outcome and styled by prediction correctness.
fig, ax = plt.subplots(figsize=(9,9))
sns.scatterplot(data=classifier_results,
                x='experimental_diff_efficiency',
                y='p_average',
                hue='experimental_differentiated',
                style='correct_prediction',
                alpha=0.7,
                ax=ax)

ax.set_title('Classifier average p-value vs differentiation efficiency')
ax.set_xlabel('Experimental differentiation efficiency')
ax.set_ylabel('Classifier p value')

# Dashed red reference lines at p = 0.5 and efficiency = 0.2
ax.axhline(0.5, color='r', linestyle='--')
ax.axvline(0.2, color='r', linestyle='--')
ax.set_xlim(0, 1)
ax.set_ylim(0, 1)

# Place the legend outside the plotting area, to the right
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)

plt.show()

In [None]:
# Scatter plot: classifier average p-value against the standard deviation of the
# p-value, coloured by experimental outcome and styled by prediction correctness.
fig, ax = plt.subplots(figsize=(10 ,10))
sns.scatterplot(data=classifier_results,
                x='p_stdp',
                y='p_average',
                hue='experimental_differentiated',
                style='correct_prediction',
                alpha=0.7,
                ax=ax)

ax.set_title('How classifier prediction success varies with p-value and p-value variation I')
ax.set_xlabel('Classifier standard deviation of p-value')
ax.set_ylabel('Classifier average p-value')

# Dashed red reference line at p = 0.5
ax.axhline(0.5, color='r', linestyle='--')
ax.set_ylim(0, 1)

# Place the legend outside the plotting area, to the right
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)

plt.show()

In [None]:
# Scatter plot: classifier average p-value against the coefficient of variation of
# the p-value, coloured by experimental outcome and styled by prediction correctness.
fig, ax = plt.subplots(figsize=(10 ,10))
sns.scatterplot(data=classifier_results,
                x='p_coeff_variation',
                y='p_average',
                hue='experimental_differentiated',
                style='correct_prediction',
                alpha=0.7,
                ax=ax)

ax.set_title('How classifier prediction success varies with p-value and p-value variation II')
ax.set_xlabel('Classifier coefficient of variance of p-value')
ax.set_ylabel('Classifier average p-value')

# Dashed red reference line at p = 0.5
ax.axhline(0.5, color='r', linestyle='--')
ax.set_ylim(0, 1)

# Place the legend outside the plotting area, to the right
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)

plt.show()

In [None]:
# Interactive 3D scatter: experimental differentiation efficiency, average
# p-value and p-value coefficient of variation, coloured by whether the
# prediction was correct.
# plotly.express (px) is imported in the notebook's import cell, so a missing
# dependency is reported at the start instead of at the final plot.
fig = px.scatter_3d(classifier_results,
                    x='experimental_diff_efficiency',
                    y='p_average',
                    z='p_coeff_variation',
                    color='correct_prediction',
                    opacity=0.7,
                    title='Prediction correctness vs differentiation efficiency, average p-value and p-value variation')
fig.show()

In [None]:
# Signal that every cell above ran to completion.
print("Done")