In [None]:
import os
import pandas as pd
import csv  
import glob
import re
import ast
from collections import defaultdict 

In [None]:
# Load the table that the later cells analyse; the path is relative to the
# current working directory.
data = pd.read_csv('accVsprop.csv')
# Distinct values of the 'pepName' column; the plotting loop further down
# iterates over these.
pepNames = data['pepName'].unique()
# Bare expression: in a notebook cell this displays the loaded DataFrame.
data

In [None]:
# Names of the columns of `data` that are analysed by the plotting loop.
# findGraphValues parses every cell of these columns with ast.literal_eval,
# so each cell is expected to be a string holding a Python literal sequence.
properties = ['basicity', 'hydrophobicity', 'helicity', 'mutation_stability']
# Passed as the `initialSize` argument of findGraphValues, which currently
# does not use it. NOTE(review): presumably a sequence length -- confirm.
length = 20

def findGraphValues(df, prop, initialSize):
    """Count how often each non-zero value occurs in one column of ``df``.

    Every cell of ``df[prop]`` is parsed with ``ast.literal_eval`` and is
    iterated over; each element that is not equal to 0 is converted to
    ``float`` and tallied.

    Args:
        df: DataFrame containing the column ``prop``.
        prop: Name of the column to tally.
        initialSize: Not used by this function; kept so existing callers
            keep working.

    Returns:
        A ``defaultdict(int)`` mapping ``float`` value -> occurrence count.
    """
    counts = defaultdict(int)
    for rawCell in df[prop].tolist():
        for entry in ast.literal_eval(rawCell):
            # Zeros are excluded from the tally.
            if entry == 0:
                continue
            counts[float(entry)] += 1
    return counts

def populateDataframe(values, correct, incorrect):
    """Turn per-class value counts into relative-frequency tables.

    Args:
        values: Iterable of property values; they become the 'value' index
            of both returned frames, in the order given.
        correct: Mapping value -> count for the correct class.
        incorrect: Mapping value -> count for the incorrect class.

    Returns:
        A tuple ``(toPlot, groundTruth)``:
        * ``toPlot`` is indexed by 'value' with columns 'correct' and
          'incorrect'; each count is divided by the total count of its
          own class.
        * ``groundTruth`` is indexed by 'value' with a single column
          'probability'; the summed counts of both classes are divided by
          the combined total.
        A class (or the combination) whose total count is 0 yields 0.0 for
        every value instead of raising ZeroDivisionError.
    """
    def _share(count, total):
        # An empty class has total 0; report 0.0 rather than dividing by zero.
        return count / total if total else 0.0

    correctTotal = sum(correct.values())
    incorrectTotal = sum(incorrect.values())
    combinedTotal = correctTotal + incorrectTotal

    rowsForDF = []
    rowsForGT = []
    for value in values:
        correctCount = correct.get(value, 0)
        incorrectCount = incorrect.get(value, 0)
        rowsForDF.append([
            value,
            _share(correctCount, correctTotal),
            _share(incorrectCount, incorrectTotal),
        ])
        rowsForGT.append([value, _share(correctCount + incorrectCount, combinedTotal)])

    toPlot = pd.DataFrame(rowsForDF, columns=['value', 'correct', 'incorrect'])
    toPlot = toPlot.set_index('value')
    groundTruth = pd.DataFrame(rowsForGT, columns=['value', 'probability'])
    groundTruth = groundTruth.set_index('value')
    return toPlot, groundTruth


In [None]:
# For every (property, peptide) pair, save two area plots: the value
# distribution split by correct/incorrect rows, and the pooled distribution.
outputDir = 'figures/physchm'
# savefig does not create missing directories, so make sure the target exists.
os.makedirs(outputDir, exist_ok=True)

for prop in properties:
    for name in pepNames:
        # '@' is stripped from the name used in titles and file names.
        peptideName = name.replace('@', '')
        filteredData = data.loc[data['pepName'] == name]
        # Build the masks from filteredData itself so mask and frame share
        # the same index instead of relying on alignment against `data`.
        incorrect = filteredData.loc[filteredData['correct'] == 0]
        correct = filteredData.loc[filteredData['correct'] == 1]
        ivalues = findGraphValues(incorrect, prop, length)
        cvalues = findGraphValues(correct, prop, length)
        propValues = sorted(set(ivalues).union(cvalues))
        if not propValues:
            # No non-zero values for this peptide/property: there is nothing
            # to draw, so skip it rather than plot an empty frame.
            continue
        toPlot, groundTruth = populateDataframe(propValues, cvalues, ivalues)
        # The trailing space is part of the title and of the file names below.
        title = f'{peptideName} {prop} '
        fig = toPlot.plot.area(alpha=0.75, stacked=False, title=title + 'analysis', xlabel=prop, ylabel='probability').get_figure()
        fig.savefig(f'{outputDir}/{title}analysis.png', dpi=300)
        fig = groundTruth.plot.area(alpha=1, stacked=False, title=title + 'ground truth', xlabel=prop, ylabel='probability').get_figure()
        fig.savefig(f'{outputDir}/{title}ground truth.png', dpi=300)
        