In [None]:
import os
import pandas as pd
import numpy as np
import csv  
import glob
import logomaker as lm
import re
import matplotlib.pyplot as plt
%matplotlib inline
plt.ion()
from collections import defaultdict 

In [None]:
# Collect every CSV under ./seqLogo, relative to the current working directory.
# glob returns matches in arbitrary, filesystem-dependent order, so the list is
# sorted to keep the order in which peptides are first seen reproducible.
files = sorted(glob.glob(os.getcwd() + "/seqLogo/*.csv"))
# Maps a peptide name to a pair of sequence lists; populated by the loading
# loop further down in this cell.
allPeptides = {}

def createTwoLists():
    """Return a new two-element list holding two separate empty lists."""
    firstBucket = list()
    secondBucket = list()
    return [firstBucket, secondBucket]

for fileName in files:
    # The context manager closes the handle even when a row raises, and
    # newline='' is what the csv module expects for file objects it reads.
    with open(fileName, 'r', newline='') as openedFile:
        f = csv.reader(openedFile, delimiter=",")
        for line in f:
            # Blank or truncated rows have no columns 2/3 to inspect; skip
            # them rather than failing with IndexError below.
            if len(line) < 4:
                continue
            # Only keep rows whose third column is '1' (the ones that are
            # supposed to bind).
            if line[2] != '1':
                continue
            # '@' is stripped from the peptide name in the first column ...
            peptideName = line[0].replace('@', '')
            if peptideName not in allPeptides:
                allPeptides[peptideName] = createTwoLists()
            # ... and turned into '.' in the sequence from the second column.
            newGaps = line[1].replace('@', '.')
            # Fourth column == '1' goes to the first list, anything else to
            # the second. NOTE(review): printLogo labels these "correct" and
            # "incorrect" - confirm that this is what column 3 encodes.
            bucket = 0 if line[3] == '1' else 1
            allPeptides[peptideName][bucket].append(newGaps)

In [None]:
def printSeqLogoWithIC(sequences, name):
    """Draw, save and show an information-content sequence logo.

    Parameters
    ----------
    sequences : sequence of str
        Sequences passed to ``logomaker.alignment_to_matrix``; the characters
        ``.``, ``-`` and ``X`` are ignored when the matrix is built.
        Presumably these must all have the same length - confirm against the
        logomaker documentation.
    name : str
        Used as the figure title and as the stem of the saved file,
        ``figures/seqlogo/<name>.png``.

    Returns
    -------
    None
        If ``sequences`` is empty, nothing is drawn or saved and a short
        notice is printed instead.
    """
    # An empty list carries nothing to plot. NOTE(review): logomaker is
    # expected to reject empty input - bail out early with a readable message.
    if len(sequences) == 0:
        print(f'No sequences for "{name}"; skipping logo.')
        return

    # Positions are not cropped: every column of the matrix is drawn.
    ww_info_df = lm.alignment_to_matrix(sequences=sequences, to_type='information', characters_to_ignore='.-X')

    # Make logo (this becomes the current figure that the pyplot calls below
    # act on).
    lm.Logo(ww_info_df, font_name = 'Arial Rounded MT Bold', color_scheme='chemistry')
    plt.suptitle(name)
    # The matrix is built with to_type='information', so the axis shows
    # information content rather than entropy.
    plt.ylabel("Information content (bits)")
    plt.xlabel("Position")
    plt.tight_layout()

    # savefig does not create missing directories; make sure the target exists.
    outputDir = 'figures/seqlogo'
    os.makedirs(outputDir, exist_ok=True)
    plt.savefig(f'{outputDir}/{name}.png')
    plt.show()
    

In [None]:
def printLogo(peptide, results):
    """Print the peptide name, then draw one logo per entry of ``results``.

    ``results[0]`` is plotted under the title ``"<peptide> correct"`` and
    ``results[1]`` under ``"<peptide> incorrect"``, in that order.
    """
    print(peptide)
    correctSeqs, incorrectSeqs = results[0], results[1]
    logoJobs = (
        (correctSeqs, f'{peptide} correct'),
        (incorrectSeqs, f'{peptide} incorrect'),
    )
    for seqs, title in logoJobs:
        printSeqLogoWithIC(seqs, title)

# Show which peptides were collected, one name per line, in insertion order.
for peptideName in allPeptides:
    print(peptideName)

In [None]:
# Peptides to plot, in the order their figures should appear. This replaces
# one copy-pasted cell per peptide; add or remove names here.
PEPTIDES_TO_PLOT = [
    'RAKFKQLL',
    'GLCTLVAML',
    'NLVPMVATV',
    'GILGFVFTL',
    'FLKEKGGL',
    'IPSINVHHY',
    'TPRVTGGGAM',
    'LLWNGPMAV',
    'KRWIILGLNK',
    'GTSGSPIINR',
    'GTSGSPIVNR',
    'YVLDHLIVV',
    'ATDALMTGY',
    'EIYKRWII',
    'RPRGEVRFL',
    'KAFSPEVIPMF',
    'KLVALGINAV',
    'CINGVCWTV',
    'GPGHKARVL',
    'PKYVKQNTLKLAT',
    'TPQDLNTML',
]

# A peptide missing from allPeptides still raises KeyError, exactly as the
# individual cells did.
for peptideToPlot in PEPTIDES_TO_PLOT:
    printLogo(peptideToPlot, allPeptides[peptideToPlot])
