In [2]:
import plotly.io as pio
pio.renderers.default = "browser"
import pandas as pd
import plotly.graph_objs as go
from collections import deque
import requests


class Protein(object):
    """
    A protein identified by its UniProt ID whose sequence is downloaded on
    demand and can be mapped onto per-amino-acid property values.
    """

    def __init__(self, protein_id):
        """
        initializes an object of class protein for a given protein id
        Args:
            protein_id: Is the unique ID given to each protein

        """
        self.protein_id = protein_id

    def get_data(self):
        """
        downloads the fasta-file, stores it as ``../data/<protein_id>.fasta``
        and extracts the 1-letter code of the amino acids for the protein
        Returns:
            String containing the 1-letter code of each amino acid contained
            in the protein (header lines starting with '>' are dropped)
        Raises:
            requests.HTTPError: if the server answers with an error status

        """
        url = 'https://www.uniprot.org/uniprot/' + self.protein_id + '.fasta?fil=reviewed:yes'
        r = requests.get(url)
        # Fail loudly on 4xx/5xx instead of saving an error page and later
        # treating its text as an amino acid sequence.
        r.raise_for_status()
        fasta_file_protein = '../data/' + self.protein_id + '.fasta'
        # The with-statement closes the file; no explicit close() is needed.
        with open(fasta_file_protein, 'wb') as file:
            file.write(r.content)

        with open(fasta_file_protein, 'r') as file:
            # strip() drops the line break as well as stray whitespace so that
            # only residue letters end up in the sequence.
            return "".join(
                line.strip() for line in file if not line.startswith('>')
            )

    def map(self, lookup:dict, aa_property:str, window_len = 1):
        """
        matches each amino acid in the sequence with its property and
        substitutes the values before averaging them for a sliding window

        The window is trailing: the value at position i is the mean over the
        last ``window_len`` residues ending at i. For the first positions,
        where fewer residues are available, the mean is taken over the
        residues seen so far.
        Args:
            lookup: Dictionary mapping a property name to a dictionary that
                maps each 1-letter code to the value of that property
            aa_property: The property the protein has to be analyzed
            window_len: Number of amino acids which the property will be
                averaged over (at least 1; None means an unbounded window)
        Returns:
            List containing the property for the amino acids after having been averaged
            for the given sliding window
        Raises:
            ValueError: if window_len is smaller than 1
            KeyError: if the property or an amino acid is missing in lookup

        """
        # Validate before downloading: a zero-length window would otherwise
        # only surface as a ZeroDivisionError after the network request.
        if window_len is not None and window_len < 1:
            raise ValueError("window_len must be at least 1")

        mapping_dict = lookup[aa_property]
        seq = self.get_data()

        # A bounded deque discards the oldest value automatically once
        # window_len values have been appended.
        window = deque([], window_len)
        property_mean = []
        for aa in seq:
            window.append(mapping_dict[aa])
            property_mean.append(sum(window) / len(window))
        return property_mean

def extract_property_dict():
    """
    Reads ``../data/amino_acid_properties.csv`` and builds one lookup
    dictionary per property column, keyed by the 1-letter amino acid code.

    The first three columns of the file are not treated as properties and
    are left out of the result.
    Returns:
        Nested dictionary of the form ``{property name: {1-letter code: value}}``

    """
    columns = pd.read_csv("../data/amino_acid_properties.csv").to_dict()
    letters = list(columns['1-letter code'].values())
    return {
        name: dict(zip(letters, columns[name].values()))
        for index, name in enumerate(columns)
        if index >= 3
    }

# Build the per-property lookup tables once; both mappings below reuse them.
lookup = extract_property_dict()


GPCR = Protein("P32249")
sequence = GPCR.get_data()
print(sequence)
# Hydropathy per residue, unsmoothed (window of 1) and averaged over a
# trailing window of 20 residues.
hydropathy_one = GPCR.map(lookup, 'hydropathy index (Kyte-Doolittle method)', 1)
hydropathy_twenty = GPCR.map(lookup, 'hydropathy index (Kyte-Doolittle method)', 20)

# One 0-based x position per residue. The length is taken from the downloaded
# sequence instead of being hard-coded, so the DataFrames below stay
# consistent for any protein.
sequence_position = list(range(len(sequence)))
aa_hydropathy_one = pd.DataFrame({'sequence position': sequence_position, 'hydropathy': hydropathy_one})
aa_hydropathy_twenty = pd.DataFrame({'sequence position': sequence_position, 'hydropathy': hydropathy_twenty})

# Bar chart of the unsmoothed hydropathy values.
data_one = [
    go.Bar(
        x=aa_hydropathy_one["sequence position"],
        y=aa_hydropathy_one['hydropathy'],
    )
]

fig_one = go.Figure(data=data_one)
fig_one.update_layout(xaxis = dict(title = 'Sequence position'), yaxis = dict(title = 'Hydropathy'), title="Hydropathy of AAs in GPCR 183 with a window of 1")
fig_one.show()

# Bar chart of the hydropathy values averaged over a window of 20.
data_twenty = [
    go.Bar(
        x=aa_hydropathy_twenty["sequence position"],
        y=aa_hydropathy_twenty['hydropathy'],
    )
]

# Kept in its own variable so the first figure is not overwritten.
fig_twenty = go.Figure(data=data_twenty)
fig_twenty.update_layout(xaxis = dict(title = 'Sequence position'), yaxis = dict(title = 'Hydropathy'), title="Hydropathy of AAs in GPCR 183 with a window of 20")
fig_twenty.show()

MDIQMANNFTPPSATPQGNDCDLYAHHSTARIVMPLHYSLVFIIGLVGNLLALVVIVQNRKKINSTTLYSTNLVISDILFTTALPTRIAYYAMGFDWRIGDALCRITALVFYINTYAGVNFMTCLSIDRFIAVVHPLRYNKIKRIEHAKGVCIFVWILVFAQTLPLLINPMSKQEAERITCMEYPNFEETKSLPWILLGACFIGYVLPLIIILICYSQICCKLFRTAKQNPLTEKSGVNKKALNTIILIIVVFVLCFTPYHVAIIQHMIKKLRFSNFLECSQRHSFQISLHFTVCLMNFNCCMDPFIYFFACKGYKRKVMRMLKRQVSVSISSAVKSAPEENSREMTETQMMIHSKSSNGK
[1.9, -3.5, 4.5, -3.5, 1.9, 1.8, -3.5, -3.5, 2.8, -0.7, -1.6, -1.6, -0.8, 1.8, -0.7, -1.6, -3.5, -0.4, -3.5, -3.5, 2.5, -3.5, 3.8, -1.3, 1.8, -3.2, -3.2, -0.8, -0.7, 1.8, -4.5, 4.5, 4.2, 1.9, -1.6, 3.8, -3.2, -1.3, -0.8, 3.8, 4.2, 2.8, 4.5, 4.5, -0.4, 3.8, 4.2, -0.4, -3.5, 3.8, 3.8, 1.8, 3.8, 4.2, 4.2, 4.5, 4.2, -3.5, -3.5, -4.5, -3.9, -3.9, 4.5, -3.5, -0.8, -0.7, -0.7, 3.8, -1.3, -0.8, -0.7, -3.5, 3.8, 4.2, 4.5, -0.8, -3.5, 4.5, 3.8, 2.8, -0.7, -0.7, 1.8, 3.8, -1.6, -0.7, -4.5, 4.5, 1.8, -1.3, -1.3, 1.8, 1.9, -0.4, 2.8, -3.5, -0.9, -4.5, 4.5, -0.4, -3.5, 1.8, 3.8, 2.5, -4.5, 4.5, -0.7, 1.8, 3.8, 4.2, 2.8, -1.3, 4.5, -3.5, -0.7, -