In [137]:
from pathlib import Path

In [138]:
# Make sure both target folders exist before the %%writefile cells below
# write into them (the test module goes to ../../tests).
for folder in ("../../Protein", "../../tests"):
    Path(folder).mkdir(exist_ok = True)

In [139]:
%%writefile ../../Protein/__init__.py
# An __init__.py file is required in a folder
# for it to be recognized as a Python package;
# otherwise the import statements won't work.
# The %%writefile magic stores the content of this Jupyter cell as a file.

# Let's load the Protein module into the package namespace as well
from . import Protein

Overwriting ../../Protein/__init__.py


In [168]:
%%writefile ../../Protein/Protein.py
#
# The first version of our Protein class!
# Remember to write docstrings.
#

from collections import deque
import pandas as pd
import plotly.graph_objs as go
import numpy as np

class Protein:
    """Protein record linking a FASTA sequence to per-amino-acid metrics.

    Parameters
    ----------
    ID : optional
        Stored unchanged on ``self.ID``.
    name : optional
        Stored unchanged on ``self.name``; used in the plot title.
    metrics : mapping, optional
        ``{metric name: {residue: value}}``. Every metric must provide an
        entry for each residue that occurs in the sequence.
    AAsequence : optional
        Sequence returned by the ``AAsequence`` property while no FASTA
        file is configured.
    file : optional
        Path of the FASTA file to read the sequence from.
    """

    def __init__(self, ID = None, name = None, metrics = None, AAsequence = None, file = None):
        self.ID = ID
        self.name = name
        self.metrics = metrics
        self._AAsequence = AAsequence
        self.fasta_file = file
        self._identifier = None
        self._df = None

    def open_fasta_file(self, file = None):
        """Read the FASTA file and cache its identifier and sequence.

        Parameters
        ----------
        file : optional
            If given, replaces ``self.fasta_file`` before reading.

        Returns
        -------
        list
            ``[identifier, sequence]`` where ``sequence`` is a list of
            single characters. ``identifier`` is ``None`` when the file has
            no ``>`` header line. If the file holds several records, all
            sequence lines are concatenated and the last header wins.
        """
        if file is not None:
            self.fasta_file = file
        # Initialised up front so a headerless file no longer raises
        # UnboundLocalError.
        identifier = None
        sequence = []
        with open(self.fasta_file) as handle:
            for raw_line in handle:
                # Strip surrounding whitespace so stray blanks never end up
                # as residues.
                line = raw_line.strip()
                if not line:
                    continue
                if line.startswith(">"):
                    identifier = line.replace(">", "")
                else:
                    sequence.extend(line)
        self._AAsequence = sequence
        self._identifier = identifier
        return [self._identifier, self._AAsequence]

    @property
    def AAsequence(self):
        """Sequence as a list of residues.

        Re-read from the FASTA file when one is configured; otherwise the
        sequence handed to the constructor (``None`` if there is none, in
        which case a notice is printed).
        """
        if self.fasta_file is None:
            if self._AAsequence is None:
                print("no .fasta file")
            return self._AAsequence
        self._AAsequence = self.open_fasta_file(self.fasta_file)[1]
        return self._AAsequence

    @property
    def identifier(self):
        """Header of the FASTA file, or ``None`` (with a printed notice)
        when no file is configured."""
        if self.fasta_file is None:
            print("no .fasta file")
            return None
        self._identifier = self.open_fasta_file(self.fasta_file)[0]
        return self._identifier

    @property
    def df(self):
        """Data frame built by the most recent ``create_df`` call, or
        ``None`` if none has been built yet."""
        return self._df

    def create_df(self):
        """Build a data frame with one row per residue and one column per metric.

        Returns
        -------
        pandas.DataFrame or str
            The data frame, or the string
            ``"missing metrics or .fasta file"`` when either input is absent.
        """
        # Identity checks: ``== None`` is ambiguous for array-like metrics.
        if self.fasta_file is None or self.metrics is None:
            return "missing metrics or .fasta file"
        sequence = self.open_fasta_file(self.fasta_file)[1]
        records = [
            {key: per_residue[aa] for key, per_residue in self.metrics.items()}
            for aa in sequence
        ]
        self._df = pd.DataFrame(records)
        return self._df

    def _require_df(self):
        """Return a freshly built data frame or raise if inputs are missing."""
        frame = self.create_df()
        if isinstance(frame, str):
            # create_df signals missing inputs with a message string.
            raise ValueError(frame)
        return frame

    @staticmethod
    def _trailing_mean(values, window_size):
        """Mean over the current and up to ``window_size - 1`` preceding values."""
        if window_size < 1:
            raise ValueError("window_size must be at least 1")
        window = deque([], maxlen=window_size)
        average = []
        for value in values:
            window.append(value)
            average.append(sum(window)/len(window))
        return average

    def averaging_metric(self, metric, window_size):
        """Smooth one metric along the sequence with a trailing window.

        Parameters
        ----------
        metric : str
            Column of the data frame (a key of ``self.metrics``).
        window_size : int
            Number of positions averaged; must be >= 1.

        Returns
        -------
        list
            One averaged value per sequence position.

        Raises
        ------
        ValueError
            If metrics or the FASTA file are missing, or ``window_size < 1``.
        """
        frame = self._require_df()
        return self._trailing_mean(frame[metric], window_size)

    def plot(self, metric = "hydropathy", window_size = 5):
        """Bar chart of a window-averaged metric along the sequence.

        Parameters
        ----------
        metric : str
            Metric to plot; ``"hydropathy"`` is shorthand for
            ``"hydropathy index (Kyte-Doolittle method)"``.
        window_size : int
            Size of the trailing averaging window.

        Returns
        -------
        plotly.graph_objs.Figure

        Notes
        -----
        The hover text requires the metrics ``"Name"``, ``"3-letter code"``
        and ``"1-letter code"`` to be present.
        """
        if metric == "hydropathy":
            metric = "hydropathy index (Kyte-Doolittle method)"

        # Parse the FASTA file once and reuse the frame for both the
        # averaging and the hover text.
        frame = self._require_df()
        average = self._trailing_mean(frame[metric], window_size)

        layout = {"title": {"text": "{0}, averaging window size: {1}".format(self.name, window_size)},
                "template" : "plotly", 
                "yaxis": {"title": {"text": metric}},
                "xaxis": {"title": {"text": "amino acid position"}}
                }

        data = [
            go.Bar(
                x = frame.index,
                y = np.array(average),
                hovertext="Name:"+frame["Name"] + "<br />" +\
                        "abbr.:" + frame["3-letter code"]+ ", " + frame["1-letter code"]
            )
        ]

        fig = go.Figure(data = data, layout = layout)
        return fig

        

Overwriting ../../Protein/Protein.py


In [169]:
%%writefile ../../tests/test_Protein.py
import sys
from pathlib import Path
# -------- START of inconvenient addon block --------
# This block is not necessary if you have installed your package,
# e.g. using pip install -e (requires setup.py),
# or have a symbolic link in your site-packages (my preferred way)
sys.path.append(
    str(Path(__file__).parent.parent.resolve())
)
# It makes `import Protein` possible
# This is a demo hack for the course :)
# --------  END of inconvenient addon block  --------

import Protein


from collections import deque
import pandas as pd
import plotly.graph_objs as go
from pathlib import Path

def test_creates_object():
    """A Protein can be instantiated without passing any argument."""
    protein_cls = Protein.Protein.Protein
    instance = protein_cls()
    assert isinstance(instance, protein_cls)

def test_attributes():
    """ID and name assigned after construction can be read back unchanged."""
    protein = Protein.Protein.Protein()
    protein.ID, protein.name = "P32249", "G-protein coupled receptor 183"
    observed = [protein.ID, protein.name]
    assert observed == ["P32249", "G-protein coupled receptor 183"]

def test_None_aattributes():
    """A Protein built without arguments exposes None for every attribute."""
    P = Protein.Protein.Protein()
    # The data frame is stored on the private ``_df`` attribute; the class
    # defines no public ``df``, so reading ``P.df`` raised AttributeError.
    observed = [P.ID, P.name, P.metrics, P.AAsequence, P.fasta_file, P.identifier, P._df]
    assert observed == [None] * 7

def test_create_df():
    """create_df yields exactly one row per residue of the sequence."""
    # Resolve the fixtures relative to the repository root (the parent of
    # this tests folder) instead of a machine-specific absolute path.
    repo_root = Path(__file__).resolve().parent.parent
    aa_df = pd.read_csv(repo_root / "data" / "amino_acid_properties.csv")
    aa_df = aa_df.set_index("1-letter code", drop = False)
    metrics = aa_df.to_dict()
    file = str(repo_root / "exercises" / "day5" / "P32249.fasta")
    P = Protein.Protein.Protein(file = file, metrics = metrics)
    assert len(P.create_df()) == len(P.AAsequence)

def test_not_create_df():
    """Without metrics, create_df returns its explanatory message."""
    # Resolve the fixture relative to the repository root (the parent of
    # this tests folder) instead of a machine-specific absolute path.
    repo_root = Path(__file__).resolve().parent.parent
    file = str(repo_root / "exercises" / "day5" / "P32249.fasta")
    P = Protein.Protein.Protein(file = file)
    assert P.create_df() == "missing metrics or .fasta file"

def test_plot():
    """plot returns a plotly Figure for a fully configured Protein."""
    # Resolve the fixtures relative to the repository root (the parent of
    # this tests folder) instead of a machine-specific absolute path.
    repo_root = Path(__file__).resolve().parent.parent
    aa_df = pd.read_csv(repo_root / "data" / "amino_acid_properties.csv")
    aa_df = aa_df.set_index("1-letter code", drop = False)
    metrics = aa_df.to_dict()
    file = str(repo_root / "exercises" / "day5" / "P32249.fasta")
    P = Protein.Protein.Protein(file = file, metrics = metrics)
    # The isinstance result was previously discarded, so the test could
    # never fail; it is now asserted against the public Figure class.
    assert isinstance(P.plot(), go.Figure)

Overwriting ../../tests/test_Protein.py
