In [None]:
import os

# Directory that holds the downloaded data files (e.g. chr11.fa).
# The location is machine-specific, so it can be overridden through the
# CHROMOSOME_DATA_DIR environment variable; the original path stays the default.
DIR = os.environ.get('CHROMOSOME_DATA_DIR', r'c://downloads')

# Writing classes

In [None]:
class MyClass:
    """The smallest possible class: it defines no attributes and no methods."""


# Create an instance, then show the instance and its type on separate lines.
a = MyClass()
print(a, type(a), sep='\n')

In [None]:
# Attributes can be attached to an existing instance at any time.
a.member1, a.member2 = 88, 'hello'
print(a.member1, a.member2)

In [None]:
class MyClass:
    """Two getters that read ``self.x``; the class itself never assigns ``x``."""

    def get_number_plus_1(self):
        """Return the instance's ``x`` increased by one."""
        incremented = self.x + 1
        return incremented

    def get_number_plus_2(self):
        """Return the instance's ``x`` increased by two."""
        incremented = self.x + 2
        return incremented


# ``x`` has to be set from the outside before the getters can be used.
a = MyClass()
a.x = 7
plus_one = a.get_number_plus_1()
plus_two = a.get_number_plus_2()
print(plus_one, plus_two)

In [None]:
# `x` was never assigned on this new instance, so the getter fails with an
# AttributeError. The error is caught and printed so that the failure is shown
# as the cell's result without aborting a "Restart & Run All".
b = MyClass()
try:
    print(b.get_number_plus_1())
except AttributeError as error:
    print('AttributeError:', error)

In [None]:
class MyClass:
    """Stores a number and returns it shifted by a chosen amount."""

    def __init__(self, x):
        """Constructor: keep the number ``x`` on the instance."""
        self.x = x

    def get_number_plus(self, value=1):
        """Return the stored number plus ``value`` (1 when not given)."""
        total = self.x + value
        return total


a = MyClass(7)
plus_default = a.get_number_plus()
plus_five = a.get_number_plus(5)
print(plus_default, plus_five)

In [None]:
# A common mistake is to forget to add 'self' as the first argument of a method

class MyClass:
    """Deliberately broken example: do_something lacks the 'self' parameter."""

    def do_something(value):
        # No 'self' on purpose: when called on an instance, the instance itself
        # is passed as `value`, so any extra argument is one too many.
        return value ** 2


a = MyClass()
try:
    print(a.do_something(3))
except TypeError as error:
    # Show the error as the cell's result without aborting "Restart & Run All".
    print('TypeError:', error)

# Example: DNA sequence

In [None]:
class DNASeq:
    """A DNA sequence stored in upper case; validity is checked on request."""

    def __init__(self, seq):
        """Store ``seq`` converted to upper case."""
        self.seq = seq.upper()

    def is_valid(self):
        """Return True when the sequence uses no letters other than A, C, G and T."""
        return set(self.seq).issubset('ACGT')

    def transcribe(self):
        """Return the sequence as a string with every 'T' replaced by 'U'."""
        transcribed = self.seq.replace('T', 'U')
        return transcribed
        
# The first sequence ends with 'B', which is not a DNA letter; the second
# sequence contains valid letters only.
seq1 = DNASeq('ACGTTGAGTGCB')
seq2 = DNASeq('ACGTTGAGTGC')

print(seq1.is_valid())
print(seq2.is_valid())
print(seq2.transcribe())

In [None]:
# It is a convention to start "private" members and methods with _

class DNASeq:
    """A DNA sequence that is upper-cased and validated on construction.

    Raises:
        AssertionError: if the sequence contains a letter outside VALID_LETTERS.
    """

    # Class member: the letters a DNA sequence may contain
    VALID_LETTERS = set('ACGT')

    def __init__(self, seq):
        """Store ``seq`` in upper case and validate it immediately."""
        self.seq = seq.upper()
        self._validate()

    def transcribe(self):
        """Return an RNASeq built from this sequence with every 'T' replaced by 'U'."""
        return RNASeq(self.seq.replace('T', 'U'))

    def _validate(self):
        """Raise AssertionError when the sequence contains invalid letters."""
        # The alphabet is looked up through the instance, so subclasses can
        # override VALID_LETTERS and the check keeps working even if the name
        # `DNASeq` is later rebound to another class in the notebook.
        # An explicit raise is used instead of `assert`, because assert
        # statements are skipped when Python runs with -O; the exception type
        # and message are the same as before.
        if not set(self.seq) <= self.VALID_LETTERS:
            raise AssertionError('Invalid letters in DNA sequence')
        
class RNASeq:
    """An RNA sequence that is upper-cased and validated on construction.

    Raises:
        AssertionError: if the sequence contains a letter outside VALID_LETTERS.
    """

    # Class member: the letters an RNA sequence may contain
    VALID_LETTERS = set('ACGU')

    def __init__(self, seq):
        """Store ``seq`` in upper case and validate it immediately."""
        self.seq = seq.upper()
        self._validate()

    def back_transcribe(self):
        """Return a DNASeq built from this sequence with every 'U' replaced by 'T'."""
        return DNASeq(self.seq.replace('U', 'T'))

    def _validate(self):
        """Raise AssertionError when the sequence contains invalid letters."""
        # The alphabet is looked up through the instance, so subclasses can
        # override VALID_LETTERS and the check keeps working even if the name
        # `RNASeq` is later rebound to another class in the notebook.
        # An explicit raise is used instead of `assert`, because assert
        # statements are skipped when Python runs with -O; the exception type
        # and message are the same as before.
        if not set(self.seq) <= self.VALID_LETTERS:
            raise AssertionError('Invalid letters in RNA sequence')
        
# Transcribe DNA to RNA, then check that transcribing back restores the letters.
dna_seq = DNASeq('ACGTTGAGTGC')
rna_seq = dna_seq.transcribe()
print(rna_seq.seq)

round_trip_matches = dna_seq.seq == rna_seq.back_transcribe().seq
print(round_trip_matches)

# Inheritance

In [None]:
class BaseClass:
    """Base class whose f3 combines the results of f1 and f2."""

    def f1(self):
        """Return the constant 55."""
        return 55

    def f2(self):
        """Return the constant 66."""
        return 66

    def f3(self):
        """Return f1() + f2(), calling both through ``self``."""
        first = self.f1()
        second = self.f2()
        return first + second
        
class SubClass(BaseClass):
    """Inherits f1 and f3 from BaseClass and replaces f2."""

    def f2(self):
        """Return -66 in place of the base class's 66."""
        return -66
        
# Base class: f3 adds the base f1 and f2.
a = BaseClass()
base_results = (a.f1(), a.f2(), a.f3())
print(*base_results)

# Subclass: the inherited f3 calls the overriding f2.
b = SubClass()
sub_results = (b.f1(), b.f2(), b.f3())
print(*sub_results)

In [None]:
# It is very common to inherit from Exception to define custom error types

class InvalidLettersException(Exception):
    """Custom error type; adds nothing to Exception beyond its own name."""
    
def validate_dna_seq(dna_seq):
    """Raise InvalidLettersException unless every letter of ``dna_seq`` is one of A, C, G, T.

    The check is case-sensitive; nothing is returned for a valid sequence.
    """
    only_dna_letters = set(dna_seq).issubset('ACGT')
    if only_dna_letters:
        return
    raise InvalidLettersException('Invalid letters')
        
# 'B' is not a DNA letter, so the validator raises and the message is printed.
try:
    validate_dna_seq('AGGTGBAGT')
except InvalidLettersException as error:
    print(error)

In [None]:
# In Python 2, it used to be important to always inherit from object (no need in Python 3).

class MyClassPython2(object):
    """Python 2 style declaration: inherits from object explicitly."""


class MyClassPython3:
    """Python 3 style declaration: no base class is listed."""


# Instances of both classes are instances of object.
print(
    isinstance(MyClassPython2(), object),
    isinstance(MyClassPython3(), object),
    sep='\n',
)

In [None]:
# To avoid code duplication, "abstract" classes (that are not meant to be instantiated as they are) can be written.

class Seq:
    """Abstract base for sequence types: upper-cases and validates on construction.

    Subclasses must implement get_valid_letters(); this class is not meant to be
    instantiated directly.

    Raises:
        NotImplementedError: if get_valid_letters() has not been overridden.
        AssertionError: if the sequence contains a letter that is not allowed.
    """

    def __init__(self, seq):
        """Store ``seq`` in upper case and validate it immediately."""
        self.seq = seq.upper()
        self._validate()

    def get_valid_letters(self):
        """Return the set of allowed letters; must be overridden by subclasses."""
        # This type of error is commonly used in abstract classes
        raise NotImplementedError()

    def _validate(self):
        """Raise AssertionError when the sequence contains invalid letters."""
        # An explicit raise is used instead of `assert`, because assert
        # statements are skipped when Python runs with -O; the exception type
        # and message are the same as before.
        if not set(self.seq) <= self.get_valid_letters():
            raise AssertionError('Invalid letters')
        
class DNASeq(Seq):
    """DNA sequence; supplies the DNA alphabet to the validation done by Seq."""

    def get_valid_letters(self):
        """Return the set of letters allowed in a DNA sequence."""
        dna_letters = set('ACGT')
        return dna_letters

    def transcribe(self):
        """Return an RNASeq built from this sequence with every 'T' replaced by 'U'."""
        rna_letters = self.seq.replace('T', 'U')
        return RNASeq(rna_letters)

class RNASeq(Seq):
    """RNA sequence; supplies the RNA alphabet to the validation done by Seq."""

    def get_valid_letters(self):
        """Return the set of letters allowed in an RNA sequence."""
        rna_letters = set('ACGU')
        return rna_letters

    def back_transcribe(self):
        """Return a DNASeq built from this sequence with every 'U' replaced by 'T'."""
        dna_letters = self.seq.replace('U', 'T')
        return DNASeq(dna_letters)
        
dna_seq = DNASeq('ACGTTGAGTGC')
rna_seq = dna_seq.transcribe()

# Show the transcription itself.
print('%s --> %s' % (dna_seq.seq, rna_seq.seq))

# Back-transcription gives equal letters, but in a newly created object.
same_letters = dna_seq.seq == rna_seq.back_transcribe().seq
same_object = dna_seq is rna_seq.back_transcribe()
print(same_letters, same_object)

# Practical example - chromosome reader

Sample file: ftp://hgdownload.cse.ucsc.edu/goldenPath/hg38/chromosomes/chr11.fa.gz (__needs to be uncompressed__ before use; the code below reads the plain-text `chr11.fa` from `DIR`)

In [None]:
# The original code from lab exercise 3 - doesn't look very neat.
# If we want to work with multiple chromosomes simultaneously, it will be quite complicated this way. 

# NOTE: this import rebinds the notebook-level name `Seq`, which until now
# referred to the abstract Seq class defined in an earlier cell.
from Bio.Seq import Seq

try:
    # Bio.Alphabet was removed in Biopython 1.78. The import is kept (guarded)
    # only so that the name still exists on older installations.
    from Bio.Alphabet import Alphabet
except ImportError:
    Alphabet = None

with open(os.path.join(DIR, 'chr11.fa'), 'r') as f:

    # The first line is the FASTA header; its full length (newline included) is
    # the offset at which the sequence starts.
    header_len = len(f.readline())
    # Number of letters per sequence line (the trailing newline is not counted).
    line_len = len(f.readline()) - 1

    def convert_to_absolute_coordinate(position):
        """Map a 1-based sequence position to an offset within the file."""
        position_zero_index = position - 1
        # One newline character precedes the position for every full line before it.
        return header_len + position_zero_index + (position_zero_index // line_len)

    def read_seq(start, end):
        """Read positions start..end (1-based, inclusive) and return them as a Bio Seq.

        Works only while the enclosing `with` block keeps `f` open.
        """
        absolute_start = convert_to_absolute_coordinate(start)
        absolute_length = convert_to_absolute_coordinate(end) - absolute_start + 1

        f.seek(absolute_start)
        str_seq = f.read(absolute_length).replace('\n', '')
        # No alphabet argument: on older Biopython releases the generic
        # alphabet is the default, and newer releases no longer have alphabets.
        return Seq(str_seq)

    print(read_seq(100500, 100750))

In [None]:
# The OOP version

class ChromosomeReader:
    """Random-access reader for a single-record FASTA file with fixed-width lines.

    Positions are 1-based and inclusive. Offsets are computed from the length
    of the header line and the length of the first sequence line, so every
    sequence line (except possibly the last) is assumed to have the same width.

    NOTE(review): offsets are character counts passed to seek(); this presumably
    relies on a single-byte encoding and '\\n' line endings - confirm for files
    opened in text mode on Windows.
    """

    def __init__(self, file_handler):
        """Measure the header and line lengths of an open, seekable FASTA handle.

        Raises:
            ValueError: if no sequence line follows the header.
        """
        # All offsets are relative to the start of the file, so rewind first in
        # case the handle has already been read from.
        file_handler.seek(0)
        self.file_handler = file_handler
        self.header_len = len(file_handler.readline())
        # rstrip also copes with a single sequence line that has no trailing newline.
        self.line_len = len(file_handler.readline().rstrip('\n'))
        if self.line_len == 0:
            raise ValueError('No sequence line found after the FASTA header')

    def read_seq(self, start, end):
        """Return positions start..end (1-based, inclusive) as a Bio.Seq.Seq.

        Raises:
            ValueError: if start < 1 or end < start.
        """
        if start < 1 or end < start:
            # A negative read length would read to the end of the file, and
            # a start below 1 would land inside the header line.
            raise ValueError('Expected 1 <= start <= end, got start=%r, end=%r' % (start, end))

        # Imported here under an alias because this notebook also defines its
        # own class named `Seq`; this keeps the method independent of which of
        # the two the global name currently refers to.
        from Bio.Seq import Seq as BioSeq

        absolute_start = self.convert_to_absolute_coordinate(start)
        absolute_length = self.convert_to_absolute_coordinate(end) - absolute_start + 1

        self.file_handler.seek(absolute_start)
        str_seq = self.file_handler.read(absolute_length).replace('\n', '')
        # No alphabet argument: Bio.Alphabet was removed in Biopython 1.78, and
        # on older releases the generic alphabet is the default anyway.
        return BioSeq(str_seq)

    def convert_to_absolute_coordinate(self, position):
        """Map a 1-based sequence position to an offset within the file."""
        position_zero_index = position - 1
        # One newline character precedes the position for every full line before it.
        return self.header_len + position_zero_index + (position_zero_index // self.line_len)
        
# The reader can only be used while the file is open, so it is used inside the
# `with` block.
chr11_path = os.path.join(DIR, 'chr11.fa')
with open(chr11_path, 'r') as f:
    chr11_reader = ChromosomeReader(f)
    chr11_fragment = chr11_reader.read_seq(100500, 100750)
    print(chr11_fragment)