# 1. Sequence Analysis

```py
sequence = "ATGTACAACATAGTGATGGAGACGGAGCCATTGAAGCCGCCGGGCCCGCAGCAAACCTTCGGGGGGCGGCGGCGGCAACTCCACCGCGGCATTCGTUAG"
```

Questions

1. Calculate the number of nucleotides in the given sequence.
1. Calculate the number of codons in the given sequence (assuming all of it is translated).
1. Check whether the given sequence starts with 'ATG'.
1. Check whether the sequence ends in frame (i.e. the length of the sequence is
divisible by 3).
1. What percentage of the sequence is GC?
1. Print only the first and last codons.
1. Using the dictionary provided below, what amino acid does the second
codon represent?


In [1]:
sequence = "ATGTACAACATAGTGATGGAGACGGAGCCATTGAAGCCGCCGGGCCCGCAGCAAACCTTCGGGGGGCGGCGGCGGCAACTCCACCGCGGCATTCGTUAG"

sequence

'ATGTACAACATAGTGATGGAGACGGAGCCATTGAAGCCGCCGGGCCCGCAGCAAACCTTCGGGGGGCGGCGGCGGCAACTCCACCGCGGCATTCGTUAG'

In [2]:
# A. Calculate number of nucleotides in the given sequence.
seq_len = len(sequence)
seq_len

99

In [3]:
# B. Calculate the number of codons in the given sequence (if it is all translated).
seq_len // 3  # floor division stays an exact int; int(seq_len / 3) goes through a float first

33

In [4]:
# C. Check whether the given sequence starts with 'ATG'.
sequence.startswith("ATG")

True

In [5]:
# D. Check whether the sequence ends in frame. (the length of the sequence is divisible by 3)
seq_len % 3 == 0

True

In [6]:
# E. What percentage of the sequence is GC?
# Count the two bases in one pass over the string "CG" (C first, then G).
C_count, G_count = (sequence.count(base) for base in "CG")
CG_pct = (C_count + G_count) / seq_len  # fraction of the sequence that is G or C
format(CG_pct, ".2%")  # show the fraction as a percentage with two decimals

'63.64%'

In [7]:
# F. Print only the first and last codon.
codon_size = 3  # a codon is three bases wide
print("First Condon:\t", sequence[:codon_size])  # leading three bases
print("Last Condon:\t", sequence[-codon_size:])  # trailing three bases

First Condon:	 ATG
Last Condon:	 UAG


In [8]:
# G. Using the dictionary provided below, what amino acid does the second codon represent?
# Standard genetic code keyed by DNA codon; '_' marks a stop codon.
genetic_code = {
'ATA':'I', 'ATC':'I', 'ATT':'I', 'ATG':'M',
'ACA':'T', 'ACC':'T', 'ACG':'T', 'ACT':'T',
'AAC':'N', 'AAT':'N', 'AAA':'K', 'AAG':'K',
'AGC':'S', 'AGT':'S', 'AGA':'R', 'AGG':'R',
'CTA':'L', 'CTC':'L', 'CTG':'L', 'CTT':'L',
'CCA':'P', 'CCC':'P', 'CCG':'P', 'CCT':'P',
'CAC':'H', 'CAT':'H', 'CAA':'Q', 'CAG':'Q',
'CGA':'R', 'CGC':'R', 'CGG':'R', 'CGT':'R',
'GTA':'V', 'GTC':'V', 'GTG':'V', 'GTT':'V',
'GCA':'A', 'GCC':'A', 'GCG':'A', 'GCT':'A',
'GAC':'D', 'GAT':'D', 'GAA':'E', 'GAG':'E',
'GGA':'G', 'GGC':'G', 'GGG':'G', 'GGT':'G',
'TCA':'S', 'TCC':'S', 'TCG':'S', 'TCT':'S',
'TTC':'F', 'TTT':'F', 'TTA':'L', 'TTG':'L',
'TAC':'Y', 'TAT':'Y', 'TAA':'_', 'TAG':'_',
'TGC':'C', 'TGT':'C', 'TGA':'_', 'TGG':'W'
}

def get_aa(sequence: str, position: int) -> str:
  """
  Return the one-letter amino acid encoded by the codon at a given position.

  Parameters:
    sequence - nucleotide string, read in frame starting at its first base;
               lowercase letters and RNA-style 'U' are accepted and treated
               as their uppercase DNA equivalents
    position - 1-based codon index (1 selects the first three bases)

  Returns:
    The single-letter code found in genetic_code ('_' marks a stop codon).

  Raises:
    ValueError - if position is smaller than 1
    KeyError   - if there is no complete codon at that position, or the
                 codon is not present in genetic_code
  """
  if position < 1:
    # Without this guard a negative position would slice from the end of
    # the sequence and silently return the wrong codon.
    raise ValueError("position must be >= 1, got {}".format(position))
  start = 3 * (position - 1)
  # Normalise case and map RNA 'U' to DNA 'T' so codons such as 'UAG' resolve.
  codon = sequence[start:start + 3].upper().replace("U", "T")
  try:
    return genetic_code[codon]
  except KeyError:
    # Same exception type as a plain lookup, but with a message that says
    # which position failed and what was found there.
    raise KeyError(
      "no translatable codon at position {}: {!r}".format(position, codon)
    ) from None

In [9]:
# The second codon occupies bases 4-6, i.e. the slice from index 3 up to (not including) 6.
second_codon_span = slice(3, 6)
condon_2 = sequence[second_codon_span]
genetic_code[condon_2]  # look up the one-letter amino acid for that codon

'Y'

In [10]:
get_aa(sequence, 2) # same thing

'Y'

# 2. Fahrenheit to Celsius conversion

A. Convert a temperature of 23 degrees Fahrenheit to Celsius.

In [11]:
def f2c(ftemp):
  """
  Convert a temperature from degrees Fahrenheit to degrees Celsius.

  Parameters:
    ftemp - temperature in Fahrenheit, an int or a float

  Returns:
    The temperature in Celsius, computed as (ftemp - 32) * 5 / 9.

  Raises:
    TypeError - if ftemp is not an int or a float
  """
  # Raise explicitly instead of using assert: assert statements are removed
  # when Python runs with -O, which would skip this check entirely.
  if not isinstance(ftemp, (float, int)):
    raise TypeError("Please input a real number!")
  return (ftemp - 32) * 5 / 9

In [12]:
f2c(23)

-5.0

# 3. Gene Sets

- Nitrogen = Os1, Os2, Os3, Os5
- Carbon = Os1, Os3, Os4, Os5

Questions:

1. Create 2 sets of genes, one for Nitrogen and one for Carbon.
1. Using Python functions, determine which genes are present in both sets.
1. Using Python functions, determine which gene(s) are present only in the Nitrogen
set.
1. Create a new set called Allgenes that contains the genes from both sets.

In [13]:
# A. Create 2 sets of genes, one for Nitrogen and one for Carbon.

# Build each gene set with a set literal, then show its contents.
nitrogen = {"Os1", "Os2", "Os3", "Os5"}
carbon = {"Os1", "Os3", "Os4", "Os5"}
print("Nitrogen Genes:\t", nitrogen)
print("Carbon Genes:\t", carbon)

Nitrogen Genes:	 {'Os2', 'Os3', 'Os5', 'Os1'}
Carbon Genes:	 {'Os4', 'Os3', 'Os5', 'Os1'}


In [14]:
# B. Using Python functions, determine which genes are present in both sets.

def get_same_genes(genes1: set, genes2: set) -> set:
  """
  Find the genes shared by two gene sets.

  Returns a new set holding every gene that occurs in both genes1 and genes2.
  """
  shared = genes1 & genes2  # set intersection
  return shared

get_same_genes(nitrogen, carbon)

{'Os1', 'Os3', 'Os5'}

In [15]:
# C. Using Python functions, determine which gene(s) are present only in the Nitrogen set.

def get_exclusive_genes(genes1: set, genes2: set) -> set:
  """
  Find the genes that occur in genes1 but not in genes2.

  Returns a new set: the difference of genes1 and genes2.
  """
  only_in_first = genes1 - genes2  # set difference
  return only_in_first

get_exclusive_genes(nitrogen, carbon)

{'Os2'}

In [16]:
# D. Create a new set called Allgenes that contains genes from both sets

# The union keeps every gene that appears in at least one of the two sets.
Allgenes = nitrogen.union(carbon)
Allgenes

{'Os1', 'Os2', 'Os3', 'Os4', 'Os5'}