# Sequence Conservation

Analysis of sequence conservation for HlyB, PCAT, and HlyA-like protein sequences

## Import Modules

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from Bio import AlignIO
import logomaker

## Alignment Files and Sequence Conservation

### HlyB and Peptidase Sequences

In [None]:
# Read the HlyB / peptidase / microcin alignment (FASTA format).
# The file is opened in a `with` block so the handle is closed as soon as
# Biopython has parsed it, instead of being left open for the whole session.
with open("HlyB_peptidase_microcin_align_renamed.fasta") as handle:
    alignment = AlignIO.read(handle, "fasta")
len_seq = int(alignment.get_alignment_length()) # number of columns in the alignment
print("Alignment length %i" % len_seq)
# Echo every aligned sequence followed by its record id
for record in alignment:
    print(record.seq + " " + record.id)

In [None]:
# Split the alignment records into two parallel lists (same order as the alignment)
seqs = list(map(lambda entry: str(entry.seq), alignment)) # aligned sequences as plain strings
ids = list(map(lambda entry: entry.id, alignment)) # matching record ids

In [None]:
# Lookup table: record id -> aligned sequence string
align_dic = {seq_id: aligned for seq_id, aligned in zip(ids, seqs)}

In [None]:
# Turn the aligned sequences into a LogoMaker probability matrix (no pseudocount added)
align_logo_df = logomaker.alignment_to_matrix(
    sequences=seqs,
    to_type='probability',
    pseudocount=0,
)

In [None]:
# Draw the sequence logo for the full HlyB / peptidase alignment.
logo_fig = logomaker.Logo(
    df=align_logo_df,
    figsize=(100, 5),
    fade_probabilities=True,
    stack_order='big_on_top',
    color_scheme='chemistry',
)
# savefig writes the current pyplot figure, expected to be the one Logo just created
plt.savefig("Alignment Logo HlyB and Peptidases.pdf", dpi=300)

In [None]:
# Row positions 0..229 of the logo matrix, used to select the CLD region.
# Built directly from range() rather than an append loop, which also avoids
# leaving a stray loop variable in the notebook namespace.
CLD_index = list(range(230))

In [None]:
# Keep only the CLD rows of the logo matrix and renumber them 0..n-1
# (the original position labels are discarded)
sub_align_df = align_logo_df.iloc[CLD_index].reset_index(drop=True)

In [None]:
# Draw the sequence logo restricted to the CLD rows.
cld_logo_fig = logomaker.Logo(
    df=sub_align_df,
    figsize=(100, 5),
    fade_probabilities=True,
    stack_order='big_on_top',
    color_scheme='chemistry',
)
# savefig writes the current pyplot figure, expected to be the one Logo just created
plt.savefig("Alignment Logo HlyB and Peptidases by CLD.pdf", dpi=300)

In [None]:
# Selected CLD rows of the logo matrix to plot, renumbered 0..n-1.
# NOTE(review): .iloc is positional and 0-based - confirm these are alignment-column
# offsets and not 1-based residue numbers.
cld_align_df = align_logo_df.iloc[
    [60, 63, 96, 97, 98, 99, 100, 108, 113, 115, 118, 119, 137, 146, 148, 149, 151, 174]
].reset_index(drop=True)

In [None]:
# Draw the logo for the selected CLD rows (narrower canvas than the full-length logos).
cld_fig = logomaker.Logo(
    df=cld_align_df,
    figsize=(30, 5),
    fade_probabilities=True,
    stack_order='big_on_top',
    color_scheme='chemistry',
)
# savefig writes the current pyplot figure, expected to be the one Logo just created
plt.savefig("Alignment Logo HlyB and Peptidases by CLD for thesis.pdf", dpi=300)

In [None]:
# Selected transmembrane / NBD rows of the logo matrix to plot, renumbered 0..n-1.
# NOTE(review): .iloc is positional and 0-based - confirm these are alignment-column
# offsets and not 1-based residue numbers.
trans_nbd_align_df = align_logo_df.iloc[
    [335, 337, 341, 344, 411, 415, 418, 419, 422, 647, 648, 682, 684, 690, 692, 693]
].reset_index(drop=True)

In [None]:
# Draw the logo for the selected TMD and NBD rows.
trans_nbd_fig = logomaker.Logo(
    df=trans_nbd_align_df,
    figsize=(30, 5),
    fade_probabilities=True,
    stack_order='big_on_top',
    color_scheme='chemistry',
)
# savefig writes the current pyplot figure, expected to be the one Logo just created
plt.savefig("Alignment Logo HlyB and Peptidases by TM and NBD for thesis.pdf", dpi=300)

In [None]:
# CLD rows flagged for protein-lipid interactions, renumbered 0..n-1.
# NOTE(review): .iloc is positional and 0-based - confirm these are alignment-column
# offsets and not 1-based residue numbers.
cld_lip_align_df = align_logo_df.iloc[
    [73, 76, 128, 155, 156, 158, 166, 179, 187]
].reset_index(drop=True)

In [None]:
# Draw the logo for the CLD lipid-interaction rows.
# NOTE(review): the variable name `cld_lip_ig` looks like a typo for `cld_lip_fig`;
# it is kept unchanged in case other cells refer to it.
cld_lip_ig = logomaker.Logo(
    df=cld_lip_align_df,
    figsize=(30, 5),
    fade_probabilities=True,
    stack_order='big_on_top',
    color_scheme='chemistry',
)
# savefig writes the current pyplot figure, expected to be the one Logo just created
plt.savefig("Alignment Logo HlyB and Peptidases by CLD for lipid interact for thesis.pdf", dpi=300)

In [None]:
# TMD rows flagged for protein-lipid interactions, renumbered 0..n-1.
# NOTE(review): .iloc is positional and 0-based - confirm these are alignment-column
# offsets and not 1-based residue numbers (229 also falls inside the 0..229 CLD range).
tm_lip_align_df = align_logo_df.iloc[
    [229, 239, 277, 280, 356, 360, 400, 457, 490, 497, 517, 520, 523, 524]
].reset_index(drop=True)

In [None]:
# Draw the logo for the TMD lipid-interaction rows.
tm_lip_fig = logomaker.Logo(
    df=tm_lip_align_df,
    figsize=(30, 5),
    fade_probabilities=True,
    stack_order='big_on_top',
    color_scheme='chemistry',
)
# savefig writes the current pyplot figure, expected to be the one Logo just created
plt.savefig("Alignment Logo HlyB and Peptidases by TM for lipid interact for thesis.pdf", dpi=300)

### HlyA-Like Sequences

In [None]:
# Read in the filtered HlyA-like sequence alignment (FASTA format).
# The file is opened in a `with` block so the handle is closed as soon as
# Biopython has parsed it, instead of being left open for the whole session.
with open("HlyA Seq Filtered Align.fasta") as handle:
    alignment = AlignIO.read(handle, "fasta")
len_seq = int(alignment.get_alignment_length()) # number of columns in the alignment
print("Alignment length %i" % len_seq)
# Echo every aligned sequence followed by its record id
for record in alignment:
    print(record.seq + " " + record.id)

In [None]:
# Split the HlyA-like alignment records into two parallel lists (same order as the alignment)
seqs = list(map(lambda entry: str(entry.seq), alignment)) # aligned sequences as plain strings
ids = list(map(lambda entry: entry.id, alignment)) # matching record ids

In [None]:
# Lookup table: record id -> aligned sequence string
align_dic = {seq_id: aligned for seq_id, aligned in zip(ids, seqs)}

In [None]:
# Turn the aligned sequences into a LogoMaker probability matrix (no pseudocount added)
align_logo_df = logomaker.alignment_to_matrix(
    sequences=seqs,
    to_type='probability',
    pseudocount=0,
)

In [None]:
# Draw the sequence logo for the full filtered HlyA-like alignment.
logo_fig = logomaker.Logo(
    df=align_logo_df,
    figsize=(100, 5),
    fade_probabilities=True,
    stack_order='big_on_top',
    color_scheme='chemistry',
)
# savefig writes the current pyplot figure, expected to be the one Logo just created
plt.savefig("Alignment Logo HlyA Filtered Seq.pdf", dpi=300)

#### Split by residue to make thesis figure

In [None]:
# Window 1: logo-matrix rows 0-199, renumbered 0..n-1
subset_align_df = align_logo_df.iloc[:200].reset_index(drop=True)

In [None]:
# Draw the logo for the current subset_align_df window (saved as the "First 200 aa" panel).
logo_fig = logomaker.Logo(
    df=subset_align_df,
    figsize=(100, 5),
    fade_probabilities=True,
    stack_order='big_on_top',
    color_scheme='chemistry',
)
# savefig writes the current pyplot figure, expected to be the one Logo just created
plt.savefig("Alignment Logo HlyA Filtered Seq First 200 aa.pdf", dpi=300)

In [None]:
# Window 2: logo-matrix rows 200-399, renumbered 0..n-1
subset_align_df = align_logo_df.iloc[200:400].reset_index(drop=True)

In [None]:
# Draw the logo for the current subset_align_df window (saved as the "aa 200 to 400" panel).
logo_fig = logomaker.Logo(
    df=subset_align_df,
    figsize=(100, 5),
    fade_probabilities=True,
    stack_order='big_on_top',
    color_scheme='chemistry',
)
# savefig writes the current pyplot figure, expected to be the one Logo just created
plt.savefig("Alignment Logo HlyA Filtered Seq aa 200 to 400.pdf", dpi=300)

In [None]:
# Window 3: logo-matrix rows 400-599, renumbered 0..n-1
subset_align_df = align_logo_df.iloc[400:600].reset_index(drop=True)

In [None]:
# Draw the logo for the current subset_align_df window (saved as the "aa 400 to 600" panel).
logo_fig = logomaker.Logo(
    df=subset_align_df,
    figsize=(100, 5),
    fade_probabilities=True,
    stack_order='big_on_top',
    color_scheme='chemistry',
)
# savefig writes the current pyplot figure, expected to be the one Logo just created
plt.savefig("Alignment Logo HlyA Filtered Seq aa 400 to 600.pdf", dpi=300)

In [None]:
# Window 4: logo-matrix rows 600-799, renumbered 0..n-1
subset_align_df = align_logo_df.iloc[600:800].reset_index(drop=True)

In [None]:
# Draw the logo for the current subset_align_df window (saved as the "aa 600 to 800" panel).
logo_fig = logomaker.Logo(
    df=subset_align_df,
    figsize=(100, 5),
    fade_probabilities=True,
    stack_order='big_on_top',
    color_scheme='chemistry',
)
# savefig writes the current pyplot figure, expected to be the one Logo just created
plt.savefig("Alignment Logo HlyA Filtered Seq aa 600 to 800.pdf", dpi=300)

In [None]:
# Window 5: logo-matrix rows 800-999, renumbered 0..n-1
subset_align_df = align_logo_df.iloc[800:1000].reset_index(drop=True)

In [None]:
# Draw the logo for the current subset_align_df window (saved as the "aa 800 to 1000" panel).
logo_fig = logomaker.Logo(
    df=subset_align_df,
    figsize=(100, 5),
    fade_probabilities=True,
    stack_order='big_on_top',
    color_scheme='chemistry',
)
# savefig writes the current pyplot figure, expected to be the one Logo just created
plt.savefig("Alignment Logo HlyA Filtered Seq aa 800 to 1000.pdf", dpi=300)

In [None]:
# Final window: logo-matrix rows 849-1048, renumbered 0..n-1.
# NOTE(review): this overlaps the 800-1000 window; presumably chosen so the last
# panel is also 200 rows wide and ends at the end of the alignment - confirm.
subset_align_df = align_logo_df.iloc[849:1049].reset_index(drop=True)

In [None]:
# Draw the logo for the current subset_align_df window (saved as the "aa 849 to 1049" panel).
logo_fig = logomaker.Logo(
    df=subset_align_df,
    figsize=(100, 5),
    fade_probabilities=True,
    stack_order='big_on_top',
    color_scheme='chemistry',
)
# savefig writes the current pyplot figure, expected to be the one Logo just created
plt.savefig("Alignment Logo HlyA Filtered Seq aa 849 to 1049.pdf", dpi=300)