In [3]:
from glob import glob
import os
import subprocess
import pandas as pd
import numpy as np
import random
import statistics
import csv
import matplotlib.pyplot as plt
from pylab import figure, axes, title, show
from scipy import stats
import gzip
import matplotlib.ticker as tkr
from matplotlib import rc

# From the alignment, identify positions which are conserved in mammals and classify based on conservation in snakes.

In [None]:
# Classify every alignment column whose residue is highly conserved in mammals
# by how many of the listed snakes carry a different residue at that column.
#
# Input: a headerless TSV whose first field is the row label. Rows labelled
# "ZF-CW" and "PWWP" hold "TRUE" for columns inside the respective domain, rows
# labelled with a name from `snakes` are snake sequences, and every remaining
# row (including "Homo_sapiens") is counted as a mammal sequence.
#
# Output (module-level names read by later cells):
#   All_Same / Most_Same / Most_Diff / All_Diff / CS_Diff -- human-ortholog
#       positions falling into each substitution category
#   PWWPrange / ZFCWrange -- human positions of columns flagged as in-domain
#   *_Mam_Same / *_Snake_Diff -- per-domain tallies
snakes = ["Burmese_python", "Tiger_rattlesnake", "Brown-spotted_pit_viper", "Western_garter_snake", "Common_garter_snake", "Corn_snake", "Indian_cobra", "Eastern_brown_snake", "Tiger_snake"]
alignments = pd.read_csv("Z2_snakes_and_Cav_mammals_wDomains.tsv", index_col=0, sep="\t", header=None)
n_snakes = len(snakes)
PWWP_Mam_Same = 0
PWWP_Snake_Diff = 0
ZFCW_Mam_Same = 0
ZFCW_Snake_Diff = 0
All_Same = []
Most_Same = []
Most_Diff = []
All_Diff = []
CS_Diff = []
PWWPrange = []
ZFCWrange = []
humanpos = 0  # running count of non-gap human residues seen so far
mamcount = 0
for column in alignments.columns:
    snakeaa = {}   # residue -> number of snakes carrying it in this column
    mammalaa = {}  # residue -> number of mammals carrying it in this column
    PWWP = False
    ZFCW = False
    # Walk the column once instead of doing a .loc lookup per comparison.
    # Row order matters: humanpos is whatever it was when a domain row is reached.
    for index, residue in alignments[column].items():
        if index == "Homo_sapiens":
            if residue != "-":
                humanpos += 1
        elif index == "Corn_snake":
            CSval = residue
        # Deliberately a separate `if`: Corn_snake must still fall through to the
        # snake tally below, and Homo_sapiens to the mammal tally.
        if index == "ZF-CW":
            if residue == "TRUE":
                ZFCW = True
                ZFCWrange.append(humanpos)
        elif index == "PWWP":
            if residue == "TRUE":
                PWWP = True
                PWWPrange.append(humanpos)
        elif index in snakes:
            snakeaa[residue] = snakeaa.get(residue, 0) + 1
        else:
            mammalaa[residue] = mammalaa.get(residue, 0) + 1

    # Most common mammalian residue (first one wins on ties, as insertion-ordered).
    total = sum(mammalaa.values())
    currmax = max(mammalaa, key=mammalaa.get, default="")
    maxcount = mammalaa.get(currmax, 0)

    # Keep only columns where >95% of mammals agree on a non-gap residue.
    if not maxcount > (.95 * total):
        continue
    mammax = currmax
    if mammax == "-":
        continue
    if ZFCW:
        ZFCW_Mam_Same += 1
    elif PWWP:
        PWWP_Mam_Same += 1

    # Count snakes that differ from the mammalian consensus. A gap in a snake
    # is treated as "same" (i.e. not counted as a substitution).
    diff = sum(count for aa, count in snakeaa.items() if aa != "-" and aa != mammax)
    same = sum(snakeaa.values()) - diff

    if diff == 0:
        All_Same.append(humanpos)
    elif diff == 1 and CSval not in (mammax, "-"):
        # Corn-snake-specific substitution. A corn snake gap is excluded here for
        # consistency with the tally above, where gaps never count as a difference;
        # otherwise a lone substitution in another snake would be attributed to
        # corn snake whenever corn snake has a gap at this column.
        CS_Diff.append(humanpos)
    elif 2 * diff < n_snakes:
        # Fewer than half of the snakes differ (diff < 5 for nine snakes).
        Most_Same.append(humanpos)
    elif diff < n_snakes:
        # At least half, but not all, of the snakes differ (5-8 for nine snakes).
        Most_Diff.append(humanpos)
    elif diff == n_snakes:
        All_Diff.append(humanpos)
        if ZFCW:
            ZFCW_Snake_Diff += 1
        if PWWP:
            PWWP_Snake_Diff += 1

In [None]:
from matplotlib.patches import Rectangle


def _hide_spines(ax, sides):
    """Turn off the named spines ('left', 'right', 'top', 'bottom') of `ax`."""
    for side in sides:
        ax.spines[side].set_visible(False)


# Build Figure 5 as five panels on a 15 x 20 grid:
#   a1 = binding assay bars (5A), a2 = substitution track (5B),
#   a3 / a4 = RMSD histograms (5C-D), a5 = empty frame-less panel.
plt.rcParams.update({'font.size': 6})
fig = plt.figure(constrained_layout=True, dpi=1200)
gs = fig.add_gridspec(15, 20)
a1 = fig.add_subplot(gs[0:7, 0:6])
a2 = fig.add_subplot(gs[0:7, 6:20])
a3 = fig.add_subplot(gs[7:11, 0:5])
a4 = fig.add_subplot(gs[11:15, 0:5])
a5 = fig.add_subplot(gs[7:15, 5:20])

# --- 5A: binding assay bar plot -------------------------------------------
# Rows 0-4 are plotted as mouse, rows 5-9 as corn snake. Bar height comes from
# column 4 and the error bar is twice the value in column 5.
bindingassay = pd.read_csv("BindingAssay_Data.txt", sep="\t")
for first, last, colour, z, label in (
    (0, 5, "C0", 1, "Mouse ZCWPW2\nn=2"),
    (5, 10, "C4", 2, "Corn snake ZCWPW2\nn=3"),
):
    a1.bar(
        x=range(first, last),
        height=bindingassay.iloc[first:last, 4],
        color=colour,
        zorder=z,
        yerr=[x * 2 for x in bindingassay.iloc[first:last, 5]],
        capsize=2,
        label=label,
        error_kw={"linewidth": .5},
    )
a1.set_xticks(range(0, 10))
a1.set_xticklabels(bindingassay.iloc[:, 0], rotation=70, fontsize=6)
a1.set_ylabel("Band intensity \nnormalized to K4/K36", fontsize=6)
_hide_spines(a1, ("right", "top"))

# Overlay the values from columns 1-3 as points, nudged sideways so they do not
# sit on top of each other.
for col, offset, z in ((1, 0, 3), (2, -.1, 4), (3, .1, 5)):
    a1.scatter(
        [x + offset for x in range(0, 10)],
        bindingassay.iloc[:, col],
        color="black", alpha=.7, s=7, zorder=z,
    )
a1.legend(loc=(.05, .75), fontsize=5)

# --- 5B: substitution track (position lists come from the alignment cell) ----
_hide_spines(a2, ("right", "top", "bottom", "left"))
for positions, colour, label in (
    (All_Same, 'darkgrey', "Conserved in all snakes"),
    (Most_Same, "#ffb6db", 'Substitution in <50% of snakes'),
    (Most_Diff, "#ff6db6", 'Substitution in >50% of snakes'),
    (All_Diff, "#490092", 'Substitution in all snakes'),
):
    a2.vlines(x=positions, ymin=2, ymax=3, colors=colour, label=label, linewidth=.75)

# Shade the extent of each domain behind the track.
for domain_positions in (PWWPrange, ZFCWrange):
    start = min(domain_positions)
    a2.add_patch(Rectangle((start, 1.75), max(domain_positions) - start, 1.5, alpha=.2, color="grey"))
a2.text(s="PWWP", x=127, y=3.4, fontsize=6)
a2.text(s="zf-CW", x=43, y=3.4, fontsize=6)
a2.hlines(2.5, 1, 356, color="black", linewidth=1)
a2.set_ylim(1, 6)
a2.set_xlim(1, 360)
a2.set_xlabel("ZCWPW2 amino acid position in human ortholog", fontsize=6)
a2.legend(title="Highly conserved in mammals", ncol=2, fontsize=5, loc="upper center")
a2.set_yticks([])

# --- 5C-D: RMSD distributions for the two ZCWPW2 domains ------------------
# NOTE: ZFCW / PWWP are rebound to DataFrames here; the alignment cell uses the
# same names as per-column boolean flags.
for ax in (a3, a4):
    _hide_spines(ax, ("right", "top", "left"))
bin_s = np.arange(0, 1.5, .01)  # shared bins for both comparisons

# Human vs corn snake (lower panel); the histogram uses column 3 of each CSV.
ZFCW = pd.read_csv("Z2_ZFCW_Hum_Snake.csv")
PWWP = pd.read_csv("Z2_PWWP_Hum_Snake.csv")
a4.hist(ZFCW.iloc[:, 3], bins=bin_s, color="#920000", label="zf-CW domain", alpha=.7)
a4.hist(PWWP.iloc[:, 3], bins=bin_s, color="#009292", label="PWWP domain", alpha=.7)
# Backslashes are escaped explicitly: "\m" and "\A" are not valid string escapes,
# while "\n" must remain a real newline.
a4.set_xlabel("RMSD ($\\mathrm{\\AA}$) between ZCWPW2\nstructure predictions", fontsize=6)
a4.tick_params(axis='both', which='major', labelsize=6)
a4.set_title("Human-corn snake comparison", fontsize=6)
a4.set_yticks([])

# Human vs mouse (upper panel); x ticks are hidden on this panel only.
ZFCW = pd.read_csv("Z2_ZFCW_Hum_Mouse.csv")
PWWP = pd.read_csv("Z2_PWWP_Hum_Mouse.csv")
a3.set_title("Human-mouse comparison", fontsize=6)
a3.hist(ZFCW.iloc[:, 3], bins=bin_s, color="#920000", label="zf-CW domain", alpha=.8)
a3.hist(PWWP.iloc[:, 3], bins=bin_s, color="#009292", label="PWWP domain", alpha=.8)
a3.legend(fontsize=5)
a3.set_yticks([])
a3.set_xticks([])

# --- a5: blank panel with no frame or ticks -------------------------------
_hide_spines(a5, ("right", "top", "left", "bottom"))
a5.set_yticks([])
a5.set_xticks([])

fig.savefig("Figure5.svg")