In [1]:
import pandas as pd
import altair as alt
import numpy as np
import os
# Use the default data transformer with its row limit disabled (max_rows=None),
# so frames larger than Altair's default cap can be embedded in a chart.
alt.data_transformers.enable('default', max_rows=None)

DataTransformerRegistry.enable('default')

In [2]:
# Load worksheet 'Sheet9' of table.xlsx into a single wide frame.
df = pd.read_excel('table.xlsx', sheet_name = 'Sheet9')

In [3]:
# Split the existing df into two tables based on column names.
all_columns = df.columns.tolist()

# Table 1: the '2.5+5' column first, followed by every column from the start of
# df up to and including 'C' (or all columns when there is no 'C').
last_left = all_columns.index('C') + 1 if 'C' in all_columns else len(all_columns)
left_columns = ['2.5+5'] + [name for name in all_columns[:last_left] if name != '2.5+5']
df1 = df[left_columns]

# Table 2: 'Unnamed: 29' and every column to its right.
df2 = df.loc[:, 'Unnamed: 29':]

# Keep only the rows that have no missing value within each table.
df1 = df1.dropna(axis = 'rows')
df2 = df2.dropna(axis = 'rows')

In [4]:
def _clean_cell(val):
    """Normalise a single table cell for plotting.

    Values that float() accepts (numbers and numeric strings) are rounded to
    two decimals. The markers 'wt' and 'qs' are replaced by 'X' and '-'.
    Any other value is returned unchanged.
    """
    try:
        return round(float(val), 2)
    except (TypeError, ValueError):
        # float() raises ValueError for non-numeric strings but TypeError for
        # non-string objects such as None; both mean "not a number" here.
        if val == 'wt':
            return 'X'
        if val == 'qs':
            return '-'
        return val


def transform(df):
    """Reshape a wide table into long form, one record per cell.

    Parameters
    ----------
    df : pandas.DataFrame
        The first column supplies the row labels; each remaining column is a
        value column whose name is used as the column label.

    Returns
    -------
    pandas.DataFrame
        Columns 'row', 'col', 'data' and 'mut', ordered row by row and, within
        a row, in the column order of ``df``. 'data' is the cleaned cell value
        (see ``_clean_cell``) and 'mut' is ``str(row) + str(col)``.
    """
    col = df.columns[1:].tolist()
    rown = []
    coln = []
    data = []
    mutn = []
    # One pass over plain tuples instead of a positional .iloc lookup per cell.
    for label, *cells in df.itertuples(index=False, name=None):
        for name, val in zip(col, cells):
            rown.append(label)
            coln.append(name)
            data.append(_clean_cell(val))
            mutn.append(str(label) + str(name))
    return pd.DataFrame({'row': rown, 'col': coln, 'data': data, 'mut': mutn})

In [5]:
# Convert both wide tables to long form (columns: row, col, data, mut).
ndf1 = transform(df1)
ndf2 = transform(df2)
# Display the first long table as the cell output.
ndf1

Unnamed: 0,row,col,data,mut
0,S19,M,0.44,S19M
1,S19,L,0.18,S19L
2,S19,I,1.43,S19I
3,S19,V,0.95,S19V
4,S19,A,-0.7,S19A
...,...,...,...,...
12015,K619,Y,0.6,K619Y
12016,K619,F,0.12,K619F
12017,K619,P,1.57,K619P
12018,K619,G,-1.91,K619G


In [6]:
def make_chart(ndf1):
    """Build a labelled heatmap from a long-form table.

    ``ndf1`` is expected to provide the fields 'row', 'col', 'data' and 'mut'.
    'col' is placed on the x axis and 'row' on the y axis, both in data order
    (``sort = None``). The result layers coloured rectangles (colour taken
    from 'data' as a quantitative field) under black text showing 'data'.
    """
    # Three-stop colour scale: orange at -5, white at 0, blue at 5.
    binding_scale = alt.Scale(
        domain = [-5, 0, 5],
        range = ["#FF7128", "white", "#0070C0"]
    )

    # Shared axes and size for both layers.
    grid = alt.Chart(ndf1).encode(
        x = alt.X('col', sort = None, title = 'Amino Acid'),
        y = alt.Y('row', sort = None, title = 'Position'),
    ).properties(
        width = 500,
        height = 10000
    )

    # Colour layer, with a tooltip giving the value and the mutation label.
    hover = [
        alt.Tooltip('data', title = 'RBD Binding'),
        alt.Tooltip('mut', title = 'Mutation'),
    ]
    cells = grid.mark_rect().encode(
        color = alt.Color('data:Q', scale = binding_scale,
                          legend = alt.Legend(title = "RBD Binding")),
        tooltip = hover
    )

    # Text layer printing the value on top of each cell.
    labels = grid.mark_text(size = 10).encode(
        text = 'data',
        color = alt.value("black")
    )

    return cells + labels

In [7]:
# Heatmap panel for the first long table.
r1 = make_chart(ndf1)

In [8]:
# Numeric colour category per secondary-structure label; any label that is
# neither 'loop' nor 'a-h' falls into category 5.
category_by_label = {'loop': -5, 'a-h': 0}

# One label per line of the file, with surrounding whitespace removed.
with open('secondary_structure.txt') as handle:
    secondary = [line.strip() for line in handle]
seccat = [category_by_label.get(label, 5) for label in secondary]

# Append four placeholder entries ('NA', category -5) to both lists.
secondary += ['NA'] * 4
seccat += [-5] * 4

In [9]:
# Load the 'surface' worksheet and attach the secondary-structure lists as
# columns (pandas requires their lengths to match the number of rows).
df = pd.read_excel('surface.xls', sheet_name = 'surface')
df['secondary'] = secondary
df['seccat'] = seccat

# Bucket 'Surface' into three categories: <= 50 -> -5, <= 100 -> 0, else 5.
Surface_Cat = [
    -5 if area <= 50 else (0 if area <= 100 else 5)
    for area in df['Surface']
]
df['SCategory'] = Surface_Cat
df

Unnamed: 0,Location,Surface,secondary,seccat,SCategory
0,S19,92.4,loop,-5,0
1,T20,64.0,loop,-5,0
2,I21,35.2,loop,-5,-5
3,E22,20.0,a-h,0,-5
4,E23,56.0,a-h,0,0
...,...,...,...,...,...
596,D615,86.0,loop,-5,0
597,Q616,50.0,,-5,-5
598,S617,50.0,,-5,-5
599,I618,50.0,,-5,-5


In [10]:
# Single-column strip coloured by the surface category of each location.
color_scale = alt.Scale(
    domain = [-5, 0, 5],
    range = ["#FF7128", "white", "#0070C0"]
)
# Locations in data order, with the axis labels hidden.
location_axis = alt.Y('Location', sort = None, title = 'Location',
                      axis = alt.Axis(labels = False))
surface_color = alt.Color('SCategory:Q', scale = color_scale,
                          legend = alt.Legend(title = "Surface Category"))
r2 = alt.Chart(df).encode(y = location_axis).mark_rect().encode(color = surface_color)
r2

In [62]:
# Single-column strip coloured by the secondary-structure category of each location.
color_scale = alt.Scale(
    domain = [-5, 0, 5],
    range = ["#FF7128", "white", "#0070C0"]
)
# Locations in data order, with the axis labels hidden.
location_axis = alt.Y('Location', sort = None,
                      axis = alt.Axis(labels = False))
secondary_color = alt.Color('seccat:Q', scale = color_scale,
                            legend = alt.Legend(title = "Secondary"))
r3 = alt.Chart(df).encode(y = location_axis).mark_rect().encode(color = secondary_color)

In [66]:
# Place the three panels side by side; each keeps its own colour scale and
# legend, and the view border is removed.
panels = alt.hconcat(r1, r2, r3)
r = panels.resolve_scale(color='independent').configure_view(stroke=None)

In [68]:
# Write the combined chart to result.html in the working directory.
r.save('result.html')