In [None]:
import graph_fourier_transform
import graph_ruggedness_de
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import networkx as nx
import os
import matplotlib.ticker as ticker

### GFT and DE for hamming and KNN graphs
The cells below compute the Dirichlet energy and the graph Fourier transform over the Hamming and KNN graphs.

In [None]:
def remove_negative_sign(x, pos):
    """Format a tick value as text without its leading minus sign.

    Uses the ``(x, pos)`` callback signature expected by
    ``matplotlib.ticker.FuncFormatter``; ``pos`` is accepted but not used.

    Only the sign of the number itself is dropped. Removing every ``"-"`` in
    the string would also corrupt the exponent of values that Python renders
    in scientific notation (``-1e-05`` would become ``"1e05"``).

    Args:
        x: Tick value to format.
        pos: Tick position (unused).

    Returns:
        ``str(x)`` with any leading ``"-"`` removed.
    """
    return str(x).lstrip("-")

def format_yticks(x, pos):
    """Render a tick value as its magnitude with one decimal place.

    Uses the ``(x, pos)`` callback signature expected by
    ``matplotlib.ticker.FuncFormatter``; ``pos`` is accepted but not used.
    Taking the absolute value lets a mirrored (up/down) bar chart show
    positive labels on both halves of the axis.

    Args:
        x: Tick value to format.
        pos: Tick position (unused).

    Returns:
        ``abs(x)`` formatted with the ``.1f`` format spec.
    """
    magnitude = abs(x)
    return "{:.1f}".format(magnitude)

# Read the landscape table. 'Genotype' is forced to str -- presumably so the
# genotype strings are not parsed as numbers (TODO confirm against the CSV).
dtype_spec = {'Genotype': str}
df = pd.read_csv('../data_files/DHFR_ic50_c57.csv', dtype=dtype_spec)
seq_ls = df['Genotype'].tolist()

# Min-max scale the 'F' column and flatten the (n, 1) result back into a list.
scaler = MinMaxScaler()
fitness_column = np.array(df['F'].tolist()).reshape(-1, 1)
values = [row[0] for row in scaler.fit_transform(fitness_column)]

# Both graphs are built from the same sequences and scaled values; only the
# edge rule differs.
graph_inputs = dict(seq_ls=seq_ls, values=values)

# hamming_edges=True: the graph this notebook calls the Hamming graph.
G_h = graph_ruggedness_de.build_ohe_graph(**graph_inputs, hamming_edges=True)
de_h = graph_ruggedness_de.compute_dirichlet_energy(G=G_h)
hamming_norm_gft_coefficients = graph_fourier_transform.graph_fourier_transform(G=G_h)

# hamming_edges=False: the graph this notebook calls the KNN graph.
G_k = graph_ruggedness_de.build_ohe_graph(**graph_inputs, hamming_edges=False)
de_k = graph_ruggedness_de.compute_dirichlet_energy(G=G_k)
knn_norm_gft_coefficients = graph_fourier_transform.graph_fourier_transform(G=G_k)

import matplotlib.gridspec as gridspec

# Make sure the output directory exists so savefig() works on a fresh checkout.
os.makedirs('figures/Figure_2', exist_ok=True)

fig = plt.figure(figsize=(4, 2))

# Wide panel for the GFT spectra, narrow panel for the Dirichlet energies.
gs = gridspec.GridSpec(1, 2, width_ratios=[30, 1])

# Mirrored bar chart: Hamming-graph coefficients point up, KNN-graph
# coefficients point down. Each series is indexed by its own length so the
# x positions always match the heights being plotted.
ax1 = plt.subplot(gs[0])
ax1.bar(x=list(range(len(hamming_norm_gft_coefficients))),
        height=hamming_norm_gft_coefficients, edgecolor='black')
ax1.bar(x=list(range(len(knn_norm_gft_coefficients))),
        height=-knn_norm_gft_coefficients, edgecolor='black')
# Symmetric limits with 10% headroom, taken from the larger of the two spectra
# so that neither half of the chart can be clipped.
gft_limit = 1.1 * max(max(hamming_norm_gft_coefficients),
                      max(knn_norm_gft_coefficients))
ax1.set_ylim(-gft_limit, gft_limit)
ax1.set_xticks([])
ax1.set_ylabel('Magnitude')

# Same mirrored layout for the two scalar Dirichlet energies.
ax2 = plt.subplot(gs[1])
ax2.bar(x=[1], height=de_h, edgecolor='black')
ax2.bar(x=[1], height=-de_k, edgecolor='black')
ax2.set_xticks([])
ax2.set_ylabel('Dirichlet energy')
# Symmetric limits with 25% headroom, taken from the larger energy so that
# both bars stay inside the axes.
de_limit = 1.25 * max(de_h, de_k)
ax2.set_ylim(-de_limit, de_limit)

# Tick labels show magnitudes only, so the downward bars are not labelled
# with negative numbers.
ax1.yaxis.set_major_formatter(ticker.FuncFormatter(format_yticks))
ax2.yaxis.set_major_formatter(ticker.FuncFormatter(format_yticks))

plt.tight_layout()
plt.savefig('figures/Figure_2/DHFR_ic50_c57.pdf')
plt.show()

# Fixed seed for the spring layout so the saved drawings are reproducible.
LAYOUT_SEED = 0


def _draw_fitness_graph(G, out_path, n_colors=10, seed=LAYOUT_SEED):
    """Draw ``G`` with nodes coloured by their 'value' attribute and save it.

    Node values are min-max normalised and mapped onto a viridis palette
    resampled to ``n_colors`` entries.

    Args:
        G: networkx graph whose nodes carry a 'value' attribute.
        out_path: File path the figure is written to.
        n_colors: Number of entries in the resampled viridis colormap.
        seed: Seed passed to ``nx.spring_layout`` for a deterministic layout.

    Returns:
        The matplotlib figure that was drawn.
    """
    # Create the target directory if needed so savefig() cannot fail on it.
    os.makedirs(os.path.dirname(out_path) or '.', exist_ok=True)

    fig = plt.figure(figsize=(2, 2))
    node_values = [attrs['value'] for _, attrs in G.nodes(data=True)]
    lowest, highest = min(node_values), max(node_values)
    # Guard against a constant landscape, where the range would be zero.
    span = (highest - lowest) or 1.0

    # plt.get_cmap replaces plt.cm.get_cmap, which newer Matplotlib removed.
    cmap = plt.get_cmap('viridis', n_colors)
    node_colors = [cmap((value - lowest) / span) for value in node_values]

    nx.draw(G, pos=nx.spring_layout(G, seed=seed), node_color=node_colors,
            with_labels=False, edgecolors='black', node_size=100, width=0.75,
            edge_color='grey')
    plt.savefig(out_path)
    plt.show()
    return fig


fig = _draw_fitness_graph(G_k, 'figures/Figure_2/DHFR_ic50_c57_knn.pdf')

fig = _draw_fitness_graph(G_h, 'figures/Figure_2/DHFR_ic50_c57_ham.pdf')

### Fitness distribution
The cell below computes and plots the distribution of fitness values over the graph.

In [None]:
# Reload the landscape so this cell starts from the raw table again
# (dtype_spec is defined in the earlier GFT/DE cell).
df = pd.read_csv('../data_files/DHFR_ic50_c57.csv', dtype=dtype_spec)
seq_ls = df['Genotype'].tolist()

# Min-max scale the 'F' column and flatten the (n, 1) result into a list.
scaler = MinMaxScaler()
fitness_column = np.array(df['F'].tolist()).reshape(-1, 1)
values = list(scaler.fit_transform(fitness_column)[:, 0])

# Small histogram of the scaled fitness values.
fig, ax = plt.subplots(figsize=(1.5, 1.0))
ax.hist(values, bins=10, color='grey')
# ax.set_xlabel('Fitness')  # x-axis label is currently disabled
fig.tight_layout()
fig.savefig('figures/Figure_2/DHFR_ic50_c57_dist.pdf')