In [None]:
import os
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Folder (relative to the notebook) that holds the input CSV tables.
directory = "data"
# Input tables are numbered consecutively: L1.csv through L6.csv.
file_names = [f"L{number}.csv" for number in range(1, 7)]

In [None]:
# Colour palette lookup: family name -> {shade index 0..9 -> hex colour string}.
# Each family is written as an ordered tuple of ten shades; enumerate() assigns
# the integer shade keys 0..9 in that order.
material = {
    family: dict(enumerate(shades))
    for family, shades in (
        ("red", (
            "#ffebee", "#ffcdd2", "#ef9a9a", "#e57373", "#ef5350",
            "#f44336", "#e53935", "#d32f2f", "#c62828", "#b71c1c",
        )),
        ("amber", (
            "#fff8e1", "#ffecb3", "#ffe082", "#ffd54f", "#ffca28",
            "#ffc107", "#ffb300", "#ffa000", "#ff8f00", "#ff6f00",
        )),
        ("lime", (
            "#f9fbe7", "#f0f4c3", "#e6ee9c", "#dce775", "#d4e157",
            "#cddc39", "#c0ca33", "#afb42b", "#9e9d24", "#827717",
        )),
        ("cyan", (
            "#e0f7fa", "#b2ebf2", "#80deea", "#4dd0e1", "#26c6da",
            "#00bcd4", "#00acc1", "#0097a7", "#00838f", "#006064",
        )),
        ("blu gry", (
            "#eceff1", "#cfd8dc", "#b0bec5", "#90a4ae", "#78909c",
            "#607d8b", "#546e7a", "#455a64", "#37474f", "#263238",
        )),
    )
}

In [None]:
# Read every table named in `file_names` from `directory`, preserving list order.
data_list = [
    pd.read_csv(os.path.join(directory, file_name))
    for file_name in file_names
]

# Stack the tables into one frame with a fresh 0..n-1 index.
data = pd.concat(data_list, ignore_index=True)

# Label each row with the 1-based position of the file it came from ("L1", "L2", ...).
tags = []
for position, frame in enumerate(data_list, start=1):
    tags.extend(['L{}'.format(position)] * len(frame))
data['TAG'] = tags

# Bin the numeric 'Affinity' column into half-open unit ranges [low, high),
# plus one open-ended range below the lowest edge.
affinity = data['Affinity']
edges = [-11, -10, -9, -8, -7]
conditions = [affinity < edges[0]]
conditions += [
    (affinity >= low) & (affinity < high)
    for low, high in zip(edges[:-1], edges[1:])
]

# Labels are positionally paired with `conditions`.
choices = [
    'less than -11',
    '-11 to -10',
    '-10 to -9',
    '-9 to -8',
    '-8 to -7',
]

# Rows matching none of the masks (including values >= -7) are labelled 'other'.
data['affinity'] = np.select(conditions, choices, default='other')

# Preview the combined frame before the raw columns are removed.
display(data.head())
print(data.shape)

# Keep only the derived columns for the downstream tabulation.
data = data.drop(['Name', 'SMILES', 'Affinity'], axis=1)

In [None]:
# Count rows for every (TAG, affinity-bin) pair: one row per TAG, one column per bin label.
cross_tab = pd.crosstab(index=data['TAG'], columns=data['affinity'])

# Column order for display: most negative affinity range first.
desired_order = ['less than -11', '-11 to -10', '-10 to -9', '-9 to -8', '-8 to -7']

# reindex() instead of cross_tab[desired_order]: plain column selection raises
# KeyError when a bin has no rows in the data, whereas reindex inserts that bin
# as an all-zero column. Labels not listed here (e.g. 'other') are still left out.
cross_tab = cross_tab.reindex(columns=desired_order, fill_value=0)
cross_tab

In [None]:
unique_affinities = cross_tab.columns

# Colour list handed to the bar plot: shade 6 of every palette family first,
# followed by shades 1..(number of bins - 1) of every family.
# NOTE(review): with five bins and five families the plot presumably only consumes
# the leading shade-6 entries; the remaining shades look like spares. Confirm intent.
colors = (
    [material[color][6] for color in material]
    + [material[color][i] for color in material for i in range(1, len(unique_affinities))]
)

# One stacked bar per TAG, one coloured segment per affinity bin.
ax = cross_tab.plot(kind='bar', stacked=True, figsize=(10, 6), color=colors, width=0.4)
fig = ax.get_figure()

# Horizontal, bold tick labels; hide the top/right frame and the bottom tick marks.
ax.set_xticklabels(ax.get_xticklabels(), rotation=0, fontweight='bold')
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.tick_params(left=True, bottom=False)

ax.set_xlabel('Categories')
ax.set_ylabel('Count')
ax.set_title('Binding Affinity at 6 different conformers')
# Anchor the legend just outside the right edge of the axes so it does not cover the bars.
ax.legend(title='affinity(kcal/mol)', bbox_to_anchor=(1.05, 1), loc='upper left')
fig.tight_layout()

# Create the output folder first: savefig raises FileNotFoundError if it is missing.
output_path = "figures/stacked_histogram.png"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
fig.savefig(output_path)
plt.show()