In [115]:
import networkx as nx
import pandas as pd
from functools import partial 

def to_integer(bitstr):
    """Return the integer value of ``bitstr`` read as a base-2 numeral.

    Parsing is delegated to the built-in ``int``, so anything it rejects
    for base 2 (e.g. an empty string) raises ``ValueError``.
    """
    value = int(bitstr, base=2)
    return value

def Rename_Colors(dict_color_ctr_map, color):
    """Look up the replacement id for ``color`` in ``dict_color_ctr_map``.

    The mapping comes first so the function can be pre-bound with
    ``functools.partial`` and then applied to each color value.
    A color missing from the mapping raises ``KeyError`` (plain subscript).
    """
    renamed = dict_color_ctr_map[color]
    return renamed

def Create_Sublineages_Matrix(df):
    """Encode each k-mer's sublineage membership as a bitstring and a color id.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``'K-Mer'`` and ``'Sublineages'``, where
        ``'Sublineages'`` holds a comma-separated list of sublineage names.
        Whitespace around names is ignored, empty tokens (e.g. from a
        trailing comma) are dropped, and a missing value is treated as
        "belongs to no sublineage".

    Returns
    -------
    pandas.DataFrame
        Indexed by the k-mers (index name ``'K_Mer'``), in input row order,
        with two columns:

        * ``'Color_new'`` -- int64 id shared by all k-mers with the same
          bitstring; ids are 0, 1, 2, ... in order of first appearance.
        * ``'Bitstring'`` -- one ``'0'``/``'1'`` character per sublineage,
          with sublineage names sorted ascending (leftmost character is the
          first name in sort order).

    Raises
    ------
    KeyError
        If ``df`` lacks the ``'K-Mer'`` or ``'Sublineages'`` column.
    """
    # One {sublineage: 1} record per input row; absent names become 0 below.
    records = []
    for field in df['Sublineages']:
        if pd.isna(field):
            names = []
        else:
            names = [part.strip() for part in str(field).split(',')]
        records.append({name: 1 for name in names if name})

    # The k-mers are passed as the index rather than stored inside the
    # records, so a sublineage that happens to be called 'K_Mer' cannot
    # collide with the key column.
    kmer_index = pd.Index(df['K-Mer'], name='K_Mer')
    presence = (
        pd.DataFrame(records, index=kmer_index)
        .fillna(0)
        .astype(int)
        .sort_index(axis=1)
    )

    # Concatenate each row's 0/1 flags in sorted-column order.
    bitstrings = [''.join(bits) for bits in presence.astype(str).to_numpy()]

    # All bitstrings have the same length, so distinct strings correspond
    # one-to-one to distinct binary values; factorizing the strings yields
    # the same first-appearance ids without building huge integers.
    color_codes, _ = pd.factorize(pd.Series(bitstrings, dtype=object))

    return pd.DataFrame(
        {'Color_new': color_codes.astype('int64'), 'Bitstring': bitstrings},
        index=kmer_index,
    )
    

In [116]:
# Load the k-mer -> sublineages table (tab-separated).
# `names=` is given without `header=`, so pandas reads the first line as data
# -- assumes the TSV has no header row; TODO confirm against the file.
df = pd.read_table(
    'data/HIV_full_Refs_k23_1.kmer_sublineage_info.tsv',
    names=['K-Mer', 'Sublineages'],
)

In [117]:
# Derive the per-k-mer color table: columns 'Color_new' and 'Bitstring',
# indexed by the k-mer sequence ('K_Mer').
df_op = Create_Sublineages_Matrix(df)

In [129]:
# Load the graph and attach each k-mer's color id as the node attribute
# 'Color_new'. Node ids are matched against the k-mer index of `df_op`.
G = nx.read_gexf('data/HIV_full_Refs_k23_1.gexf')

# A flat {k-mer: color} dict plus `name=` sets the same attribute as the
# nested {k-mer: {'Color_new': color}} form, without first transposing the
# table into a one-row frame with one column per k-mer.
node_colors = df_op['Color_new'].to_dict()
nx.set_node_attributes(G, node_colors, name='Color_new')

In [131]:
# Spot-check one node: its attribute dict should now include 'Color_new'.
G.nodes['AAAAACAGGAAAATATGCCAGAA']

{'label': 'AAAAACAGGAAAATATGCCAGAA', 'Color_new': 0}

In [132]:
# Save the annotated graph as GEXF under a new file name, leaving the input
# graph file untouched.
nx.write_gexf(G, "data/HIV_full_Refs_k23_1_Color_Annotated.gexf")