In [None]:
import os
import numpy as np
import pandas as pd
import numpy_indexed as npi
from tqdm import tqdm

In [None]:
def normalize(x,ma,mi,ran=255):
    """Rescale the non-zero entries of ``x`` in place and return ``x``.

    Every entry that is not exactly zero is replaced by
    ``(value - mi) / (ma - mi) * ran``; zero entries are left untouched.

    Parameters
    ----------
    x : np.ndarray
        Array to rescale. It is modified in place, so results are stored in
        ``x``'s own dtype (pass a float array to avoid truncation).
    ma, mi : scalar
        Upper and lower reference values of the linear map.
    ran : scalar, optional
        Value that ``ma`` is mapped to (default 255).

    Returns
    -------
    np.ndarray
        The same object as ``x``.
    """
    nonzero = x != 0
    span = ma - mi
    if np.ndim(span) == 0 and span == 0:
        # Degenerate range: the plain formula would divide by zero and fill
        # the array with NaN/inf. Map the entries to the bottom of the output
        # range instead, which is where ``mi`` itself would land.
        x[nonzero] = 0
    else:
        x[nonzero] = (x[nonzero] - mi) / span * ran
    return x

In [None]:
def gaussian_mix_spectrum_by_file(src, sigma, dis_type, v2=False):
    """Convert one four-column text file into a stack of smoothed 50x50 images.

    The file is read with ``np.loadtxt``; its columns are labelled
    ``combo ID``, ``betti``, ``ys`` and ``xs``. Rows are grouped by
    ``combo ID``. Within a group every row contributes a Gaussian of width
    ``sigma`` centred on its ``xs`` value and sampled on a fixed x grid;
    contributions whose ``ys`` fall into the same y bin are summed. Each
    per-combo image is finally rescaled with ``normalize`` using the image's
    own max and min.

    Grid settings:
      * x grid: 50 points on [0, 25) for both distance types.
      * y bins: 50 bins on [0, 1) for ``'elec'`` and on [0, 50) for ``'norm'``.
      * number of combo slots: 50 for ``'elec'``, 36 for ``'norm'``.

    Parameters
    ----------
    src : str
        Path of the text file to load.
    sigma : float
        Standard deviation of the Gaussian kernel along x.
    dis_type : {'elec', 'norm'}
        Selects the y range and the number of combo slots.
    v2 : bool, optional
        When true, rows from the companion file (``src`` with ``'.txt'``
        replaced by ``'_p2.txt'``) are appended before binning. The default is
        ``False``. The previous default was the *string* ``'False'``, which is
        truthy and therefore loaded the companion file unintentionally; string
        values are still accepted and parsed ('true'/'1'/'yes' mean True).

    Returns
    -------
    np.ndarray
        Array of shape ``(n_combo, 50, 50)``: axis 1 indexes y bins, axis 2
        the x grid. Combo ID ``k`` is stored at index ``k - 1``; slots with no
        rows stay zero.
    """
    assert dis_type in ['elec','norm']

    # Accept legacy string flags such as 'False' without treating them as truthy.
    if isinstance(v2, str):
        v2 = v2.strip().lower() in ('true', '1', 'yes')

    n_ybins = n_xgrid = 50
    x_start, x_end = 0, 25
    y_start = 0
    if dis_type == 'elec':
        n_combo, y_end = 50, 1
    else:
        n_combo, y_end = 36, 50

    # ndmin=2 keeps a single-row file two-dimensional so column slicing works.
    src_x = np.loadtxt(src, ndmin=2)
    if v2:
        src2 = src.replace('.txt', '_p2.txt')
        src_x = np.vstack([src_x, np.loadtxt(src2, ndmin=2)])

    xt = np.linspace(x_start, x_end, num=n_xgrid, endpoint=False)
    yt = np.linspace(y_start, y_end, num=n_ybins, endpoint=False)

    # np.digitize numbers bins from 1 (0 means "below the first edge"), so
    # shift to 0-based row indices. Without the shift row 0 is never filled
    # and the last bin is silently discarded.
    ys = src_x[:, 2]
    y_ins = np.digitize(ys, yt) - 1
    # Anything not strictly below y_end (including NaN) is outside the grid.
    y_ins[~(ys < y_end)] = -1

    df = pd.DataFrame(src_x, columns=['combo ID','betti','ys','xs'])
    df['y_ins'] = y_ins

    combo_vals = np.zeros((n_combo, n_ybins, n_xgrid))
    for name, mf in df.groupby('combo ID'):
        # One Gaussian per row, evaluated on the x grid: shape (rows, n_xgrid).
        centres = mf['xs'].to_numpy().reshape(-1, 1)
        gauss = np.exp(-np.square(xt - centres) / (2 * sigma**2))

        # Sum the rows that share a y bin.
        ind, summed = npi.group_by(mf['y_ins'].to_numpy()).sum(gauss)

        # Scatter into a fresh writable image (normalize works in place);
        # index -1 marks out-of-grid rows and is dropped.
        keep = ind >= 0
        mf_val = np.zeros((n_ybins, n_xgrid))
        mf_val[ind[keep]] = summed[keep]

        combo_vals[int(name-1),:,:] = normalize(mf_val,mf_val.max(),mf_val.min())

    return combo_vals

# Demo on 2013

## Norm

In [None]:
src = './PDB_data/'
# Read the two ID lists (one entry per line) with context managers so the
# file handles are closed as soon as the lists are built.
with open(src+'train_2013.txt') as fh:
    train_2013 = [line.strip() for line in fh]
with open(src+'core_2013.txt') as fh:
    test_2013 = [line.strip() for line in fh]

In [None]:
sigma = 1.5  # Gaussian width; alternatives listed by the author: 0.2 0.5 1 1.5 2 5 10
n_combo = 36

root = './PDB_data/2013_normDist'
# Output directory mirrors the input path under Temp_tensor and records sigma.
tardir = root.replace('PDB_data', 'Temp_tensor')+f'_sigma_{sigma}'

# exist_ok avoids the check-then-create race and keeps the cell re-runnable.
os.makedirs(tardir, exist_ok=True)

In [None]:
files = [x for x in os.listdir(root) if x.endswith('eigv.txt')]

# Position of every ID in train+test, built once instead of concatenating the
# lists and scanning them for each file. setdefault keeps the first
# occurrence, which is what list.index returned.
order_2013 = {}
for pos, entry in enumerate(train_2013+test_2013):
    order_2013.setdefault(entry, pos)

# Fail early, and by name, if a file's prefix is in neither list.
unlisted = [x for x in files if x.split('_')[0] not in order_2013]
if unlisted:
    raise ValueError(f'files not listed in train_2013/test_2013: {unlisted}')

files_sorted = sorted(files,key=lambda x: order_2013[x.split('_')[0]])

In [None]:
# One (n_combo, 50, 50) image stack per file, in files_sorted order.
tensor_2013_norm = np.zeros((len(files_sorted),n_combo,50,50))
# Wrap the list rather than the enumerate object so tqdm knows the total and
# can show a real progress bar with an ETA.
for i,file in enumerate(tqdm(files_sorted)):
    img = gaussian_mix_spectrum_by_file(root+'/'+file, sigma, 'norm', v2=False)
    tensor_2013_norm[i,:,:,:] = img
np.save(tardir+'/'+'tensor_2013_norm.npy',tensor_2013_norm)

## Elec

In [None]:
sigma = 1.5  # Gaussian width; alternatives listed by the author: 0.2 0.5 1 1.5 2 5 10
n_combo = 50

root = './PDB_data/2013_elecDist'
# Output directory mirrors the input path under Temp_tensor and records sigma.
tardir = root.replace('PDB_data', 'Temp_tensor')+f'_sigma_{sigma}'

# exist_ok avoids the check-then-create race and keeps the cell re-runnable.
os.makedirs(tardir, exist_ok=True)

In [None]:
files = [x for x in os.listdir(root) if x.endswith('eigv_2.txt')]

# Position of every ID in train+test, built once instead of concatenating the
# lists and scanning them for each file. setdefault keeps the first
# occurrence, which is what list.index returned.
order_2013 = {}
for pos, entry in enumerate(train_2013+test_2013):
    order_2013.setdefault(entry, pos)

# Fail early, and by name, if a file's prefix is in neither list.
unlisted = [x for x in files if x.split('_')[0] not in order_2013]
if unlisted:
    raise ValueError(f'files not listed in train_2013/test_2013: {unlisted}')

files_sorted = sorted(files,key=lambda x: order_2013[x.split('_')[0]])

In [None]:
# One (n_combo, 50, 50) image stack per file, in files_sorted order.
tensor_2013_elec = np.zeros((len(files_sorted),n_combo,50,50))
# Wrap the list rather than the enumerate object so tqdm knows the total and
# can show a real progress bar with an ETA.
for i,file in enumerate(tqdm(files_sorted)):
    img = gaussian_mix_spectrum_by_file(root+'/'+file, sigma, 'elec', v2=False)
    tensor_2013_elec[i,:,:,:] =img
np.save(tardir+'/'+'tensor_2013_elec.npy',tensor_2013_elec)

# Demo on 2016

## Norm

In [None]:
src = './PDB_data/'
# Read the two ID lists (one entry per line) with context managers so the
# file handles are closed as soon as the lists are built.
with open(src+'train_2016.txt') as fh:
    train_2016 = [line.strip() for line in fh]
with open(src+'core_2016.txt') as fh:
    test_2016 = [line.strip() for line in fh]

In [None]:
sigma = 1.5  # Gaussian width; alternatives listed by the author: 0.2 0.5 1 1.5 2 5 10
n_combo = 36

root = './PDB_data/2016_normDist'
# Output directory mirrors the input path under Temp_tensor and records sigma.
tardir = root.replace('PDB_data', 'Temp_tensor')+f'_sigma_{sigma}'

# exist_ok avoids the check-then-create race and keeps the cell re-runnable.
os.makedirs(tardir, exist_ok=True)

In [None]:
files = [x for x in os.listdir(root) if x.endswith('eigv.txt')]

# Position of every ID in train+test, built once instead of concatenating the
# lists and scanning them for each file. setdefault keeps the first
# occurrence, which is what list.index returned.
order_2016 = {}
for pos, entry in enumerate(train_2016+test_2016):
    order_2016.setdefault(entry, pos)

# Fail early, and by name, if a file's prefix is in neither list.
unlisted = [x for x in files if x.split('_')[0] not in order_2016]
if unlisted:
    raise ValueError(f'files not listed in train_2016/test_2016: {unlisted}')

files_sorted = sorted(files,key=lambda x: order_2016[x.split('_')[0]])

In [None]:
# One (n_combo, 50, 50) image stack per file, in files_sorted order.
tensor_2016_norm = np.zeros((len(files_sorted),n_combo,50,50))
# Wrap the list rather than the enumerate object so tqdm knows the total and
# can show a real progress bar with an ETA.
for i,file in enumerate(tqdm(files_sorted)):
    img = gaussian_mix_spectrum_by_file(root+'/'+file, sigma, 'norm', v2=False)
    tensor_2016_norm[i,:,:,:] = img
np.save(tardir+'/'+'tensor_2016_norm.npy',tensor_2016_norm)

## Elec

In [None]:
sigma = 1.5  # Gaussian width; alternatives listed by the author: 0.2 0.5 1 1.5 2 5 10
n_combo = 50

root = './PDB_data/2016_elecDist'
# Output directory mirrors the input path under Temp_tensor and records sigma.
tardir = root.replace('PDB_data', 'Temp_tensor')+f'_sigma_{sigma}'

# exist_ok avoids the check-then-create race and keeps the cell re-runnable.
os.makedirs(tardir, exist_ok=True)

In [None]:
files = [x for x in os.listdir(root) if x.endswith('eigv_2.txt')]

# Position of every ID in train+test, built once instead of concatenating the
# lists and scanning them for each file. setdefault keeps the first
# occurrence, which is what list.index returned.
order_2016 = {}
for pos, entry in enumerate(train_2016+test_2016):
    order_2016.setdefault(entry, pos)

# Fail early, and by name, if a file's prefix is in neither list.
unlisted = [x for x in files if x.split('_')[0] not in order_2016]
if unlisted:
    raise ValueError(f'files not listed in train_2016/test_2016: {unlisted}')

files_sorted = sorted(files,key=lambda x: order_2016[x.split('_')[0]])

In [None]:
# One (n_combo, 50, 50) image stack per file, in files_sorted order.
tensor_2016_elec = np.zeros((len(files_sorted),n_combo,50,50))
# Wrap the list rather than the enumerate object so tqdm knows the total and
# can show a real progress bar with an ETA.
for i,file in enumerate(tqdm(files_sorted)):
    img = gaussian_mix_spectrum_by_file(root+'/'+file, sigma, 'elec', v2=False)
    tensor_2016_elec[i,:,:,:] =img
np.save(tardir+'/'+'tensor_2016_elec.npy',tensor_2016_elec)