1JHC, 2JHN (redo with dropout), 3JHN

In [1]:
import pandas as pd, numpy as np, matplotlib.pyplot as plt, seaborn as sns, pickle, numba, torch, tqdm, random, utils, os, gc, time
import networkx as nx
from collections import OrderedDict, defaultdict
from copy import deepcopy

from rdkit.Chem.AtomPairs.Utils import NumPiElectrons
from rdkit.Chem.rdMolTransforms import GetAngleRad, GetBondLength
from rdkit.Chem.rdchem import Atom, Bond
from rdkit.Chem.AtomPairs.Pairs import rdMolDescriptors as rdmd
from rdkit import Chem, RDConfig
from rdkit.Chem import ChemicalFeatures

import torch_geometric.transforms as T
import torch.nn.functional as F
from torch.nn import Sequential, Linear, ReLU, GRU, BatchNorm1d, Dropout, SELU
from torch_geometric.data import Data
from torch_geometric.nn import NNConv, Set2Set, GATConv
from torch_geometric.data import DataLoader


%config InlineBackend.figure_format ='retina'

In [2]:
structures = pd.read_csv("structures.csv")
test = pd.read_csv('test.csv')

In [3]:
# Per-element lookup table, one row per atom symbol found in `structures`.
# Column order is significant: these columns are appended to `structures` by the
# merge that follows and are later read by position in getAtomNodeFeats.
# NOTE(review): values look like gyromagnetic ratio, Pauling electronegativity,
# van der Waals radius (pm), atomic number and valence-electron count — confirm sources.
gratio = pd.DataFrame(
    [
        ("H", 42.576, 2.2, 110, 1, 1),
        ("C", 10.705, 2.55, 170, 6, 4),
        ("N", -4.316, 3.04, 155, 7, 5),
        ("O", -5.772, 3.44, 152, 8, 6),
        ("F", 40.060, 3.98, 147, 9, 7),
    ],
    columns=["atom", "Gratio", "Eneg", "radius", "atomic_num", "numv"],
)
structures = structures.merge(gratio,how='left',on='atom')

In [4]:
test = test.groupby('type').get_group('2JHC')

In [5]:
# Re-read the full test set (all coupling types) and keep only the molecules that
# also appear in the filtered `test` frame; `id` and `type` are not needed here.
t2 = (
    pd.read_csv('test.csv')
    .loc[lambda frame: frame['molecule_name'].isin(test['molecule_name'])]
    .drop(columns=['id', 'type'])
)

In [6]:
t2.head()

Unnamed: 0,molecule_name,atom_index_0,atom_index_1
0,dsgdb9nsd_000004,2,0
1,dsgdb9nsd_000004,2,1
2,dsgdb9nsd_000004,2,3
3,dsgdb9nsd_000004,3,0
4,dsgdb9nsd_000004,3,1


In [7]:
from collections import defaultdict
molcouples = defaultdict(list)
cpv = t2.values.tolist()

In [8]:
# Collect every (atom_index_0, atom_index_1) pair under its molecule name.
# Re-running this cell without resetting `molcouples` appends the pairs again.
for mol_name, idx0, idx1 in tqdm.tqdm_notebook(cpv):
    molcouples[mol_name].append((idx0, idx1))

HBox(children=(IntProgress(value=0, max=2503729), HTML(value='')))




In [9]:
test.drop('type',axis=1,inplace=True)

In [10]:
test = utils.map_atom_info(test,0,structures)
test = utils.map_atom_info(test,1,structures)

In [11]:
# Load the pre-built molecule lookup; `d` is indexed by molecule name in the next cell
# (presumably RDKit Mol objects — confirm against the script that wrote the file).
# NOTE: pickle.load can execute arbitrary code embedded in the file, so only load a
# 'rdkitmolecules.p' that you produced yourself or otherwise trust.
with open('rdkitmolecules.p', 'rb') as fp:
    d = pickle.load(fp)

In [12]:
# Molecule names in first-appearance order, and the matching molecule objects.
molnames = test['molecule_name'].unique().tolist()
mols = OrderedDict((mol_name, d[mol_name]) for mol_name in molnames)

# Atom rows restricted to those molecules, grouped for per-molecule lookup.
struct = structures.loc[structures['molecule_name'].isin(molnames)]
g = struct.groupby('molecule_name')
struct.head()

Unnamed: 0,molecule_name,atom_index,atom,x,y,z,Gratio,Eneg,radius,atomic_num,numv
12,dsgdb9nsd_000004,0,C,0.599539,0.0,1.0,10.705,2.55,170,6,4
13,dsgdb9nsd_000004,1,C,-0.599539,0.0,1.0,10.705,2.55,170,6,4
14,dsgdb9nsd_000004,2,H,-1.661639,0.0,1.0,42.576,2.2,110,1,1
15,dsgdb9nsd_000004,3,H,1.661639,0.0,1.0,42.576,2.2,110,1,1
88,dsgdb9nsd_000016,0,C,-0.011933,1.514332,0.010317,10.705,2.55,170,6,4


In [13]:
from rdkit import Chem
from rdkit.Chem import ChemicalFeatures
from rdkit import RDConfig
import os
fdefName = os.path.join(RDConfig.RDDataDir,'BaseFeatures.fdef')
factory = ChemicalFeatures.BuildFeatureFactory(fdefName)

In [14]:
# Feature-family name -> slot index used for the one-hot flags written by
# getAtomNodeFeats. The list order defines the slot numbers.
atfn = {
    family: slot
    for slot, family in enumerate([
        'Acceptor',
        'Aromatic',
        'Donor',
        'Hydrophobe',
        'LumpedHydrophobe',
        'NegIonizable',
        'PosIonizable',
        'ZnBinder',
    ])
}

In [15]:
#ADD DISTANCE TO TARGET FEATURE
def getAtomNodeFeats(molname, mols, data):
    """Build the node (atom) feature rows for one molecule.

    Parameters
    ----------
    molname : key into `mols` and into the module-level `molcouples`.
    mols    : mapping from molecule name to the molecule object handed to the
              feature factory and to CalcEEMcharges.
    data    : list of per-atom rows; positions 2..10 are read as
              symbol, x, y, z, Gratio, Eneg, radius, atomic_num, numv.
              Row i is paired with mol.GetAtomWithIdx(i), so the rows are
              assumed to be in atom-index order — TODO confirm.

    Returns
    -------
    list of 189-element lists, laid out as
      [0:22]    element one-hot, atomic number, ring flag, hybridisation one-hot,
                x, y, z, EEM charge, hybridisation value, radius, Eneg, Gratio,
                pi-electron count, two zero placeholders
      [22:157]  one slot per coupling of this molecule (set to 1 on both atoms)
      [157:189] eight feature-family flags (see `atfn`), zero placeholders,
                and numv at position -11

    Raises KeyError for a hybridisation string outside S/SP/SP2/SP3.

    NOTE(review): only 135 coupling slots are reserved; a molecule with more
    entries in `molcouples` would write into the trailing block — confirm the
    dataset never exceeds that.
    """
    SYMBOL, X, Y, Z, GR, ENEG, RADIUS, ATOMNUM, NV = range(2, 11)
    mol = mols[molname]

    # atom index -> names of the feature families that include that atom
    families_by_atom = defaultdict(list)
    for feat in factory.GetFeaturesForMol(mol):
        family = feat.GetFamily()
        for atom_id in feat.GetAtomIds():
            families_by_atom[atom_id].append(family)

    nodef = []
    charges = rdmd.CalcEEMcharges(mol)
    hyb_value = {'S': 0, 'SP': 0.5, 'SP2': 1/3, 'SP3': 1/4}

    for idx, row in enumerate(data):
        atom = mol.GetAtomWithIdx(idx)
        hyb = str(atom.GetHybridization())
        sym = row[SYMBOL]

        base = [
            sym == 'H', sym == 'C', sym == 'N', sym == 'O', sym == 'F',
            row[ATOMNUM],
            int(atom.IsInRing() == True),
            hyb == 'S', hyb == 'SP', hyb == 'SP2', hyb == 'SP3',
            row[X], row[Y], row[Z],
            charges[idx],
            hyb_value[hyb],
            row[RADIUS], row[ENEG], row[GR],
            NumPiElectrons(atom),
            0, 0,
        ]
        coupling_slots = [0] * 135

        # trailing block: family flags first, numv at -11, the rest stays zero here
        tail = [0] * 32
        tail[-11] = row[NV]
        for family in families_by_atom.get(idx, ()):
            tail[atfn[family]] = 1

        nodef.append(base + coupling_slots + tail)

    # mark, for every coupling in the molecule, the two atoms that take part in it
    for slot, (a0, a1) in enumerate(molcouples[molname]):
        nodef[a0][22 + slot] = 1
        nodef[a1][22 + slot] = 1
    return nodef

In [16]:
@numba.njit
def getAngle(x0,y0,z0,x1,y1,z1):
    """Angle between the vectors (x0, y0, z0) and (x1, y1, z1).

    Returns a tuple (theta, cos(theta), sin(theta)) with theta in radians.
    The cosine is clamped to [-1, 1] before arccos so rounding error cannot
    push it outside the valid domain.

    NOTE(review): a zero-length input makes the denominator zero; how that
    surfaces under Numba (exception vs. nan) should be confirmed.
    """
    len0 = np.sqrt(x0**2 + y0**2 + z0**2)
    len1 = np.sqrt(x1**2 + y1**2 + z1**2)
    dot = (x0*x1) + (y0*y1) + (z0*z1)
    cos_val = dot/(len0*len1)
    if cos_val > 1:
        cos_val = 1
    elif cos_val < -1:
        cos_val = -1
    theta = np.arccos(cos_val)
    return theta, np.cos(theta), np.sin(theta)

In [17]:
def getBondEdgeFeats(molname, mols, data):
    """Build bidirectional edge features for the bonds of one molecule.

    Parameters
    ----------
    molname : key into `mols`.
    mols    : mapping from molecule name to molecule object.
    data    : list of per-atom rows; positions 3, 4, 5 are read as x, y, z and
              rows are indexed by the bond's begin/end atom index.

    Returns
    -------
    bondf      : list with two entries per bond (one per direction). Both entries
                 are the SAME list object of 25 values:
                 [bond length, angle, cos, sin, is-conjugated,
                  one-hot(AROMATIC, SINGLE, DOUBLE, TRIPLE), 16 zeros]
                 where the angle is the one getAngle returns for the two atoms'
                 coordinate vectors.
    edge_index : [src, dst] index lists aligned with `bondf`.
    aed        : defaultdict(list) mapping atom index -> bonded atom indices.

    Raises KeyError for a bond type outside the four listed above.
    """
    X, Y, Z = 3, 4, 5
    mol = mols[molname]
    bond_slot = {'AROMATIC': 0, 'SINGLE': 1, 'DOUBLE': 2, 'TRIPLE': 3}

    src, dst = [], []
    bondf = []
    aed = defaultdict(list)

    for bond in mol.GetBonds():
        begin, end = bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()

        # one directed edge each way: begin->end, then end->begin
        src.extend((begin, end))
        dst.extend((end, begin))
        aed[begin].append(end)
        aed[end].append(begin)

        row0, row1 = data[begin], data[end]
        length = GetBondLength(mol.GetConformer(), begin, end)
        angle, cos_a, sin_a = getAngle(row0[X], row0[Y], row0[Z],
                                       row1[X], row1[Y], row1[Z])
        conjugated = int(bond.GetIsConjugated() == True)

        one_hot = [0, 0, 0, 0]
        one_hot[bond_slot[str(bond.GetBondType())]] = 1

        # the trailing 16 zeros are placeholders filled in per coupling later
        feats = [length, angle, cos_a, sin_a, conjugated] + one_hot + [0] * 16
        bondf.extend((feats, feats))

    return bondf, [src, dst], aed

In [18]:
mnames = list(mols)

# Cache the coupling-independent node/edge features once per molecule.
datadir = {}
for name in tqdm.tqdm_notebook(mnames):
    # both helpers only read the rows, so a single list can be shared
    atom_rows = g.get_group(name).values.tolist()
    node_rows = getAtomNodeFeats(name, mols, atom_rows)
    bond_rows, edge_pairs, neighbours = getBondEdgeFeats(name, mols, atom_rows)
    datadir[name] = [node_rows, bond_rows, edge_pairs, neighbours]

HBox(children=(IntProgress(value=0, max=45660), HTML(value='')))




In [19]:
# Per-atom columns added by the two map_atom_info merges; they are not needed
# once the per-molecule features have been cached.
_merged_atom_columns = [
    'atom_0', 'x_0', 'y_0', 'z_0',
    'Gratio_x', 'Eneg_x', 'radius_x', 'atomic_num_x',
    'atom_1', 'x_1', 'y_1', 'z_1',
    'Gratio_y', 'Eneg_y', 'radius_y', 'atomic_num_y',
    'numv_x', 'numv_y',
]
test = test.drop(columns=_merged_atom_columns)

In [20]:
t = test.values.tolist()

In [21]:
@numba.njit
def getDist(x0,y0,z0,x1,y1,z1):
    """Euclidean distance between the points (x0, y0, z0) and (x1, y1, z1)."""
    dx = x0-x1
    dy = y0-y1
    dz = z0-z1
    return np.sqrt(dx**2 + dy**2 + dz**2)

In [22]:
olds = np.seterr(all='raise')

In [23]:
# Build one torch_geometric `Data` graph per coupling row in `t`.
# For every row the cached molecule features are copied, extra edges are added from
# the two coupled atoms (and the atom between them) to the rest of the molecule, and
# the coupling-specific node/edge slots are filled in.
# Results: `testlist` (graphs) and `ids` (row ids), in the same order.
testlist = []
l = len(t)  # not referenced again in this cell
ids = []
for dp in tqdm.tqdm_notebook(t):
    # assumes each row is [id, molecule_name, atom_index_0, atom_index_1] — verify
    # against the columns left in `test` after the earlier drops
    name, id0, id1 = dp[1], dp[2], dp[3]
    n,b,e,aed = datadir[name]
    # round-trip through numpy so this row works on fresh lists; the in-place edits
    # below must not leak back into the per-molecule cache in `datadir`
    nodef,bondf,edgeidx = np.array(n), np.array(b), np.array(e)
    nodef,bondf,edgeidx = nodef.tolist(),bondf.tolist(),edgeidx.tolist()

    # last node feature flags the atoms that belong to this coupling
    nodef[id0][-1] = 1
    nodef[id1][-1] = 1

    # node slots 11..13 hold x, y, z at this point
    nid0 = nodef[id0]
    nid1 = nodef[id1]
    x0,y0,z0 = nid0[11],nid0[12],nid0[13]
    x1,y1,z1 = nid1[11],nid1[12],nid1[13]

    ll = len(nodef)
    # bonded neighbours of the two coupled atoms
    ai0 = aed[id0]
    ai1 = aed[id1]

    # shortest bond path id0 -> id1; sp[1] is the first atom after id0 on that path
    # (presumably the single atom between the two for a 2-bond coupling — confirm)
    G = nx.Graph()
    eds = [x for x in zip(edgeidx[0],edgeidx[1])]
    G.add_edges_from(eds)
    sp = nx.dijkstra_path(G,id0,id1)
    
    mp = sp[1]
    nodef[mp][-1] = 1
    tnm = nodef[mp]
    xm,ym,zm = tnm[11],tnm[12],tnm[13]
    # vectors from id0 / id1 to the middle atom
    xm0,ym0,zm0 = xm-x0,ym-y0,zm-z0
    xm1,ym1,zm1 = xm-x1,ym-y1,zm-z1
    # Connect id0 and id1 to every atom they are not bonded to. Each added pair of
    # directed edges gets a 25-slot feature row: distance in slot 0 and
    # angle/cos/sin (from getAngle on the two coordinate vectors) in slots 1..3.
    # NOTE(review): both directions append the SAME `temp` object, so the
    # direction-dependent values written in the edge loop further down end up
    # identical for the pair (the later write wins).
    # NOTE(review): when id0 and id1 are not bonded, the id0<->id1 pair is added by
    # both branches (i == id1 in the first, i == id0 in the second), i.e. twice.
    for i in range(ll):
        if i not in ai0 and i != id0:
            temp = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
            tnf = nodef[i]
            xi,yi,zi = tnf[11],tnf[12],tnf[13]
            temp[0] = getDist(xi,yi,zi,x0,y0,z0)
            theta,cos,sin = getAngle(xi,yi,zi,x0,y0,z0)
            temp[1] = theta
            temp[2] = cos
            temp[3] = sin

            bondf.append(temp)
            bondf.append(temp)

            edgeidx[0].append(i)
            edgeidx[1].append(id0)

            edgeidx[0].append(id0)
            edgeidx[1].append(i)
        if i not in ai1 and  i != id1:
            temp = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
            tnf = nodef[i]
            xi,yi,zi = tnf[11],tnf[12],tnf[13]
            temp[0] = getDist(xi,yi,zi,x1,y1,z1)
            theta,cos,sin = getAngle(xi,yi,zi,x1,y1,z1)
            temp[1] = theta
            temp[2] = cos
            temp[3] = sin

            bondf.append(temp)
            bondf.append(temp)

            edgeidx[0].append(i)
            edgeidx[1].append(id1)

            edgeidx[0].append(id1)
            edgeidx[1].append(i)

    # Rebuild the graph including the edges just added, then connect the middle atom
    # to every atom it still has no edge to (same feature layout as above).
    G = nx.Graph()
    eds = [x for x in zip(edgeidx[0],edgeidx[1])]
    G.add_edges_from(eds)
    for i in range(ll):
        if (not G.has_edge(i,mp)) and i != mp:
            temp = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
            tnf = nodef[i]
            xi,yi,zi = tnf[11],tnf[12],tnf[13]
            temp[0] = getDist(xi,yi,zi,xm,ym,zm)
            theta,cos,sin = getAngle(xi,yi,zi,xm,ym,zm)
            temp[1] = theta
            temp[2] = cos
            temp[3] = sin

            bondf.append(temp)
            bondf.append(temp)

            edgeidx[0].append(i)
            edgeidx[1].append(mp)

            edgeidx[0].append(mp)
            edgeidx[1].append(i)

    # Fill the coupling-specific edge slots for every edge (original and added).
    # Edge slots, counted from the end:
    #   -16..-14 angle/cos/sin between the edge vector and (mp - id0)
    #   -13..-11 angle/cos/sin between the edge vector and (mp - id1)
    #   -10      1 if the edge lies on the id0 - mp - id1 path
    #   -9..-7   angle/cos/sin between the edge vector and id0's coordinates
    #   -6..-4   same with id1's coordinates
    #   -3..-1   same with mp's coordinates
    bonds = [x for x in zip(edgeidx[0],edgeidx[1])]
    intbonds = [(id0,mp),(mp,id1),(mp,id0),(id1,mp)]
    for i,bf in enumerate(bondf):
        i0,i1 = bonds[i]
        if (i0,i1) in intbonds:
            bf[-10] = 1
        nfa,nfb = nodef[i0],nodef[i1]
        xa,ya,za = nfa[11],nfa[12],nfa[13]
        xb,yb,zb = nfb[11],nfb[12],nfb[13]
        # edge vector: source position minus destination position
        xi,yi,zi = xa-xb,ya-yb,za-zb
        a0,cos0,sin0 = getAngle(xi,yi,zi,x0,y0,z0)
        a1,cos1,sin1 = getAngle(xi,yi,zi,x1,y1,z1)
        am,cosm,sinm = getAngle(xi,yi,zi,xm,ym,zm)
        am0,cosm0,sinm0 = getAngle(xi,yi,zi,xm0,ym0,zm0)
        am1,cosm1,sinm1 = getAngle(xi,yi,zi,xm1,ym1,zm1)

        bf[-16] = am0
        bf[-15] = cosm0
        bf[-14] = sinm0
        bf[-13] = am1
        bf[-12] = cosm1
        bf[-11] = sinm1

        bf[-9] = a0
        bf[-8] = cos0
        bf[-7] = sin0
        bf[-6] = a1
        bf[-5] = cos1
        bf[-4] = sin1
        bf[-3] = am
        bf[-2] = cosm
        bf[-1] = sinm
    # Fill the coupling-specific node slots. "at3" in the comments below is the
    # middle atom `mp`. Node slots written here:
    #   20 / 21   distance to id0 / id1
    #   -24       distance to at3
    #   -23..-21  angle/cos/sin with at3's coordinates
    #   -20..-18  angle/cos/sin with the vector (at3 - id0)
    #   -17..-15  angle/cos/sin with the vector (at3 - id1)
    #   -14..-12  x, y, z of at3 relative to this atom
    #   -10..-8   x, y, z of id1 relative to this atom
    #   -7..-5    angle/cos/sin with id0's coordinates
    #   -4..-2    angle/cos/sin with id1's coordinates
    #   11..13    overwritten with x, y, z of id0 relative to this atom
    for i,nf in enumerate(nodef):
        if i == mp:
            nf[20] = getDist(x0,y0,z0,xm,ym,zm)
            nf[21] = getDist(x1,y1,z1,xm,ym,zm)
            # dist, angle, cos, sin to at3 (the atom itself: 0, 0, 1, 0)
            nf[-24] = 0
            nf[-23] = 0
            nf[-22] = 1
            nf[-21] = 0
            am0,cosm0,sinm0 =  getAngle(xm,ym,zm,xm0,ym0,zm0)
            nf[-20] = am0
            nf[-19] = cosm0
            nf[-18] = sinm0
            am1,cosm1,sinm1 =  getAngle(xm,ym,zm,xm1,ym1,zm1)
            nf[-17] = am1
            nf[-16] = cosm1
            nf[-15] = sinm1
            # xyz rel to at3
            nf[-14] = 0
            nf[-13] = 0
            nf[-12] = 0 

            ang0,cos0,sin0 = getAngle(xm,ym,zm,x0,y0,z0)
            nf[-7] = ang0 # angle with at0
            nf[-6] = cos0 # cos with at0
            nf[-5] = sin0 # sin with at0
            ang1,cos1,sin1 = getAngle(xm,ym,zm,x1,y1,z1)
            nf[-4] = ang1 # angle with at1
            nf[-3] = cos1 # cos with at1
            nf[-2] = sin1 # sin with at1

            nf[-10] = x1-xm
            nf[-9] = y1-ym
            nf[-8] = z1-zm # xyz rel to at1
            nf[11] = x0-xm
            nf[12] = y0-ym
            nf[13] = z0-zm # xyz rel to at0
        elif i == id0:
            nf[20] = 0
            nf[21] = getDist(x0,y0,z0,x1,y1,z1)

            am3,cosm3,sinm3 =  getAngle(x0,y0,z0,xm,ym,zm)
            nf[-24] = getDist(x0,y0,z0,xm,ym,zm)
            nf[-23] = am3
            nf[-22] = cosm3
            nf[-21] = sinm3
            am0,cosm0,sinm0 =  getAngle(x0,y0,z0,xm0,ym0,zm0)
            nf[-20] = am0
            nf[-19] = cosm0
            nf[-18] = sinm0
            am1,cosm1,sinm1 =  getAngle(x0,y0,z0,xm1,ym1,zm1)
            nf[-17] = am1
            nf[-16] = cosm1
            nf[-15] = sinm1
            # xyz rel to at3
            nf[-14] = xm-x0
            nf[-13] = ym-y0
            nf[-12] = zm-z0

            # angle with itself: 0, cos 1, sin 0
            nf[-7] = 0
            nf[-6] = 1
            nf[-5] = 0
            a1,cos,sin = getAngle(x0,y0,z0,x1,y1,z1)
            nf[-4] = a1
            nf[-3] = cos
            nf[-2] = sin
            nf[-10] = x1-x0
            nf[-9] = y1-y0
            nf[-8] = z1-z0
            nf[11] = 0
            nf[12] = 0
            nf[13] = 0

        elif i == id1:
            nf[21] = 0
            nf[20] = getDist(x0,y0,z0,x1,y1,z1)

            am3,cosm3,sinm3 =  getAngle(x1,y1,z1,xm,ym,zm)
            nf[-24] = getDist(x1,y1,z1,xm,ym,zm)
            nf[-23] = am3
            nf[-22] = cosm3
            nf[-21] = sinm3
            am0,cosm0,sinm0 =  getAngle(x1,y1,z1,xm0,ym0,zm0)
            nf[-20] = am0
            nf[-19] = cosm0
            nf[-18] = sinm0
            am1,cosm1,sinm1 =  getAngle(x1,y1,z1,xm1,ym1,zm1)
            nf[-17] = am1
            nf[-16] = cosm1
            nf[-15] = sinm1
            # xyz rel to at3
            nf[-14] = xm-x1
            nf[-13] = ym-y1
            nf[-12] = zm-z1

            a0,cos,sin = getAngle(x0,y0,z0,x1,y1,z1)
            nf[-7] = a0
            nf[-6] = cos
            nf[-5] = sin
            # angle with itself: 0, cos 1, sin 0
            nf[-4] = 0
            nf[-3] = 1
            nf[-2] = 0
            nf[-10] = 0
            nf[-9] = 0
            nf[-8] = 0
            nf[11] = x0-x1
            nf[12] = y0-y1
            nf[13] = z0-z1
        else:
            # any other atom: read its own coordinates before slots 11..13 are overwritten
            xi,yi,zi = nf[11],nf[12],nf[13]
            nf[21] = getDist(xi,yi,zi,x1,y1,z1)
            nf[20] = getDist(xi,yi,zi,x0,y0,z0)

            am3,cosm3,sinm3 =  getAngle(xi,yi,zi,xm,ym,zm)
            nf[-24] = getDist(xi,yi,zi,xm,ym,zm)
            nf[-23] = am3
            nf[-22] = cosm3
            nf[-21] = sinm3
            am0,cosm0,sinm0 =  getAngle(xi,yi,zi,xm0,ym0,zm0)
            nf[-20] = am0
            nf[-19] = cosm0
            nf[-18] = sinm0
            am1,cosm1,sinm1 =  getAngle(xi,yi,zi,xm1,ym1,zm1)
            nf[-17] = am1
            nf[-16] = cosm1
            nf[-15] = sinm1
            # xyz rel to at3
            # NOTE(review): this subtracts id1's coordinates (x1, y1, z1), not this
            # atom's (xi, yi, zi) as the other branches do for their own atom. It looks
            # like a copy-paste slip, but the loaded model may have been trained on
            # features built the same way — check the training notebook before changing.
            nf[-14] = xm-x1
            nf[-13] = ym-y1
            nf[-12] = zm-z1

            a0,cos0,sin0 = getAngle(xi,yi,zi,x0,y0,z0)
            a1,cos1,sin1 = getAngle(xi,yi,zi,x1,y1,z1)
            nf[-7] = a0
            nf[-6] = cos0
            nf[-5] = sin0
            nf[-4] = a1
            nf[-3] = cos1
            nf[-2] = sin1
            nf[-10] = x1-xi
            nf[-9] = y1-yi
            nf[-8] = z1-zi
            nf[11] = x0-xi
            nf[12] = y0-yi
            nf[13] = z0-zi

    # keep the row id so predictions can be matched back to submission rows
    ids.append(dp[0])        
    x = torch.tensor(nodef, dtype=torch.float)
    edge_index = torch.tensor(edgeidx, dtype=torch.long)
    edge_attr = torch.tensor(bondf,dtype=torch.float)
    data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr)

    testlist.append(data)


HBox(children=(IntProgress(value=0, max=613138), HTML(value='')))




In [24]:
tmean = -0.27623939703519823
tsd = 4.524107451327379

In [25]:
test_loader = DataLoader(testlist, batch_size=256,shuffle=False)

In [26]:
ndim =128
edim = 64

In [27]:
torch.cuda.empty_cache()

In [28]:
class Net(torch.nn.Module):
    """Message-passing network that outputs one scalar per graph.

    Pipeline: linear projection of the 189 node features -> SELU -> batch norm ->
    dropout, three rounds of NNConv (edge-conditioned on the 25 edge features)
    each followed by a GRU update, Set2Set pooling, then a four-layer MLP head.

    The attribute names are part of the saved-model contract: the checkpoint
    loaded below is used as a whole module, so they must not be renamed.
    """

    def __init__(self):
        super().__init__()

        # node-feature embedding
        self.lin0 = torch.nn.Linear(189, ndim)
        self.bn0 = BatchNorm1d(ndim)
        self.d0 = Dropout(0.1)

        # maps each edge's 25 features to an ndim x ndim weight matrix for NNConv
        edge_mlp = Sequential(
            Linear(25, edim), ReLU(),
            Linear(edim, 2*edim), ReLU(),
            Linear(2*edim, ndim * ndim),
        )
        self.conv = NNConv(ndim, ndim, edge_mlp, aggr='max', root_weight=True)
        self.gru = GRU(ndim, ndim)

        # graph-level readout and regression head
        self.set2set = Set2Set(ndim, processing_steps=3)
        self.lin1 = torch.nn.Linear(2*ndim, 192)
        self.lin2 = torch.nn.Linear(192, 160)
        self.lin3 = torch.nn.Linear(160, ndim)
        self.lin4 = torch.nn.Linear(ndim, 1)

    def forward(self, data):
        """Return a 1-D tensor with one prediction per graph in the batch `data`."""
        node_state = self.d0(self.bn0(F.selu(self.lin0(data.x))))
        hidden = node_state.unsqueeze(0)

        # three message-passing rounds sharing the same conv and GRU weights
        for _ in range(3):
            message = F.selu(self.conv(node_state, data.edge_index, data.edge_attr))
            node_state, hidden = self.gru(message.unsqueeze(0), hidden)
            node_state = node_state.squeeze(0)

        pooled = self.set2set(node_state, data.batch)
        for layer in (self.lin1, self.lin2, self.lin3):
            pooled = F.selu(layer(pooled))

        return self.lin4(pooled).view(-1)

In [29]:
# Load the trained 2JHC model. `model2` is used directly as a module below
# (.to / .eval / call), so the file presumably stores a whole pickled Net instance
# and the Net class defined above must match the one used at save time — confirm.
# NOTE: torch.load unpickles the file; only load checkpoints from a trusted source.
#model2 = torch.load('MPNN_1JHN_SELU__MODEL_.pth')
model2 = torch.load('MPNN_2JHC_2FE.pth')
#xd = torch.load('Temp/t1.pth')

In [30]:
#model2 = xd['bestmodel']

In [31]:
# Run the loaded model over every test batch and collect de-normalised predictions.
# `preds` ends up in the same order as `ids` because the loader does not shuffle.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model2.to(device)
model2.eval()
preds = []
# Inference only: without no_grad() autograd records a graph for every forward
# pass, which costs memory and time for nothing.
with torch.no_grad():
    for dd in tqdm.tqdm_notebook(test_loader):
        dd = dd.to(device)
        x = model2(dd)
        x = x.cpu().numpy()
        # undo the target standardisation (x * sd + mean)
        x = (x*tsd)+tmean
        preds += list(x)


HBox(children=(IntProgress(value=0, max=2396), HTML(value='')))




In [32]:
print(len(preds),len(ids))

613138 613138


In [33]:
s = pd.read_csv("75_MPNN_NFR_1_2JHN_2JHH3C_B1.csv")

In [34]:
s =  s[~s['id'].isin(ids)]

In [35]:
s.head()

Unnamed: 0,id,scalar_coupling_constant
613138,4658154,-7.576379
613139,4658155,-9.711175
613140,4658158,-9.709909
613141,4658163,-7.572326
613142,4658164,-9.710675


In [36]:
new = pd.DataFrame({'id':ids, 'scalar_coupling_constant':preds})

In [37]:
ff = pd.concat([new,s])

In [38]:
len(ff)

2505542

In [39]:
ff.to_csv("76_MPNN_NFR_1_2.csv",index=False)

In [None]:
len(ff)

In [None]:
ff.head()

In [None]:
s = pd.read_csv("C:/Users/Alamjeet Singh/AA_SCCKaggle/Restart/26_lgbm_RDKCCType.csv")

In [None]:
len(s)

Ensemble

In [None]:
import pandas as pd, numpy as np

In [None]:
lgb = pd.read_csv("C:/Users/Alamjeet Singh/AA_SCCKaggle/Restart/26_lgbm_RDKCCType.csv")
nn = pd.read_csv("C:/Users/Alamjeet Singh/AA_SCCKaggle/AA/27_MPNN1JHC_LGBM.csv")

In [None]:
# NOTE(review): despite its name, `train` holds rows of test.csv, restricted to the
# 1JHC coupling type; it is only used to pick the ids to ensemble.
# The absolute, user-specific path will not resolve on another machine.
train = pd.read_csv("C:/Users/Alamjeet Singh/Downloads/champs-scalar-coupling/test.csv")
train = train.groupby('type').get_group('1JHC')

In [None]:
train.head()

In [None]:
ids = train['id'].tolist()

In [None]:
nn = nn[nn['id'].isin(ids)]
lgb = lgb[lgb['id'].isin(ids)]

In [None]:
nn.sort_values(by=['id'],inplace=True)
lgb.sort_values(by=['id'],inplace=True)

In [None]:
nn.reset_index(inplace=True)
lgb.reset_index(inplace=True)

In [None]:
ens = (nn['scalar_coupling_constant'] + lgb['scalar_coupling_constant'])/2

In [None]:
ff = pd.DataFrame({'id':nn['id'],'scalar_coupling_constant':ens})

In [None]:
len(ff)

In [None]:
n = pd.read_csv("C:/Users/Alamjeet Singh/AA_SCCKaggle/AA/27_MPNN1JHC_LGBM.csv")

In [None]:
len(n)

In [None]:
n = n[~n['id'].isin(ff['id'])]

In [None]:
len(n)

In [None]:
ff.head()

In [None]:
n.head()

In [None]:
d = pd.concat([ff,n])

In [None]:
len(d)

In [None]:
d.to_csv("28_MPNN1JHC_LGBM_ENS.csv",index=False)

In [None]:
#add GAT layer
#try lin first, then conv3 in for loop only
class Net(torch.nn.Module):
    """Smaller message-passing variant: 166 node features, 9 edge features, ReLU.

    NOTE: this re-binds the name `Net`, replacing the 189/25-feature class defined
    earlier in the notebook. Running this cell before loading a checkpoint saved
    from the earlier class will give that checkpoint the wrong forward().
    """

    def __init__(self):
        super().__init__()

        # node-feature embedding
        self.lin0 = torch.nn.Linear(166, ndim)
        self.bn0 = BatchNorm1d(ndim)
        self.d0 = torch.nn.Dropout(0.1)

        # maps each edge's 9 features to an ndim x ndim weight matrix for NNConv
        edge_mlp = Sequential(Linear(9, edim), ReLU(), Linear(edim, ndim * ndim))
        self.conv = NNConv(ndim, ndim, edge_mlp, aggr='max', root_weight=True)
        self.gru = GRU(ndim, ndim)

        # graph-level readout and two-layer head
        self.set2set = Set2Set(ndim, processing_steps=3)
        self.lin1 = torch.nn.Linear(2*ndim, ndim)
        self.lin2 = torch.nn.Linear(ndim, 1)

    def forward(self, data):
        """Return a 1-D tensor with one prediction per graph in the batch `data`."""
        node_state = self.d0(self.bn0(F.relu(self.lin0(data.x))))
        hidden = node_state.unsqueeze(0)

        # three message-passing rounds sharing the same conv and GRU weights
        for _ in range(3):
            message = F.relu(self.conv(node_state, data.edge_index, data.edge_attr))
            node_state, hidden = self.gru(message.unsqueeze(0), hidden)
            node_state = node_state.squeeze(0)

        pooled = self.set2set(node_state, data.batch)
        pooled = F.relu(self.lin1(pooled))

        return self.lin2(pooled).view(-1)