In [2]:
import pandas as pd
import numpy as np
from collections import Counter
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import torch
from torch.utils.data import Dataset,DataLoader
import torch.optim as torch_optim
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models
from datetime import datetime
import pandas_profiling as ppf
# from tensorboardX import SummaryWriter
# writer=SummaryWriter(logdir='./neuron')

In [3]:
encoded_all_data=pd.read_csv(r'./data/created_data/encoded_all_data.csv').reset_index(drop=True).set_index('ID')

In [4]:
# making all variables categorical
for col in encoded_all_data.columns:
    encoded_all_data[col] = encoded_all_data[col].astype('category')

In [5]:
txt=encoded_all_data[['行业','企业类型','控制人类型','区域']]
txt

Unnamed: 0_level_0,行业,企业类型,控制人类型,区域
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
28.0,0,0,0,6
230.0,3,0,0,1
429.0,2,4,1,3
727.0,5,3,0,3
1137.0,1,2,0,1
...,...,...,...,...
5999995.0,2,1,0,5
5999996.0,5,0,1,4
5999997.0,0,4,0,6
5999998.0,3,3,1,5


In [6]:
#categorical embedding for columns having more than two values?
embedded_cols = {n: len(col.cat.categories) for n,col in txt.items()}
embedded_cols

{'行业': 6, '企业类型': 5, '控制人类型': 2, '区域': 7}

In [7]:
embedded_col_names = embedded_cols.keys()
print(embedded_col_names)
len(encoded_all_data.columns) - len(embedded_cols) #number of numerical columns

dict_keys(['行业', '企业类型', '控制人类型', '区域'])


24

In [8]:
embedding_sizes = [(n_categories, min(50, (n_categories+1)//2)) for _,n_categories in embedded_cols.items()]
embedding_sizes

[(6, 3), (5, 3), (2, 1), (7, 4)]

In [9]:
init_x=encoded_all_data.dropna(subset=['flag']).drop(['flag'],axis=1)
init_y=encoded_all_data.dropna(subset=['flag'])['flag'].to_numpy()
train_X,val_X, train_y, val_y = train_test_split(init_x,init_y, test_size=0.30, random_state=0)
display(train_X,train_y)

Unnamed: 0_level_0,专利,企业类型,区域,商标,控制人持股比例,控制人类型,注册时间,注册资本,著作权,行业,...,项目融资和政策融资成本,从业人数,资产总额,负债总额,营业总收入,主营业务收入,利润总额,净利润,纳税总额,所有者权益合计
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5989257.0,0.0,1,3,0.0,0.70,1,4.0,-0.973541,0.0,4,...,0.135151,0.563362,-0.536135,-0.609535,-0.405129,-0.448206,-0.248830,-0.204542,0.124025,0.432645
5991406.0,0.0,3,6,1.0,0.79,1,11.0,-0.254808,0.0,1,...,-0.340444,-0.212910,-0.256340,-0.397152,-0.520638,-0.499634,-0.355941,0.082914,-0.157695,0.399039
5987623.0,1.0,1,4,1.0,0.90,0,10.0,1.179153,0.0,3,...,-0.116104,0.063135,-0.270980,-0.065330,-0.117421,-0.086136,-0.252447,0.356294,-0.020507,-0.221159
185734.0,1.0,0,5,0.0,0.85,1,5.0,-0.447639,0.0,3,...,0.575990,-0.932660,0.018246,0.396604,0.095771,-0.094450,0.064934,-0.574421,-0.463028,-0.694906
5978163.0,0.0,4,1,1.0,0.85,0,4.0,-0.128591,1.0,3,...,-0.340444,-0.088663,-0.034559,-0.045658,-0.416634,-0.435343,-0.210012,-0.090157,-0.106243,0.044631
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5985562.0,1.0,1,1,0.0,0.81,1,6.0,0.611179,1.0,1,...,-0.340444,-0.556945,0.183051,-0.071368,-0.000516,-0.009069,0.379197,-0.270317,-0.232909,0.345724
5997416.0,0.0,4,6,0.0,0.52,1,9.0,-0.054965,0.0,1,...,0.179926,-0.310291,-0.098579,-0.079990,0.037919,0.089179,-0.357966,-0.171569,-0.044199,0.027948
5995645.0,1.0,2,3,0.0,0.60,0,8.0,-0.184687,1.0,5,...,0.594195,0.544538,-0.037865,0.313914,-0.341026,-0.279906,-0.262372,0.078621,-0.152383,-0.611328
5986066.0,0.0,4,4,1.0,0.75,0,13.0,0.257071,1.0,3,...,-0.272015,-1.008557,-0.660721,-0.721325,-0.393754,-0.371140,-0.221115,-0.294612,-0.416134,0.476450


array([0., 0., 0., ..., 0., 0., 1.])

In [10]:
class CompanyDataset(Dataset):
    def __init__(self, X, Y, embedded_col_names):
        X = X.copy()
        self.X1 = X.loc[:,embedded_col_names].copy().values.astype(np.int64) #categorical columnss
        self.X2 = X.drop(columns=embedded_col_names).copy().values.astype(np.float32) #numerical columns
        self.y = Y
        
    def __len__(self):
        return len(self.y)
    
    def __getitem__(self, idx):
        return self.X1[idx], self.X2[idx], self.y[idx]

In [11]:
#creating train and valid datasets
train_ds = CompanyDataset(train_X, train_y,embedded_col_names)
valid_ds = CompanyDataset(val_X,val_y,embedded_col_names)

In [12]:
# train_X
train_y

array([0., 0., 0., ..., 0., 0., 1.])

In [13]:
def get_default_device():
    """Pick GPU if available, else CPU"""
    if torch.cuda.is_available():
        return torch.device('cuda')
    else:
        return torch.device('cpu')

In [14]:
def to_device(data, device):
    """Move tensor(s) to chosen device"""
    if isinstance(data, (list,tuple)):
        return [to_device(x, device) for x in data]
    return data.to(device, non_blocking=True)

In [15]:
class DeviceDataLoader():
    """Wrap a dataloader to move data to a device"""
    def __init__(self, dl, device):
        self.dl = dl
        self.device = device
        
    def __iter__(self):
        """Yield a batch of data after moving it to device"""
        for b in self.dl: 
            yield to_device(b, self.device)

    def __len__(self):
        """Number of batches"""
        return len(self.dl)

In [16]:
device = get_default_device()
device

device(type='cuda')

In [17]:
class FlagModel(nn.Module):  #分为两部分categorical 和 continuous
    def __init__(self, embedding_sizes, n_cont):
        super().__init__()
        self.embeddings = nn.ModuleList([nn.Embedding(categories, size) for categories,size in embedding_sizes])
        n_emb = sum(e.embedding_dim for e in self.embeddings) #length of all embeddings combined  
        
        self.n_emb, self.n_cont = n_emb, n_cont
        
        self.lin1 = nn.Linear(self.n_emb + self.n_cont, 200)  #线性层输入： 输出：(200)
        self.lin2 = nn.Linear(200, 70)
        self.lin3 = nn.Linear(70, 2)
        self.bn1 = nn.BatchNorm1d(self.n_cont)
        self.bn2 = nn.BatchNorm1d(200)
        self.bn3 = nn.BatchNorm1d(70)
        
        self.emb_drop = nn.Dropout(0.6)
        self.drops = nn.Dropout(0.3)
        

    def forward(self, x_cat, x_cont):  #描述了一个前向计算图
        x = [e(x_cat[:,i]) for i,e in enumerate(self.embeddings)]   #(embeddings): ModuleList(
#                                                                                  (0): Embedding(992, 50)
#                                                                                  (1): Embedding(51, 26)
#   )
        x = torch.cat(x, 1)
        x = self.emb_drop(x)  #把categorical  drop_out
        
        x2 = self.bn1(x_cont)   #把continuous标准化
        
        x = torch.cat([x, x2], 1)  #把categorical 和 continuous 合并
        x = F.relu(self.lin1(x))  #线性层并激活
        x = self.drops(x)        #drop_out
        
        x = self.bn2(x)         #标准化
        x = F.relu(self.lin2(x)) #线性层并激活
        x = self.drops(x)        #drop_out
        x = self.bn3(x)          #标准化
        x = self.lin3(x)         #线性层

#         x=torch.sigmoid(x)
        return x

In [18]:
model = FlagModel(embedding_sizes, 23)
to_device(model, device)

FlagModel(
  (embeddings): ModuleList(
    (0): Embedding(6, 3)
    (1): Embedding(5, 3)
    (2): Embedding(2, 1)
    (3): Embedding(7, 4)
  )
  (lin1): Linear(in_features=34, out_features=200, bias=True)
  (lin2): Linear(in_features=200, out_features=70, bias=True)
  (lin3): Linear(in_features=70, out_features=2, bias=True)
  (bn1): BatchNorm1d(23, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn2): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn3): BatchNorm1d(70, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (emb_drop): Dropout(p=0.6, inplace=False)
  (drops): Dropout(p=0.3, inplace=False)
)

In [19]:
def get_optimizer(model, lr = 0.001, wd = 0.0):
    parameters = filter(lambda p: p.requires_grad, model.parameters()) #filter过滤序列，留下True
    optim = torch_optim.Adam(parameters, lr=lr, weight_decay=wd)
    return optim

In [20]:
def train_model(model, optim, train_dl,i):
    model.train()
    total = 0
    sum_loss = 0
    for x1, x2, y in train_dl:
        batch = y.shape[0]
        output = model(x1, x2)
        y=y.long()
        loss = F.cross_entropy(output, y)   
        optim.zero_grad()
        loss.backward()
        optim.step()
        total += batch
        sum_loss += batch*(loss.item())
#     writer.add_scalar('train_loss',sum_loss/total,i)
    return sum_loss/total

In [21]:
def val_loss(model, valid_dl,i):
    model.eval()
    total = 0
    sum_loss = 0
    correct = 0
    for x1, x2, y in valid_dl:
        current_batch_size = y.shape[0]
        out = model(x1, x2)
        y=y.long()
        loss = F.cross_entropy(out, y)
        sum_loss += current_batch_size*(loss.item())
        total += current_batch_size
        pred = torch.max(out, 1)[1]
        correct += (pred == y).float().sum().item()
#         correct = torch.mean((pred == y).float())
#     writer.add_scalar('valid_loss',sum_loss/total,i)
#     writer.add_scalar('valid_acc',correct/total,i)
    print("valid loss %f and accuracy %f " % (sum_loss/total, correct/total))
    return sum_loss/total, correct/total

In [22]:
def train_loop(model, epochs, lr=0.01, wd=0.0):
    optim = get_optimizer(model, lr = lr, wd = wd)
    for i in range(epochs): 
        loss = train_model(model, optim, train_dl,i)
        print("training loss: ", loss)
        _,acc=val_loss(model, valid_dl,i)
        if acc>0.987:
            break
    return 

In [23]:
batch_size = 1000
train_dl = DataLoader(train_ds, batch_size=batch_size,shuffle=True)
valid_dl = DataLoader(valid_ds, batch_size=batch_size,shuffle=True)

In [24]:
train_dl = DeviceDataLoader(train_dl,device)
valid_dl = DeviceDataLoader(valid_dl,device)

In [25]:
train_loop(model, epochs=1000, lr=0.0001, wd=0.05)

training loss:  0.7320813578402688
valid loss 0.684440 and accuracy 0.525443 
training loss:  0.6695158955125878
valid loss 0.622389 and accuracy 0.645528 
training loss:  0.6279207590551307
valid loss 0.584613 and accuracy 0.688801 
training loss:  0.594572888764812
valid loss 0.553409 and accuracy 0.725713 
training loss:  0.5655740384266066
valid loss 0.521922 and accuracy 0.766191 
training loss:  0.5423942020375919
valid loss 0.498335 and accuracy 0.786527 
training loss:  0.5243165813095902
valid loss 0.473491 and accuracy 0.808597 
training loss:  0.4985568637160064
valid loss 0.447596 and accuracy 0.832305 
training loss:  0.48472217305440896
valid loss 0.423317 and accuracy 0.855243 
training loss:  0.46740592936372727
valid loss 0.409965 and accuracy 0.856881 
training loss:  0.4479378347395275
valid loss 0.382522 and accuracy 0.878470 
training loss:  0.43339508976750396
valid loss 0.362330 and accuracy 0.887336 
training loss:  0.41192162821841255
valid loss 0.338494 and ac

valid loss 0.094774 and accuracy 0.970316 
training loss:  0.0997064634985232
valid loss 0.093471 and accuracy 0.971762 
training loss:  0.10074259869394835
valid loss 0.098800 and accuracy 0.969931 
training loss:  0.09749359002500595
valid loss 0.094115 and accuracy 0.969931 
training loss:  0.10150973591399523
valid loss 0.095059 and accuracy 0.970894 
training loss:  0.09829038267946566
valid loss 0.094764 and accuracy 0.969738 
training loss:  0.10078439678595465
valid loss 0.093960 and accuracy 0.971569 
training loss:  0.09759428823380303
valid loss 0.094530 and accuracy 0.970605 
training loss:  0.0990275999153204
valid loss 0.095745 and accuracy 0.968774 
training loss:  0.09774179994981178
valid loss 0.095100 and accuracy 0.970991 
training loss:  0.09585771050060418
valid loss 0.095841 and accuracy 0.969352 
training loss:  0.10866428776265609
valid loss 0.096243 and accuracy 0.970702 
training loss:  0.09964475348119065
valid loss 0.100510 and accuracy 0.965112 
training lo

valid loss 0.084248 and accuracy 0.970894 
training loss:  0.0843842652492762
valid loss 0.082748 and accuracy 0.975135 
training loss:  0.07942442883049819
valid loss 0.079854 and accuracy 0.976484 
training loss:  0.08093319668312067
valid loss 0.080343 and accuracy 0.976291 
training loss:  0.07635019181243292
valid loss 0.077724 and accuracy 0.975810 
training loss:  0.07873663436239789
valid loss 0.077049 and accuracy 0.976002 
training loss:  0.08053684864939244
valid loss 0.088339 and accuracy 0.967618 
training loss:  0.07882888694413828
valid loss 0.078676 and accuracy 0.977833 
training loss:  0.08621348915233844
valid loss 0.078200 and accuracy 0.977062 
training loss:  0.07670866028927094
valid loss 0.076564 and accuracy 0.977062 
training loss:  0.0750380117219663
valid loss 0.074705 and accuracy 0.979279 
training loss:  0.0811005455921284
valid loss 0.078679 and accuracy 0.974460 
training loss:  0.07580643216663847
valid loss 0.077220 and accuracy 0.975713 
training los

valid loss 0.069370 and accuracy 0.984194 
training loss:  0.07775101000018374
valid loss 0.071933 and accuracy 0.984194 
training loss:  0.08104140705183414
valid loss 0.079431 and accuracy 0.981399 
training loss:  0.08126385461708921
valid loss 0.072851 and accuracy 0.982267 
training loss:  0.08087886087054735
valid loss 0.076513 and accuracy 0.982556 
training loss:  0.07417003357732485
valid loss 0.074007 and accuracy 0.983327 
training loss:  0.08066914440609727
valid loss 0.072634 and accuracy 0.983809 
training loss:  0.07721472069404414
valid loss 0.071022 and accuracy 0.982363 
training loss:  0.08512987852618542
valid loss 0.076068 and accuracy 0.982267 
training loss:  0.07656045797879894
valid loss 0.073260 and accuracy 0.984291 
training loss:  0.08109442021399835
valid loss 0.081311 and accuracy 0.980918 
training loss:  0.08265006714312745
valid loss 0.072281 and accuracy 0.985158 
training loss:  0.07577121954088721
valid loss 0.074633 and accuracy 0.983327 
training 

valid loss 0.095883 and accuracy 0.982941 
training loss:  0.09904812365740794
valid loss 0.094521 and accuracy 0.978508 
training loss:  0.09549235061096657
valid loss 0.091589 and accuracy 0.985351 
training loss:  0.10278566090804965
valid loss 0.094373 and accuracy 0.984676 
training loss:  0.09715449689627087
valid loss 0.097646 and accuracy 0.983905 
training loss:  0.10273534695015234
valid loss 0.095441 and accuracy 0.979568 
training loss:  0.0986249762496764
valid loss 0.093456 and accuracy 0.984387 
training loss:  0.10625877316058746
valid loss 0.096029 and accuracy 0.984098 
training loss:  0.09941371031701368
valid loss 0.094527 and accuracy 0.984291 
training loss:  0.09474632669651421
valid loss 0.091600 and accuracy 0.983423 
training loss:  0.09733984770557381
valid loss 0.097021 and accuracy 0.981014 
training loss:  0.10276726416637783
valid loss 0.094909 and accuracy 0.982749 
training loss:  0.09861465548432906
valid loss 0.095262 and accuracy 0.984483 
training l

valid loss 0.095376 and accuracy 0.985158 
training loss:  0.10373026137507994
valid loss 0.095984 and accuracy 0.984387 
training loss:  0.09626447205522244
valid loss 0.094802 and accuracy 0.981303 
training loss:  0.09542104534888535
valid loss 0.094440 and accuracy 0.985640 
training loss:  0.10120761923378695
valid loss 0.098735 and accuracy 0.984098 
training loss:  0.10661250721202807
valid loss 0.098992 and accuracy 0.977930 
training loss:  0.10427391494612621
valid loss 0.099806 and accuracy 0.977737 
training loss:  0.10076129898891809
valid loss 0.095995 and accuracy 0.984676 
training loss:  0.09876784411925633
valid loss 0.093054 and accuracy 0.982845 
training loss:  0.10269520981563532
valid loss 0.098484 and accuracy 0.983809 
training loss:  0.110412823271452
valid loss 0.099869 and accuracy 0.982267 
training loss:  0.10131485151066658
valid loss 0.096743 and accuracy 0.985544 
training loss:  0.10366746439781319
valid loss 0.097338 and accuracy 0.981881 
training lo

### 以下为贴标签部分

In [26]:
# 目前最好：  98.7953  lr=0.0001 dw=0.05 adam

In [27]:
# writer.close()

In [28]:
non_label=encoded_all_data.drop(init_x.index)
non_label_X=non_label.drop(['flag'],axis=1)
estimate_y=estimate_y=np.array([1]*non_label_X.shape[0])
non_label_X

Unnamed: 0_level_0,专利,企业类型,区域,商标,控制人持股比例,控制人类型,注册时间,注册资本,著作权,行业,...,项目融资和政策融资成本,从业人数,资产总额,负债总额,营业总收入,主营业务收入,利润总额,净利润,纳税总额,所有者权益合计
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1080523.0,0.0,0,1,0.0,0.57,0,0.0,-0.254808,0.0,4,...,-0.340444,-0.351677,0.184651,0.417659,-0.258273,-0.272971,-0.235634,-0.424045,-0.463028,-0.522521
1080756.0,0.0,0,0,0.0,0.79,0,5.0,1.158117,0.0,5,...,-0.340444,-0.376625,1.042700,1.238097,0.496213,0.527454,1.150685,-0.738470,-0.463028,-0.921111
1080951.0,1.0,2,4,1.0,0.93,0,4.0,0.463926,0.0,2,...,0.510628,0.342302,0.870810,0.557917,0.176418,0.061825,0.351885,-0.604207,-0.463028,0.077015
1080972.0,0.0,4,4,0.0,0.58,1,0.0,-1.699287,0.0,1,...,-0.340444,0.149991,-0.955436,-0.773011,-0.726476,-0.690639,-0.618036,-0.230269,-0.463028,0.205945
1081027.0,1.0,1,6,1.0,0.72,1,12.0,1.231744,0.0,5,...,-0.035733,0.421098,-0.210764,-0.388153,-0.282905,-0.202919,-0.323033,-0.415748,-0.463028,0.438726
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5999614.0,1.0,4,0,1.0,0.51,0,2.0,-0.563337,0.0,2,...,0.143578,-0.266909,0.724965,-0.034803,0.071062,-0.043789,0.206325,0.250782,0.157708,0.982942
5999746.0,0.0,0,3,0.0,0.91,1,11.0,0.842576,0.0,2,...,-0.340444,-0.485126,-0.465091,-0.262250,-0.274809,-0.346624,-0.239135,0.038662,0.204388,-0.104008
5999945.0,0.0,3,4,1.0,0.83,0,13.0,-1.646697,1.0,4,...,-0.340444,-0.433419,-0.970887,-0.810904,-0.761767,-0.724710,-0.644649,-0.191496,-0.449280,0.255177
5999952.0,1.0,0,3,1.0,0.62,0,6.0,-0.447639,1.0,5,...,-0.340444,0.306643,-0.441269,-0.382515,-0.502252,-0.397663,-0.367992,-0.121098,-0.325291,0.136300


In [29]:
# making all variables categorical
for col in non_label.columns:
    non_label[col] = non_label[col].astype('category')

In [30]:
#categorical embedding for columns having more than two values
embedded_cols = {n: len(col.cat.categories) for n,col in txt.items()}
embedded_cols

{'行业': 6, '企业类型': 5, '控制人类型': 2, '区域': 7}

In [31]:
embedding_sizes = [(n_categories, min(50, (n_categories+1)//2)) for _,n_categories in embedded_cols.items()]
embedding_sizes

[(6, 3), (5, 3), (2, 1), (7, 4)]

In [32]:
test_ds = CompanyDataset(non_label_X,estimate_y,embedded_col_names)
batch_size2 = 10275
test_dl = DataLoader(test_ds,batch_size=batch_size2,shuffle=False) #返回的是可迭代对象
# first=iter(test_dl)
# next(first)
test_dl = DeviceDataLoader(test_dl, device)

In [33]:
def predict(model, test_dl):
    model.eval()
    for x1, x2, y in test_dl:
        out = model(x1, x2)
        pred = torch.max(out, 1)[1]
        return pred.tolist()

In [34]:
flag=predict(model,test_dl)
flag

[1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,


In [38]:
neuron_final=pd.concat([pd.DataFrame(index=non_label_X.index,data=flag,columns=['flag']),],axis=1)
neuron_final=encoded_all_data.combine_first(neuron_final)
neuron_final.to_csv(r'./data/created_data/neuron_final.csv')

In [36]:
ppf.ProfileReport(neuron_final)

  font.set_text(s, 0.0, flags=LOAD_NO_HINTING)
  font.set_text(s, 0.0, flags=LOAD_NO_HINTING)
  font.set_text(s, 0.0, flags=LOAD_NO_HINTING)
  font.set_text(s, 0.0, flags=LOAD_NO_HINTING)
  font.set_text(s, 0.0, flags=LOAD_NO_HINTING)
  font.set_text(s, 0.0, flags=LOAD_NO_HINTING)
  font.set_text(s, 0.0, flags=LOAD_NO_HINTING)
  font.set_text(s, 0.0, flags=LOAD_NO_HINTING)
  font.set_text(s, 0.0, flags=LOAD_NO_HINTING)
  font.set_text(s, 0.0, flags=LOAD_NO_HINTING)
  font.set_text(s, 0.0, flags=LOAD_NO_HINTING)
  font.set_text(s, 0.0, flags=LOAD_NO_HINTING)
  font.set_text(s, 0.0, flags=LOAD_NO_HINTING)
  font.set_text(s, 0.0, flags=LOAD_NO_HINTING)
  font.set_text(s, 0.0, flags=LOAD_NO_HINTING)
  font.set_text(s, 0.0, flags=LOAD_NO_HINTING)
  font.set_text(s, 0.0, flags=LOAD_NO_HINTING)
  font.set_text(s, 0.0, flags=LOAD_NO_HINTING)
  font.set_text(s, 0.0, flags=LOAD_NO_HINTING)
  font.set_text(s, 0.0, flags=LOAD_NO_HINTING)
  font.set_text(s, 0.0, flags=LOAD_NO_HINTING)
  font.set_te

  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
 

Tab(children=(HTML(value='<div id="overview-content" class="row variable spacing">\n    <div class="row">\n   …

