In [1]:
import pandas as pd
import numpy as np
from data_work import processed
df=pd.read_csv("/Users/mac/Dev/data/dta_IoT/new2test.csv")
#delete all null columns
id=[16,17,21,22,23,24]
col=df.columns
for idx in id:
    df=df.drop(col[idx],axis=1)
data=processed(df,"subcategory ")

In [2]:
data

Unnamed: 0,subcategory,pkSeqID,stime,flgs,proto,saddr,sport,daddr,dport,pkts,...,max,spkts,dpkts,sbytes,dbytes,rate,srate,drate,attack,category
0,4,1982,2490,0,2,5,4838,16,528,1,...,29,1,0,30,0,0,0,0,1,2
1,6,5114,2623,7,2,3,4669,24,901,4,...,908,3,1,74,1,35,22,0,1,0
2,7,6841,4276,0,3,0,2039,25,901,9,...,2132,9,0,71,0,370,554,0,1,0
3,7,6835,4272,0,3,0,2002,25,901,9,...,2131,9,0,71,0,371,555,0,1,0
4,3,1216,1126,0,3,1,2134,27,697,1,...,0,1,0,41,0,577,762,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7021,2,2995,5569,0,2,3,1062,20,372,27,...,2443,14,15,462,130,1574,1267,1037,1,3
7022,4,2611,2271,0,2,3,3079,20,872,1,...,73,0,1,0,1,2417,0,0,1,2
7023,7,6744,4190,0,3,0,1293,25,901,9,...,2165,9,0,71,0,372,556,0,1,0
7024,1,4610,4966,0,2,2,3931,20,900,9,...,1940,5,4,240,81,586,398,761,1,0


In [3]:
import torch
data=torch.tensor(data.values)

In [4]:
data


tensor([[   4, 1982, 2490,  ...,    0,    1,    2],
        [   6, 5114, 2623,  ...,    0,    1,    0],
        [   7, 6841, 4276,  ...,    0,    1,    0],
        ...,
        [   7, 6744, 4190,  ...,    0,    1,    0],
        [   1, 4610, 4966,  ...,  761,    1,    0],
        [   0, 3971, 5438,  ..., 1149,    1,    3]])

In [5]:
data.size()

torch.Size([7026, 29])

In [6]:
import numpy as np

def onehot_encode(labels, num_classes):
    num_samples = len(labels)
    encoded_labels = np.zeros((num_samples, num_classes))
    for i in range(num_samples):
        label = labels[i]
        encoded_labels[i, label] = 1
    return encoded_labels

In [7]:
import utils, torch, time, os, pickle
import numpy as np
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.autograd import Variable

class generator(nn.Module):
    def __init__(self, args):
        super(generator, self).__init__()
        self.input_dim = args.noise_size
        self.output_dim = args.n_features
        self.class_num = args.n_classes
        self.label_emb = nn.Embedding(self.class_num,self.class_num)

        def block(in_feat, out_feat, normalize=True):
            layers = [nn.Linear(in_feat, out_feat)]
            if normalize:
                layers.append(nn.BatchNorm1d(out_feat, 0.8))
            layers.append(nn.LeakyReLU(0.2, inplace=True))
            return layers
    
        self.model = nn.Sequential(
            *block(self.input_dim + self.class_num, 128, normalize=False),
            *block(128, 256),
            *block(256, 512),
            *block(512, 1024),
            nn.Linear(1024,self.output_dim)
        )

    def forward(self, noise ,label):
        x = torch.cat((self.label_emb(label).squeeze(),noise), 1)
        x = self.model(x)
        return x

class discriminator(nn.Module):
    def __init__(self,args):
        super(discriminator, self).__init__()
        self.input_dim = args.n_features
        self.output_dim = args.n_features
        self.class_num = args.n_classes
        self.label_emb = nn.Embedding(self.class_num,self.class_num)

        self.model = nn.Sequential(
            nn.Linear((self.class_num + self.input_dim), 512),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(512, 512),
            nn.Dropout(0.4),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(512, 512),
            nn.Dropout(0.4),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(512, 256)
        )
        self.adv_layer = nn.Sequential(nn.Linear(256, 1), nn.Sigmoid())
        self.aux_layer = nn.Sequential(nn.Linear(256, self.class_num), nn.Softmax())
        

    def forward(self, input ,label):
        # Concatenate label embedding and image to produce input
        x = torch.cat((self.label_emb(label).squeeze(), input), 1)
        x=self.model(x)
        real=self.adv_layer(x)
        label=self.aux_layer(x)
        return real,label 
    
class CGAN(object):
    def __init__(self, args):
        # parameters
        self.epoch = args.epoch
        self.batch_size = args.batch_size
        self.save_dir = args.save_dir
        self.result_dir = args.result_dir
        self.dataset = args.dataset
        self.log_dir = args.log_dir
        self.gpu_mode = args.gpu_mode
        self.model_name = args.gan_type
        self.z_dim = args.z_dim
        self.class_num = args.n_class
        # load dataset
        self.data_loader = DataLoader(self.dataset, batch_size=self.batch_size, shuffle=True)
        data=next(iter(self.data_loader))
        #option
        dim=int(data.shape[1])-8

        # networks init
        self.G = generator(args)
        self.D = discriminator(args)
        self.G_optimizer = optim.RMSprop(self.G.parameters(), lr=args.lrG, alpha=0.9)
        self.D_optimizer = optim.RMSprop(self.D.parameters(), lr=args.lrD, alpha=0.9)
        
        self.MSE_loss = torch.nn.MSELoss()
        self.BCE_loss=nn.BCELoss()
        # Loss functions
        self.adversarial_loss = torch.nn.BCELoss()
        self.auxiliary_loss = torch.nn.CrossEntropyLoss()
        print('---------- Networks architecture -------------')
        utils.print_network(self.G)
        utils.print_network(self.D)
        print('-----------------------------------------------')



    def train(self):
        self.train_hist = {}
        self.train_hist['D_loss'] = []
        self.train_hist['G_loss'] = []
        self.train_hist['per_epoch_time'] = []
        self.train_hist['total_time'] = []

        self.y_real_ = Variable(torch.FloatTensor(self.batch_size, 1).fill_(1.0), requires_grad=False)
        self.y_fake_ = Variable(torch.FloatTensor(self.batch_size, 1).fill_(0.0), requires_grad=False)

        #self.y_real_, self.y_fake_ = torch.ones(self.batch_size, 1), torch.zeros(self.batch_size, 1)



        self.D.train()
        print('training start!!')
        start_time = time.time()
        for epoch in range(self.epoch):
            self.G.train()
            epoch_start_time = time.time()
            for iter, da in enumerate(self.data_loader):
                x_=da[:,1:].float()
                y_=da[:,:1].int()
                if iter == self.data_loader.dataset.__len__() // self.batch_size:
                    break

                z_ = Variable(torch.FloatTensor(np.random.normal(0, 1, (self.batch_size, self.z_dim))))
                y_check= onehot_encode(y_,8)
                y_check=torch.tensor(y_check)
                # update D network
                self.D.train()
                self.G.eval()
                self.D_optimizer.zero_grad()


                real,label = self.D(x_, y_)
                #D_real_loss = self.MSE_loss(D_real, self.y_real_)
                G_ = self.G(z_, y_)
                fake,label_ = self.D(G_, y_)
                #D_fake_loss = self.MSE_loss(D_fake, self.y_fake_)
                D_fake_loss=(self.adversarial_loss(fake, self.y_fake_) + self.auxiliary_loss(label_, y_check)) / 2
                D_real_loss=(self.adversarial_loss(real, self.y_real_) + self.auxiliary_loss(label, y_check)) / 2


                D_loss = D_real_loss + D_fake_loss
                self.train_hist['D_loss'].append(D_loss.item())

                D_loss.backward()
                self.D_optimizer.step()

                 # update G network
                self.D.eval()
                self.G.train()
                self.G_optimizer.zero_grad()

                z_=torch.rand((self.batch_size, self.z_dim))
                G_ = self.G(z_, y_)
                real,label = self.D(G_, y_)
                #G_loss = self.MSE_loss(D_fake, self.y_real_)

                G_loss=0.5 * (self.adversarial_loss(real, self.y_real_) + self.auxiliary_loss(label, y_check))
                self.train_hist['G_loss'].append(G_loss.item())

                G_loss.backward()
                self.G_optimizer.step()

               
                if (iter + 1) == self.data_loader.dataset.__len__() // self.batch_size:
                    print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f" %
                          ((epoch + 1), (iter + 1), self.data_loader.dataset.__len__() // self.batch_size, D_loss.item(), G_loss.item()))

            self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time)
                

        self.train_hist['total_time'].append(time.time() - start_time)
        print("Avg one epoch time: %.2f, total %d epochs time: %.2f" % (np.mean(self.train_hist['per_epoch_time']),
              self.epoch, self.train_hist['total_time'][0]))
        print("Training finish!")


In [8]:
class Args:
    def __init__(self,data):
        self.epoch = 10
        self.batch_size = 32
        self.dataset = data
        self.log_dir = 'logs/'
        self.gpu_mode = True
        self.gan_type = 'cGAN'
        self.z_dim = 10
        self.n_class = 8
        self.sample_num = self.n_class ** 2
        self.lrG=0.0001
        self.lrD=0.0001
        self.n_epochs=self.n_class
        self.n_classes=self.n_class
        self.noise_size=self.z_dim
        self.n_features=28

In [9]:
arg=Args(data)
print(arg.dataset)

tensor([[   4, 1982, 2490,  ...,    0,    1,    2],
        [   6, 5114, 2623,  ...,    0,    1,    0],
        [   7, 6841, 4276,  ...,    0,    1,    0],
        ...,
        [   7, 6744, 4190,  ...,    0,    1,    0],
        [   1, 4610, 4966,  ...,  761,    1,    0],
        [   0, 3971, 5438,  ..., 1149,    1,    3]])


In [10]:
model=CGAN(arg)

---------- Networks architecture -------------
generator(
  (label_emb): Embedding(8, 8)
  (model): Sequential(
    (0): Linear(in_features=18, out_features=128, bias=True)
    (1): LeakyReLU(negative_slope=0.2, inplace=True)
    (2): Linear(in_features=128, out_features=256, bias=True)
    (3): BatchNorm1d(256, eps=0.8, momentum=0.1, affine=True, track_running_stats=True)
    (4): LeakyReLU(negative_slope=0.2, inplace=True)
    (5): Linear(in_features=256, out_features=512, bias=True)
    (6): BatchNorm1d(512, eps=0.8, momentum=0.1, affine=True, track_running_stats=True)
    (7): LeakyReLU(negative_slope=0.2, inplace=True)
    (8): Linear(in_features=512, out_features=1024, bias=True)
    (9): BatchNorm1d(1024, eps=0.8, momentum=0.1, affine=True, track_running_stats=True)
    (10): LeakyReLU(negative_slope=0.2, inplace=True)
    (11): Linear(in_features=1024, out_features=28, bias=True)
  )
)
Total number of parameters: 724700
discriminator(
  (label_emb): Embedding(8, 8)
  (model): S

In [11]:
model.train()

training start!!


  input = module(input)


Epoch: [ 1] [ 219/ 219] D_loss: 2.17477554, G_loss: 1.36390997
Epoch: [ 2] [ 219/ 219] D_loss: 2.14600681, G_loss: 1.33152789
Epoch: [ 3] [ 219/ 219] D_loss: 2.42999448, G_loss: 1.55344171
Epoch: [ 4] [ 219/ 219] D_loss: 1.87071966, G_loss: 1.38079383
Epoch: [ 5] [ 219/ 219] D_loss: 1.70050527, G_loss: 1.48585143
Epoch: [ 6] [ 219/ 219] D_loss: 1.66830478, G_loss: 1.42419575
Epoch: [ 7] [ 219/ 219] D_loss: 1.61615343, G_loss: 1.52105531
Epoch: [ 8] [ 219/ 219] D_loss: 1.62579116, G_loss: 1.03923865
Epoch: [ 9] [ 219/ 219] D_loss: 1.54864334, G_loss: 1.83200431
Epoch: [10] [ 219/ 219] D_loss: 1.44122551, G_loss: 1.87926778
Avg one epoch time: 2.59, total 10 epochs time: 25.87
Training finish!


In [None]:
y=torch.tensor([[1],[2],[3],[4],[5],[6],[7],[0],[1],[2],[3],[4],[5],[6],[7],[0],[1],[2],[3],[4],[5],[6],[7],[0]])




In [None]:
y

In [None]:
len=y.shape[0]
len

In [None]:
len=y.shape[0]
z= torch.rand(len,10)
y=torch.tensor(y)

In [None]:
mm=torch.cat([z,y],1)

In [None]:
mm

In [None]:
mm.shape[1]

In [None]:
data_fake=model.G(z,y)

In [None]:
data_fake