In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import copy
import math
from torchsummary import summary
import torch.nn as nn
import torch.nn.functional as F
from collections import defaultdict
from math import sqrt
import torch.optim as optim


In [2]:
def changenet(net, layer_name, layer_new_weight, layer_new_bias):
    class Net(nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
            self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
            self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
            self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

            self.fc1 = nn.Linear(16 * 4 * 4, len(net.state_dict()["fc1.weight"]))
            self.fc2 = nn.Linear(len(net.state_dict()["fc2.weight"][0]), len(net.state_dict()["fc2.weight"]))
            self.fc3 = nn.Linear(len(net.state_dict()["fc3.weight"][0]), 10)
            if(layer_name == "fc1"):
                self.fc1 = nn.Linear(16 * 4 * 4, len(layer_new_weight))
            elif(layer_name == "fc2"):
                self.fc2 = nn.Linear(len(layer_new_weight[0]), len(layer_new_weight))
            else:
                self.fc3 = nn.Linear(len(layer_new_weight[0]), 10)

        def forward(self, x):
            x = self.pool(F.relu(self.conv1(x)))
            x = self.pool(F.relu(self.conv2(x)))
            x = x.view(-1, 16 * 4 * 4)
            x = F.relu(self.fc1(x))
            x = F.relu(self.fc2(x))
            x = self.fc3(x)
            return F.log_softmax(x, dim=1)

    net1 = Net()
    
    for param_tensor in net.state_dict():
        if(layer_name not in param_tensor):
            if(net.state_dict()[param_tensor].dim()== 1):
                net1.state_dict()[param_tensor]=copy.deepcopy(net.state_dict()[param_tensor])
            elif(net.state_dict()[param_tensor].dim()== 2):
                for i in range(len(net.state_dict()[param_tensor])):
                    net1.state_dict()[param_tensor][i]=copy.deepcopy(net.state_dict()[param_tensor][i])
            else:
                for i in range(len(net.state_dict()[param_tensor])):
                    for j in range(len(net.state_dict()[param_tensor][i])):
                        for k in range(len(net.state_dict()[param_tensor][i][j])):
                            net1.state_dict()[param_tensor][i][j][k]=copy.deepcopy(net.state_dict()[param_tensor][i][j][k])
        else:
            if("weight" in param_tensor):
                for i in range(len(net1.state_dict()[param_tensor])):
                    net1.state_dict()[param_tensor][i]=copy.deepcopy(layer_new_weight[i])
            else:
                net1.state_dict()[param_tensor]=copy.deepcopy(layer_new_bias)
    return net1

In [3]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.1307,), (0.3081,))])
testset = torchvision.datasets.MNIST(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=200,
                                         shuffle=False, num_workers=2)

In [4]:
# calculate the Euclidean distance between two vectors
def euclidean_distance(row1, row2):
	distance = 0.0
	for i in range(len(row1)):
		distance += (row1[i] - row2[i])**2
	return sqrt(distance)

In [25]:
def findsets(dlen, nodes, distances):
    cntn=True
    dlen = dlen
    print("desired len=",dlen)
    cut_off=distances[(len(distances)//2)]
    cmax=distances[-1]
    cmin=distances[0]
    diff=0
    itr=0
    chng=0.001*cut_off
    #print("chng=",chng)
    while(cntn):
        itr+=1
        print("cut_off=",cut_off)
        cluster=defaultdict(bool)
        setpoints=defaultdict(set)
        for i in nodes.keys():
            cluster[i]=False
            setpoints[i]={i}
        
        for i in nodes.keys():
            for j in nodes.keys():
                if(i!=j and j>i and cluster[i]==False and cluster[j]==False):

                    #########################################################################################
                    #########################################################################################
                    #########################################################################################
                    #########   The percentage of reduction in size depends on the cutoff chosen here ######
                    #########  The higher cut off will result in higher compression  ########################


                    if(euclidean_distance(nodes[i], nodes[j])<cut_off):
                        setpoints[i].add(j)
                        del setpoints[j]
                        cluster[j]=True
            cluster[i]=True
        
        
        if(len(setpoints)==dlen or itr==1000):
            cntn=False
        else:
            diff=len(setpoints)-dlen
            #print("diff=",diff)
            if(diff<0):
                if(cut_off<cmax):
                    cmax=cut_off
                #print("cmax=",cmax)
            else:
                if(cut_off>cmin):
                    cmin=cut_off
                #print("cmin=",cmin)
            #print("cmax=",cmax," cmin=",cmin," cut_off becomes=",(cut_off+(diff*chng)))
            while(((cut_off+(diff*chng))>=cmax) or ((cut_off+(diff*chng))<=cmin)):
                chng*=0.9
                #print("chng=",chng)
                #print("cmax=",cmax," cmin=",cmin," cut_off could became=",(cut_off+(diff*chng)))
            cut_off=cut_off+(diff*chng)
            
            #chng*=0.8
        
        print("iteration=",itr," length=",len(setpoints)," difference=",diff," change rate=",chng)
            
            
    return setpoints

In [None]:
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.fc1 = nn.Linear(16 * 4 * 4, 300)
        self.fc2 = nn.Linear(300, 100)
        self.fc3 = nn.Linear(100, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)
    
net = Net()
net.load_state_dict(torch.load("LeNET_300_100_MNIST_Model"))

In [None]:

device=torch.device("cpu")
model=Net().to(device)
summary(model, input_size=(1, 28, 28), device="cpu")

In [None]:
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the test images: %f %%' % (
    100 * correct / total))

We can not prune the layer with 256 nodes. We have to prune the layer with 300 nodes. So, that depends on the outgoing edges weights from the layer with 256 nodes, means fc1 (or the incoming weights to the layer with 300 nodes). The similar kind of weights going on to a group of nodes of the next layer means, those weights are kind of looking for similar kind of patterns. 

Example: Suppose node1 has incoming weights i1 from previous layer, and outgoing weights o1. And node2 have similar incoming  weights i2 from same previous layer and outgoing weights o2. Then if we club them up, the net effect will be some function of f(i1, i2) * (o1+o2). assuming i1 and i2 are very similar. Here f(i1, i2) is some function of i1 and i2. Here we have taken the average of i1 and i2.

In [None]:
(net.state_dict()["fc1.weight"]).shape

After changing the dimension of the layer with 300 nodes, to somehting less than 300. We also have to assign the outgoing edges. Here we have set it as the sum of the outgoing nodes of same cluster.

In [None]:
print((net.state_dict()["fc2.weight"]).shape)
print(type(net.state_dict()["fc2.weight"]))

First we have to form the clusters

In [None]:
from collections import defaultdict
nodes=defaultdict(list)
for j in range(len(net.state_dict()["fc1.weight"])):  #Means j is a node of next layer, L2. Fc1 is connecting L1 and L2 layer.
    nodes[j]=net.state_dict()["fc1.weight"][j]

print(len(nodes), " ", len(nodes[0]))

In [None]:
print(type(nodes[0]))

Now we have nodes as a dictionary, where key is the node id, and values are the incoming edge weights form the previous layer. So, now we have to find such node_ids who are having almost similar incoming edge weights

In [None]:
from math import sqrt
distances=[]
for i in nodes.keys():
    for j in nodes.keys():
        if(i!=j and j>i):
            distances.append(euclidean_distance(nodes[i], nodes[j]))

In [None]:
print(distances[0])

In [None]:
distances.sort()
print(len(distances))

In [None]:
# from matplotlib import pyplot as plt 
# # plt.plot([x for x in range(len(distances))], distances) 
# # plt.ylabel("Distances")
# # plt.show() 
# x=[k for k in range(len(distances))]
# fig = plt.figure()
# ax = fig.add_subplot(111)
# ax.plot(x, distances)

# plt.show()

In [None]:
# cluster=defaultdict(bool)
# setpoints=defaultdict(set)
# for i in nodes.keys():
#     cluster[i]=False
#     setpoints[i]={i}
    
# # print(cluster)

# for i in nodes.keys():
#     for j in nodes.keys():
#         if(i!=j and j>i and cluster[i]==False and cluster[j]==False):
            
#             #########################################################################################
#             #########################################################################################
#             #########################################################################################
#             #########   The percentage of reduction in size depends on the cutoff chosen here ######
#             #########  The higher cut off will result in higher compression  ########################
            
#             cut_off=0.73
            
#             if(euclidean_distance(nodes[i], nodes[j])<cut_off):
#                 setpoints[i].add(j)
#                 del setpoints[j]
#                 cluster[j]=True
#     cluster[i]=True

In [None]:
# for key in setpoints.keys():
#     print(key," -> ",setpoints[key], "->", len(setpoints[key]))

In [None]:
# print(len(setpoints))

In [None]:
# print(distances[0]," ",distances[-1])

In [None]:
# dlen=math.ceil(len(nodes)*90/100)
setpoints=findsets(270, nodes, distances)

Now, we will take the average of the same points in the same cluster and assign it as a new bucket, and delete all the other nodes. For FC1

In [None]:
print(net.state_dict()["fc1.weight"].shape)
temp_weights=[]
for key in setpoints.keys():
    row=torch.zeros(len(net.state_dict()["fc1.weight"][0]), dtype=torch.float)
    for points in setpoints[key]:
        row+=net.state_dict()["fc1.weight"][points]
    row=row/len(setpoints[key])
    temp_weights.append(row)
print(len(temp_weights), temp_weights[0].shape)

We will adjust the bias in the similar way

In [None]:
print(net.state_dict()["fc1.bias"].shape)
temp_bias=torch.zeros(len(temp_weights), dtype=torch.float)
i=0
for key in setpoints.keys():
    for points in setpoints[key]:
        temp_bias[i]+=net.state_dict()["fc1.bias"][points]
    temp_bias[i]/=len(setpoints[key])
    i+=1
print(temp_bias.shape)


In [None]:
net_1 = changenet(net, "fc1", temp_weights, temp_bias)

In [None]:
print(net.state_dict()["fc1.bias"].shape, " -> ", net_1.state_dict()["fc1.bias"].shape)
print(net.state_dict()["fc1.weight"].shape, " -> ", net_1.state_dict()["fc1.weight"].shape)

In [None]:
net=copy.deepcopy(net_1)
print(net.state_dict()["fc1.weight"].shape)
print(net.state_dict()["fc1.bias"].shape)

In [None]:
#just to check if the copy has been done correctly
print(temp_weights[0],"\n\n",net.state_dict()["fc1.weight"][0])    

### Now, we have to change FC2 accordingly:

In [None]:
print(net.state_dict()["fc2.weight"].shape)

In [None]:
mat=net.state_dict()["fc2.weight"].t()
print(mat.shape)

In [None]:
temp_weight=[]
for key in setpoints.keys():
    row=torch.zeros(len(mat[0]), dtype=torch.float)
    for points in setpoints[key]:
        row+=mat[points]
    temp_weight.append(row)
print(len(temp_weight), temp_weight[0].shape)

In [None]:
newmat=torch.stack(temp_weight, dim=0)
newmat=newmat.t()
print(newmat.shape)

In [None]:
print(net.state_dict()["fc2.bias"].shape)

In [None]:
net_2 = changenet(net, "fc2", newmat, net.state_dict()["fc2.bias"])
net=copy.deepcopy(net_2)

In [None]:
print(net.state_dict()["fc2.weight"].shape)
print(net.state_dict()["fc2.bias"].shape)

In [None]:
print(net.state_dict()["fc1.weight"].shape)
print(net.state_dict()["fc2.weight"].shape)
print(net.state_dict()["fc3.weight"].shape)

In [None]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.fc1 = nn.Linear(16 * 4 * 4, len(net.state_dict()["fc1.weight"]))
        self.fc2 = nn.Linear(len(net.state_dict()["fc2.weight"][0]), 100)
        self.fc3 = nn.Linear(100, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)
    

In [None]:
from torchsummary import summary
device=torch.device("cpu")
model=Net().to(device)
summary(model, input_size=(1, 28, 28), device="cpu")

In [None]:
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the test images: %f %%' % (
    100 * correct / total))

..

Now, let's prune the next layer which consists of 100 nodes.

In [None]:
(net.state_dict()["fc2.weight"]).shape

In [None]:
(net.state_dict()["fc3.weight"]).shape

In [None]:
from collections import defaultdict
nodes=defaultdict(list)
for j in range(len(net.state_dict()["fc2.weight"])):  #Means j is a node of next layer, L2.
    nodes[j]=net.state_dict()["fc2.weight"][j]

print(len(nodes), " ", len(nodes[0]))

In [None]:
from math import sqrt
distances=[]
for i in nodes.keys():
    for j in nodes.keys():
        if(i!=j and j>i):
            distances.append(euclidean_distance(nodes[i], nodes[j]))

In [None]:
distances.sort()
print(len(distances))

In [None]:
# from matplotlib import pyplot as plt 

# x=[k for k in range(len(distances))]
# fig = plt.figure()
# ax = fig.add_subplot(111)
# ax.plot(x, distances)

# plt.show()

In [None]:
# cluster=defaultdict(bool)
# setpoints=defaultdict(set)
# for i in nodes.keys():
#     cluster[i]=False
#     setpoints[i]={i}
    
# # print(cluster)

# for i in nodes.keys():
#     for j in nodes.keys():
#         if(i!=j and j>i and cluster[i]==False and cluster[j]==False):
            
#             #########################################################################################
#             #########################################################################################
#             #########################################################################################
#             #########   The percentage of reduction in size depends on the cutoff chossen here ######
#             #########  The higher cut off will result in higher compression  ########################
            
#             cut_off=0.7704
#             if(euclidean_distance(nodes[i], nodes[j])<cut_off):
#                 setpoints[i].add(j)
#                 del setpoints[j]
#                 cluster[j]=True
#     cluster[i]=True

In [None]:
# for key in setpoints.keys():
#     print(key," -> ",setpoints[key])

In [None]:
setpoints=findsets(90, nodes, distances)

So, we have i many nodes in place of 100. Now, we will first fix the incoming weights this i many nodes.

In [None]:
print(net.state_dict()["fc2.weight"].shape)
temp_weight=[]
for key in setpoints.keys():
    row=torch.zeros(len(net.state_dict()["fc2.weight"][0]), dtype=torch.float)
    for points in setpoints[key]:
        row+=net.state_dict()["fc2.weight"][points]
    row=row/len(setpoints[key])
    temp_weight.append(row)
print(len(temp_weight), temp_weight[0].shape)

In [None]:
print(net.state_dict()["fc2.bias"].shape)
temp_bias=torch.zeros(len(temp_weight), dtype=torch.float)
i=0
for key in setpoints.keys():
    for points in setpoints[key]:
        temp_bias[i]+=net.state_dict()["fc2.bias"][points]
    temp_bias[i]/=len(setpoints[key])
    i+=1
print(temp_bias.shape)


In [None]:
net_3 = changenet(net, "fc2", temp_weight, temp_bias)
net=copy.deepcopy(net_3)

In [None]:
print(net.state_dict()["fc2.weight"].shape)
print(net.state_dict()["fc2.bias"].shape)

### Now, we have to change FC3 accordingly:

In [None]:
print(net.state_dict()["fc3.weight"].shape)

In [None]:
mat=net.state_dict()["fc3.weight"].t()
print(mat.shape)

In [None]:
temp_weight=[]
for key in setpoints.keys():
    row=torch.zeros(len(mat[0]), dtype=torch.float)
    for points in setpoints[key]:
        row+=mat[points]
    temp_weight.append(row)
print(len(temp_weight), temp_weight[0].shape)

In [None]:
newmat=torch.stack(temp_weight, dim=0)
newmat=newmat.t()
print(newmat.shape)

In [None]:
# print(net.state_dict()["fc3.weight"].shape)
# net.state_dict()["fc3.weight"].resize_(len(newmat), len(newmat[0]))
# print(net.state_dict()["fc3.weight"][1][2])

In [None]:
# for i in range(len(newmat)):
#     net.state_dict()["fc3.weight"][i]=newmat[i]
# print(net.state_dict()["fc3.weight"].shape)
# print(net.state_dict()["fc3.weight"][1][2])

In [None]:
print(net.state_dict()["fc3.bias"].shape)

In [None]:
net_4 = changenet(net, "fc3", newmat, net.state_dict()["fc3.bias"])
net=copy.deepcopy(net_4)

In [None]:
print(net.state_dict()["fc1.weight"].shape)
print(net.state_dict()["fc2.weight"].shape)
print(net.state_dict()["fc3.weight"].shape)

In [None]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.fc1 = nn.Linear(16 * 4 * 4, len(net.state_dict()["fc1.weight"]))
        self.fc2 = nn.Linear(len(net.state_dict()["fc2.weight"][0]), len(net.state_dict()["fc2.weight"]))
        self.fc3 = nn.Linear(len(net.state_dict()["fc3.weight"][0]), 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)
    

In [None]:
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the test images: %f %%' % (
    100 * correct / total))

Here the accuracy actually come up to 76.5, the actual accuracy without any pruning was 89.7. Let's try to freeze the previous layers, and do a fine_tuning. 

In [None]:
i=0
for parameter in net.parameters():
    i+=1
    if(i<5):
        parameter.requires_grad=False

In [None]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.1307,), (0.3081,))])
trainset = torchvision.datasets.MNIST(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=200,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.MNIST(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=200,
                                         shuffle=False, num_workers=2)

classes = ('0', '1', '2', '3',
           '4', '5', '6', '7', '8', '9')

In [None]:
print(type(trainloader))

In [None]:
i=0
for parameter in net.parameters():
    i+=1
    print(i," ",parameter.shape,"\n ",parameter,"\n\n")

In [None]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [None]:
for epoch in range(20):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        # print(inputs.shape)
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 20 == 19:    # print every 2000 mini-batches
            print("[{}, {}] loss: {}".format
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Reraining')

In [None]:
torch.save(net.state_dict(), "LeNET_270_90_MNIST_Model_My_Exiperiment_4_Fine_Tuned")

In [None]:
# import torch.nn as nn
# import torch.nn.functional as F


# class Net(nn.Module):
#     def __init__(self):
#         super(Net, self).__init__()
#         self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
#         self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
#         self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
#         self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
#         self.fc1 = nn.Linear(16 * 4 * 4, len(net.state_dict()["fc1.weight"]))
#         self.fc2 = nn.Linear(len(net.state_dict()["fc2.weight"][0]), len(net.state_dict()["fc2.weight"]))
#         self.fc3 = nn.Linear(len(net.state_dict()["fc3.weight"][0]), 10)

#     def forward(self, x):
#         x = self.pool(F.relu(self.conv1(x)))
#         x = self.pool(F.relu(self.conv2(x)))
#         x = x.view(-1, 16 * 4 * 4)
#         x = F.relu(self.fc1(x))
#         x = F.relu(self.fc2(x))
#         x = self.fc3(x)
#         return F.log_softmax(x, dim=1)
    
# net = Net()
# net.load_state_dict(torch.load("LeNET_270_90_MNIST_Model_My_Exiperiment_4_Fine_Tuned"))

In [17]:
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.fc1 = nn.Linear(16 * 4 * 4, 270)
        self.fc2 = nn.Linear(270, 90)
        self.fc3 = nn.Linear(90, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)
    
net = Net()
net.load_state_dict(torch.load("LeNET_270_90_MNIST_Model_My_Exiperiment_4_Fine_Tuned"))

<All keys matched successfully>

In [18]:
from torchsummary import summary
device=torch.device("cpu")
model=Net().to(device)
summary(model, input_size=(1, 28, 28), device="cpu")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 6, 24, 24]             156
         MaxPool2d-2            [-1, 6, 12, 12]               0
            Conv2d-3             [-1, 16, 8, 8]           2,416
         MaxPool2d-4             [-1, 16, 4, 4]               0
            Linear-5                  [-1, 270]          69,390
            Linear-6                   [-1, 90]          24,390
            Linear-7                   [-1, 10]             910
Total params: 97,262
Trainable params: 97,262
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.05
Params size (MB): 0.37
Estimated Total Size (MB): 0.42
----------------------------------------------------------------


So, number of parametes reduced down to 86k from 110 k

In [19]:
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the test images: %f %%' % (
    100 * correct / total))

Accuracy of the network on the test images: 98.360000 %


In [20]:
print("Pruning fc1.................")

nodes=defaultdict(list)
for j in range(len(net.state_dict()["fc1.weight"])):  #Means j is a node of next layer, L2. Fc1 is connecting L1 and L2 layer.
    nodes[j]=net.state_dict()["fc1.weight"][j]

# print(len(nodes), " ", len(nodes[0]))

# from math import sqrt
distances=[]
for i in nodes.keys():
    for j in nodes.keys():
        if(i!=j and j>i):
            distances.append(euclidean_distance(nodes[i], nodes[j]))

distances.sort()
            
print("Finding clusters of nodes.................")
setpoints=findsets(240, nodes, distances)

# print(net.state_dict()["fc1.weight"].shape)
temp_weights=[]
for key in setpoints.keys():
    row=torch.zeros(len(net.state_dict()["fc1.weight"][0]), dtype=torch.float)
    for points in setpoints[key]:
        row+=net.state_dict()["fc1.weight"][points]
    row=row/len(setpoints[key])
    temp_weights.append(row)
# print(len(temp_weights), temp_weights[0].shape)

# print(net.state_dict()["fc1.bias"].shape)
temp_bias=torch.zeros(len(temp_weights), dtype=torch.float)
i=0
for key in setpoints.keys():
    for points in setpoints[key]:
        temp_bias[i]+=net.state_dict()["fc1.bias"][points]
    temp_bias[i]/=len(setpoints[key])
    i+=1
# print(temp_bias.shape)

print("Updating fc1.................")
net_1 = changenet(net, "fc1", temp_weights, temp_bias)

print(net.state_dict()["fc1.bias"].shape, " -> ", net_1.state_dict()["fc1.bias"].shape)
print(net.state_dict()["fc1.weight"].shape, " -> ", net_1.state_dict()["fc1.weight"].shape)

net=copy.deepcopy(net_1)
# print(net.state_dict()["fc1.weight"].shape)
# print(net.state_dict()["fc1.bias"].shape)

#just to check if the copy has been done correctly
# print(temp_weights[0],"\n\n",net.state_dict()["fc1.weight"][0])    


### Now, we have to change FC2 accordingly:
# print(net.state_dict()["fc2.weight"].shape)
print("Adjusting fc2 accordingly.................")
mat=net.state_dict()["fc2.weight"].t()
# print(mat.shape)

temp_weight=[]
for key in setpoints.keys():
    row=torch.zeros(len(mat[0]), dtype=torch.float)
    for points in setpoints[key]:
        row+=mat[points]
    temp_weight.append(row)
# print(len(temp_weight), temp_weight[0].shape)

newmat=torch.stack(temp_weight, dim=0)
newmat=newmat.t()
# print(newmat.shape)

# print(net.state_dict()["fc2.bias"].shape)

net_2 = changenet(net, "fc2", newmat, net.state_dict()["fc2.bias"])
net=copy.deepcopy(net_2)

print(net.state_dict()["fc1.weight"].shape)
print(net.state_dict()["fc2.weight"].shape)
print(net.state_dict()["fc3.weight"].shape)

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.fc1 = nn.Linear(16 * 4 * 4, len(net.state_dict()["fc1.weight"]))
        self.fc2 = nn.Linear(len(net.state_dict()["fc2.weight"][0]), len(net.state_dict()["fc2.weight"]))
        self.fc3 = nn.Linear(len(net.state_dict()["fc3.weight"][0]), 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)

    
from torchsummary import summary
device=torch.device("cpu")
model=Net().to(device)
summary(model, input_size=(1, 28, 28), device="cpu")

correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the test images: %f %%' % (
    100 * correct / total))


print("Pruning fc2.................")
from collections import defaultdict
nodes=defaultdict(list)
for j in range(len(net.state_dict()["fc2.weight"])):  #Means j is a node of next layer, L2.
    nodes[j]=net.state_dict()["fc2.weight"][j]

# print(len(nodes), " ", len(nodes[0]))


distances=[]
for i in nodes.keys():
    for j in nodes.keys():
        if(i!=j and j>i):
            distances.append(euclidean_distance(nodes[i], nodes[j]))
            
distances.sort()
# print(len(distances))

print("Finding clusters of nodes.................")
setpoints=findsets(80, nodes, distances)

# print(net.state_dict()["fc2.weight"].shape)
temp_weight=[]
for key in setpoints.keys():
    row=torch.zeros(len(net.state_dict()["fc2.weight"][0]), dtype=torch.float)
    for points in setpoints[key]:
        row+=net.state_dict()["fc2.weight"][points]
    row=row/len(setpoints[key])
    temp_weight.append(row)
# print(len(temp_weight), temp_weight[0].shape)

# print(net.state_dict()["fc2.bias"].shape)
temp_bias=torch.zeros(len(temp_weight), dtype=torch.float)
i=0
for key in setpoints.keys():
    for points in setpoints[key]:
        temp_bias[i]+=net.state_dict()["fc2.bias"][points]
    temp_bias[i]/=len(setpoints[key])
    i+=1
# print(temp_bias.shape)

print("Updating fc2.................")
net_3 = changenet(net, "fc2", temp_weight, temp_bias)
net=copy.deepcopy(net_3)

mat=net.state_dict()["fc3.weight"].t()
# print(mat.shape)

temp_weight=[]
for key in setpoints.keys():
    row=torch.zeros(len(mat[0]), dtype=torch.float)
    for points in setpoints[key]:
        row+=mat[points]
    temp_weight.append(row)
# print(len(temp_weight), temp_weight[0].shape)

newmat=torch.stack(temp_weight, dim=0)
newmat=newmat.t()
# print(newmat.shape)

print("Adjusting fc3 accordingly.................")
net_4 = changenet(net, "fc3", newmat, net.state_dict()["fc3.bias"])
net=copy.deepcopy(net_4)

print(net.state_dict()["fc1.weight"].shape)
print(net.state_dict()["fc2.weight"].shape)
print(net.state_dict()["fc3.weight"].shape)

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.fc1 = nn.Linear(16 * 4 * 4, len(net.state_dict()["fc1.weight"]))
        self.fc2 = nn.Linear(len(net.state_dict()["fc2.weight"][0]), len(net.state_dict()["fc2.weight"]))
        self.fc3 = nn.Linear(len(net.state_dict()["fc3.weight"][0]), 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)
    
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the test images: %f %%' % (
    100 * correct / total))

i=0
for parameter in net.parameters():
    i+=1
    if(i<5):
        parameter.requires_grad=False

transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.1307,), (0.3081,))])
trainset = torchvision.datasets.MNIST(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=200,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.MNIST(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=200,
                                         shuffle=False, num_workers=2)

classes = ('0', '1', '2', '3',
           '4', '5', '6', '7', '8', '9')

# import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

for epoch in range(20):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        # print(inputs.shape)
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 20 == 19:    # print every 2000 mini-batches
            print("[{}, {}] loss: {}".format
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Reraining')


from torchsummary import summary
device=torch.device("cpu")
model=Net().to(device)
summary(model, input_size=(1, 28, 28), device="cpu")

correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the test images: %f %%' % (
    100 * correct / total))

Pruning fc1.................
Finding clusters of nodes.................
desired len= 240
cut_off= 0.8459521625151378
iteration= 1  length= 25  difference= -215  change rate= 0.0008459521625151378
cut_off= 0.6640724475743832
iteration= 2  length= 247  difference= 7  change rate= 0.0008459521625151378
cut_off= 0.6699941127119892
iteration= 3  length= 246  difference= 6  change rate= 0.0008459521625151378
cut_off= 0.67506982568708
iteration= 4  length= 244  difference= 4  change rate= 0.0008459521625151378
cut_off= 0.6784536343371406
iteration= 5  length= 243  difference= 3  change rate= 0.0008459521625151378
cut_off= 0.680991490824686
iteration= 6  length= 242  difference= 2  change rate= 0.0008459521625151378
cut_off= 0.6826833951497163
iteration= 7  length= 242  difference= 2  change rate= 0.0008459521625151378
cut_off= 0.6843752994747466
iteration= 8  length= 242  difference= 2  change rate= 0.0008459521625151378
cut_off= 0.6860672037997769
iteration= 9  length= 242  difference= 2  ch

In [21]:
torch.save(net.state_dict(), "LeNET_240_80_MNIST_Model_My_Exiperiment_4_Fine_Tuned")

In [26]:
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.fc1 = nn.Linear(16 * 4 * 4, 240)
        self.fc2 = nn.Linear(240, 80)
        self.fc3 = nn.Linear(80, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)
    
net = Net()
net.load_state_dict(torch.load("LeNET_240_80_MNIST_Model_My_Exiperiment_4_Fine_Tuned"))

<All keys matched successfully>

In [27]:
print("Pruning fc1.................")

nodes=defaultdict(list)
for j in range(len(net.state_dict()["fc1.weight"])):  #Means j is a node of next layer, L2. Fc1 is connecting L1 and L2 layer.
    nodes[j]=net.state_dict()["fc1.weight"][j]

# print(len(nodes), " ", len(nodes[0]))

# from math import sqrt
distances=[]
for i in nodes.keys():
    for j in nodes.keys():
        if(i!=j and j>i):
            distances.append(euclidean_distance(nodes[i], nodes[j]))

distances.sort()
            
print("Finding clusters of nodes.................")
setpoints=findsets(210, nodes, distances)

# print(net.state_dict()["fc1.weight"].shape)
temp_weights=[]
for key in setpoints.keys():
    row=torch.zeros(len(net.state_dict()["fc1.weight"][0]), dtype=torch.float)
    for points in setpoints[key]:
        row+=net.state_dict()["fc1.weight"][points]
    row=row/len(setpoints[key])
    temp_weights.append(row)
# print(len(temp_weights), temp_weights[0].shape)

# print(net.state_dict()["fc1.bias"].shape)
temp_bias=torch.zeros(len(temp_weights), dtype=torch.float)
i=0
for key in setpoints.keys():
    for points in setpoints[key]:
        temp_bias[i]+=net.state_dict()["fc1.bias"][points]
    temp_bias[i]/=len(setpoints[key])
    i+=1
# print(temp_bias.shape)

print("Updating fc1.................")
net_1 = changenet(net, "fc1", temp_weights, temp_bias)

print(net.state_dict()["fc1.bias"].shape, " -> ", net_1.state_dict()["fc1.bias"].shape)
print(net.state_dict()["fc1.weight"].shape, " -> ", net_1.state_dict()["fc1.weight"].shape)

net=copy.deepcopy(net_1)
# print(net.state_dict()["fc1.weight"].shape)
# print(net.state_dict()["fc1.bias"].shape)

#just to check if the copy has been done correctly
# print(temp_weights[0],"\n\n",net.state_dict()["fc1.weight"][0])    


### Now, we have to change FC2 accordingly:
# print(net.state_dict()["fc2.weight"].shape)
print("Adjusting fc2 accordingly.................")
mat=net.state_dict()["fc2.weight"].t()
# print(mat.shape)

temp_weight=[]
for key in setpoints.keys():
    row=torch.zeros(len(mat[0]), dtype=torch.float)
    for points in setpoints[key]:
        row+=mat[points]
    temp_weight.append(row)
# print(len(temp_weight), temp_weight[0].shape)

newmat=torch.stack(temp_weight, dim=0)
newmat=newmat.t()
# print(newmat.shape)

# print(net.state_dict()["fc2.bias"].shape)

net_2 = changenet(net, "fc2", newmat, net.state_dict()["fc2.bias"])
net=copy.deepcopy(net_2)

print(net.state_dict()["fc1.weight"].shape)
print(net.state_dict()["fc2.weight"].shape)
print(net.state_dict()["fc3.weight"].shape)

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.fc1 = nn.Linear(16 * 4 * 4, len(net.state_dict()["fc1.weight"]))
        self.fc2 = nn.Linear(len(net.state_dict()["fc2.weight"][0]), len(net.state_dict()["fc2.weight"]))
        self.fc3 = nn.Linear(len(net.state_dict()["fc3.weight"][0]), 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)

    
from torchsummary import summary
device=torch.device("cpu")
model=Net().to(device)
summary(model, input_size=(1, 28, 28), device="cpu")

correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the test images: %f %%' % (
    100 * correct / total))


print("Pruning fc2.................")
from collections import defaultdict
nodes=defaultdict(list)
for j in range(len(net.state_dict()["fc2.weight"])):  #Means j is a node of next layer, L2.
    nodes[j]=net.state_dict()["fc2.weight"][j]

# print(len(nodes), " ", len(nodes[0]))


distances=[]
for i in nodes.keys():
    for j in nodes.keys():
        if(i!=j and j>i):
            distances.append(euclidean_distance(nodes[i], nodes[j]))
            
distances.sort()
# print(len(distances))

print("Finding clusters of nodes.................")
setpoints=findsets(70, nodes, distances)

# print(net.state_dict()["fc2.weight"].shape)
temp_weight=[]
for key in setpoints.keys():
    row=torch.zeros(len(net.state_dict()["fc2.weight"][0]), dtype=torch.float)
    for points in setpoints[key]:
        row+=net.state_dict()["fc2.weight"][points]
    row=row/len(setpoints[key])
    temp_weight.append(row)
# print(len(temp_weight), temp_weight[0].shape)

# print(net.state_dict()["fc2.bias"].shape)
temp_bias=torch.zeros(len(temp_weight), dtype=torch.float)
i=0
for key in setpoints.keys():
    for points in setpoints[key]:
        temp_bias[i]+=net.state_dict()["fc2.bias"][points]
    temp_bias[i]/=len(setpoints[key])
    i+=1
# print(temp_bias.shape)

print("Updating fc2.................")
net_3 = changenet(net, "fc2", temp_weight, temp_bias)
net=copy.deepcopy(net_3)

mat=net.state_dict()["fc3.weight"].t()
# print(mat.shape)

temp_weight=[]
for key in setpoints.keys():
    row=torch.zeros(len(mat[0]), dtype=torch.float)
    for points in setpoints[key]:
        row+=mat[points]
    temp_weight.append(row)
# print(len(temp_weight), temp_weight[0].shape)

newmat=torch.stack(temp_weight, dim=0)
newmat=newmat.t()
# print(newmat.shape)

print("Adjusting fc3 accordingly.................")
net_4 = changenet(net, "fc3", newmat, net.state_dict()["fc3.bias"])
net=copy.deepcopy(net_4)

print(net.state_dict()["fc1.weight"].shape)
print(net.state_dict()["fc2.weight"].shape)
print(net.state_dict()["fc3.weight"].shape)

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.fc1 = nn.Linear(16 * 4 * 4, len(net.state_dict()["fc1.weight"]))
        self.fc2 = nn.Linear(len(net.state_dict()["fc2.weight"][0]), len(net.state_dict()["fc2.weight"]))
        self.fc3 = nn.Linear(len(net.state_dict()["fc3.weight"][0]), 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)
    
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the test images: %f %%' % (
    100 * correct / total))

i=0
for parameter in net.parameters():
    i+=1
    if(i<5):
        parameter.requires_grad=False

transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.1307,), (0.3081,))])
trainset = torchvision.datasets.MNIST(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=200,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.MNIST(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=200,
                                         shuffle=False, num_workers=2)

classes = ('0', '1', '2', '3',
           '4', '5', '6', '7', '8', '9')

# import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

for epoch in range(20):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        # print(inputs.shape)
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 20 == 19:    # print every 2000 mini-batches
            print("[{}, {}] loss: {}".format
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Reraining')


from torchsummary import summary
device=torch.device("cpu")
model=Net().to(device)
summary(model, input_size=(1, 28, 28), device="cpu")

correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the test images: %f %%' % (
    100 * correct / total))

Pruning fc1.................
Finding clusters of nodes.................
desired len= 210
cut_off= 0.8683952650168648
iteration= 1  length= 17  difference= -193  change rate= 0.0008683952650168648
cut_off= 0.7007949788686099
iteration= 2  length= 30  difference= -180  change rate= 0.0008683952650168648
cut_off= 0.5444838311655742
iteration= 3  length= 239  difference= 29  change rate= 0.0008683952650168648
cut_off= 0.5696672938510633
iteration= 4  length= 239  difference= 29  change rate= 0.0008683952650168648
cut_off= 0.5948507565365524
iteration= 5  length= 237  difference= 27  change rate= 0.0008683952650168648
cut_off= 0.6182974286920078
iteration= 6  length= 211  difference= 1  change rate= 0.0008683952650168648
cut_off= 0.6191658239570247
iteration= 7  length= 211  difference= 1  change rate= 0.0008683952650168648
cut_off= 0.6200342192220416
iteration= 8  length= 209  difference= -1  change rate= 0.0007815557385151783
cut_off= 0.6192526634835265
iteration= 9  length= 211  differen

In [28]:
torch.save(net.state_dict(), "LeNET_210_70_MNIST_Model_My_Exiperiment_4_Fine_Tuned")

In [29]:
print("Pruning fc1.................")

nodes=defaultdict(list)
for j in range(len(net.state_dict()["fc1.weight"])):  #Means j is a node of next layer, L2. Fc1 is connecting L1 and L2 layer.
    nodes[j]=net.state_dict()["fc1.weight"][j]

# print(len(nodes), " ", len(nodes[0]))

# from math import sqrt
distances=[]
for i in nodes.keys():
    for j in nodes.keys():
        if(i!=j and j>i):
            distances.append(euclidean_distance(nodes[i], nodes[j]))

distances.sort()
            
print("Finding clusters of nodes.................")
setpoints=findsets(180, nodes, distances)

# print(net.state_dict()["fc1.weight"].shape)
temp_weights=[]
for key in setpoints.keys():
    row=torch.zeros(len(net.state_dict()["fc1.weight"][0]), dtype=torch.float)
    for points in setpoints[key]:
        row+=net.state_dict()["fc1.weight"][points]
    row=row/len(setpoints[key])
    temp_weights.append(row)
# print(len(temp_weights), temp_weights[0].shape)

# print(net.state_dict()["fc1.bias"].shape)
temp_bias=torch.zeros(len(temp_weights), dtype=torch.float)
i=0
for key in setpoints.keys():
    for points in setpoints[key]:
        temp_bias[i]+=net.state_dict()["fc1.bias"][points]
    temp_bias[i]/=len(setpoints[key])
    i+=1
# print(temp_bias.shape)

print("Updating fc1.................")
net_1 = changenet(net, "fc1", temp_weights, temp_bias)

print(net.state_dict()["fc1.bias"].shape, " -> ", net_1.state_dict()["fc1.bias"].shape)
print(net.state_dict()["fc1.weight"].shape, " -> ", net_1.state_dict()["fc1.weight"].shape)

net=copy.deepcopy(net_1)
# print(net.state_dict()["fc1.weight"].shape)
# print(net.state_dict()["fc1.bias"].shape)

#just to check if the copy has been done correctly
# print(temp_weights[0],"\n\n",net.state_dict()["fc1.weight"][0])    


### Now, we have to change FC2 accordingly:
# print(net.state_dict()["fc2.weight"].shape)
print("Adjusting fc2 accordingly.................")
mat=net.state_dict()["fc2.weight"].t()
# print(mat.shape)

temp_weight=[]
for key in setpoints.keys():
    row=torch.zeros(len(mat[0]), dtype=torch.float)
    for points in setpoints[key]:
        row+=mat[points]
    temp_weight.append(row)
# print(len(temp_weight), temp_weight[0].shape)

newmat=torch.stack(temp_weight, dim=0)
newmat=newmat.t()
# print(newmat.shape)

# print(net.state_dict()["fc2.bias"].shape)

net_2 = changenet(net, "fc2", newmat, net.state_dict()["fc2.bias"])
net=copy.deepcopy(net_2)

print(net.state_dict()["fc1.weight"].shape)
print(net.state_dict()["fc2.weight"].shape)
print(net.state_dict()["fc3.weight"].shape)

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.fc1 = nn.Linear(16 * 4 * 4, len(net.state_dict()["fc1.weight"]))
        self.fc2 = nn.Linear(len(net.state_dict()["fc2.weight"][0]), len(net.state_dict()["fc2.weight"]))
        self.fc3 = nn.Linear(len(net.state_dict()["fc3.weight"][0]), 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)

    
from torchsummary import summary
device=torch.device("cpu")
model=Net().to(device)
summary(model, input_size=(1, 28, 28), device="cpu")

correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the test images: %f %%' % (
    100 * correct / total))


print("Pruning fc2.................")
from collections import defaultdict
nodes=defaultdict(list)
for j in range(len(net.state_dict()["fc2.weight"])):  #Means j is a node of next layer, L2.
    nodes[j]=net.state_dict()["fc2.weight"][j]

# print(len(nodes), " ", len(nodes[0]))


distances=[]
for i in nodes.keys():
    for j in nodes.keys():
        if(i!=j and j>i):
            distances.append(euclidean_distance(nodes[i], nodes[j]))
            
distances.sort()
# print(len(distances))

print("Finding clusters of nodes.................")
setpoints=findsets(60, nodes, distances)

# print(net.state_dict()["fc2.weight"].shape)
temp_weight=[]
for key in setpoints.keys():
    row=torch.zeros(len(net.state_dict()["fc2.weight"][0]), dtype=torch.float)
    for points in setpoints[key]:
        row+=net.state_dict()["fc2.weight"][points]
    row=row/len(setpoints[key])
    temp_weight.append(row)
# print(len(temp_weight), temp_weight[0].shape)

# print(net.state_dict()["fc2.bias"].shape)
temp_bias=torch.zeros(len(temp_weight), dtype=torch.float)
i=0
for key in setpoints.keys():
    for points in setpoints[key]:
        temp_bias[i]+=net.state_dict()["fc2.bias"][points]
    temp_bias[i]/=len(setpoints[key])
    i+=1
# print(temp_bias.shape)

print("Updating fc2.................")
net_3 = changenet(net, "fc2", temp_weight, temp_bias)
net=copy.deepcopy(net_3)

mat=net.state_dict()["fc3.weight"].t()
# print(mat.shape)

temp_weight=[]
for key in setpoints.keys():
    row=torch.zeros(len(mat[0]), dtype=torch.float)
    for points in setpoints[key]:
        row+=mat[points]
    temp_weight.append(row)
# print(len(temp_weight), temp_weight[0].shape)

newmat=torch.stack(temp_weight, dim=0)
newmat=newmat.t()
# print(newmat.shape)

print("Adjusting fc3 accordingly.................")
net_4 = changenet(net, "fc3", newmat, net.state_dict()["fc3.bias"])
net=copy.deepcopy(net_4)

print(net.state_dict()["fc1.weight"].shape)
print(net.state_dict()["fc2.weight"].shape)
print(net.state_dict()["fc3.weight"].shape)

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.fc1 = nn.Linear(16 * 4 * 4, len(net.state_dict()["fc1.weight"]))
        self.fc2 = nn.Linear(len(net.state_dict()["fc2.weight"][0]), len(net.state_dict()["fc2.weight"]))
        self.fc3 = nn.Linear(len(net.state_dict()["fc3.weight"][0]), 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)
    
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the test images: %f %%' % (
    100 * correct / total))

i=0
for parameter in net.parameters():
    i+=1
    if(i<5):
        parameter.requires_grad=False

transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.1307,), (0.3081,))])
trainset = torchvision.datasets.MNIST(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=200,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.MNIST(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=200,
                                         shuffle=False, num_workers=2)

classes = ('0', '1', '2', '3',
           '4', '5', '6', '7', '8', '9')

# import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

for epoch in range(20):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        # print(inputs.shape)
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 20 == 19:    # print every 2000 mini-batches
            print("[{}, {}] loss: {}".format
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Reraining')


from torchsummary import summary
device=torch.device("cpu")
model=Net().to(device)
summary(model, input_size=(1, 28, 28), device="cpu")

correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the test images: %f %%' % (
    100 * correct / total))

Pruning fc1.................
Finding clusters of nodes.................
desired len= 180
cut_off= 0.8881022881134663
iteration= 1  length= 16  difference= -164  change rate= 0.0008881022881134662
cut_off= 0.7424535128628578
iteration= 2  length= 208  difference= 28  change rate= 0.0008881022881134662
cut_off= 0.7673203769300349
iteration= 3  length= 194  difference= 14  change rate= 0.0008881022881134662
cut_off= 0.7797538089636233
iteration= 4  length= 174  difference= -6  change rate= 0.0008881022881134662
cut_off= 0.7744251952349426
iteration= 5  length= 182  difference= 2  change rate= 0.0008881022881134662
cut_off= 0.7762013998111695
iteration= 6  length= 176  difference= -4  change rate= 0.00042477657128757776
cut_off= 0.7745022935260192
iteration= 7  length= 182  difference= 2  change rate= 0.00042477657128757776
cut_off= 0.7753518466685944
iteration= 8  length= 180  difference= 2  change rate= 0.00042477657128757776
Updating fc1.................
torch.Size([210])  ->  torch.Siz

In [30]:
torch.save(net.state_dict(), "LeNET_180_60_MNIST_Model_My_Exiperiment_4_Fine_Tuned")

In [31]:
len_1=150
len_2=50

print("Pruning fc1.................")

nodes=defaultdict(list)
for j in range(len(net.state_dict()["fc1.weight"])):  #Means j is a node of next layer, L2. Fc1 is connecting L1 and L2 layer.
    nodes[j]=net.state_dict()["fc1.weight"][j]

# print(len(nodes), " ", len(nodes[0]))

# from math import sqrt
distances=[]
for i in nodes.keys():
    for j in nodes.keys():
        if(i!=j and j>i):
            distances.append(euclidean_distance(nodes[i], nodes[j]))

distances.sort()
            
print("Finding clusters of nodes.................")
setpoints=findsets(len_1, nodes, distances)

# print(net.state_dict()["fc1.weight"].shape)
temp_weights=[]
for key in setpoints.keys():
    row=torch.zeros(len(net.state_dict()["fc1.weight"][0]), dtype=torch.float)
    for points in setpoints[key]:
        row+=net.state_dict()["fc1.weight"][points]
    row=row/len(setpoints[key])
    temp_weights.append(row)
# print(len(temp_weights), temp_weights[0].shape)

# print(net.state_dict()["fc1.bias"].shape)
temp_bias=torch.zeros(len(temp_weights), dtype=torch.float)
i=0
for key in setpoints.keys():
    for points in setpoints[key]:
        temp_bias[i]+=net.state_dict()["fc1.bias"][points]
    temp_bias[i]/=len(setpoints[key])
    i+=1
# print(temp_bias.shape)

print("Updating fc1.................")
net_1 = changenet(net, "fc1", temp_weights, temp_bias)

print(net.state_dict()["fc1.bias"].shape, " -> ", net_1.state_dict()["fc1.bias"].shape)
print(net.state_dict()["fc1.weight"].shape, " -> ", net_1.state_dict()["fc1.weight"].shape)

net=copy.deepcopy(net_1)
# print(net.state_dict()["fc1.weight"].shape)
# print(net.state_dict()["fc1.bias"].shape)

#just to check if the copy has been done correctly
# print(temp_weights[0],"\n\n",net.state_dict()["fc1.weight"][0])    


### Now, we have to change FC2 accordingly:
# print(net.state_dict()["fc2.weight"].shape)
print("Adjusting fc2 accordingly.................")
mat=net.state_dict()["fc2.weight"].t()
# print(mat.shape)

temp_weight=[]
for key in setpoints.keys():
    row=torch.zeros(len(mat[0]), dtype=torch.float)
    for points in setpoints[key]:
        row+=mat[points]
    temp_weight.append(row)
# print(len(temp_weight), temp_weight[0].shape)

newmat=torch.stack(temp_weight, dim=0)
newmat=newmat.t()
# print(newmat.shape)

# print(net.state_dict()["fc2.bias"].shape)

net_2 = changenet(net, "fc2", newmat, net.state_dict()["fc2.bias"])
net=copy.deepcopy(net_2)

print(net.state_dict()["fc1.weight"].shape)
print(net.state_dict()["fc2.weight"].shape)
print(net.state_dict()["fc3.weight"].shape)

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.fc1 = nn.Linear(16 * 4 * 4, len(net.state_dict()["fc1.weight"]))
        self.fc2 = nn.Linear(len(net.state_dict()["fc2.weight"][0]), len(net.state_dict()["fc2.weight"]))
        self.fc3 = nn.Linear(len(net.state_dict()["fc3.weight"][0]), 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)

    
from torchsummary import summary
device=torch.device("cpu")
model=Net().to(device)
summary(model, input_size=(1, 28, 28), device="cpu")

correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the test images: %f %%' % (
    100 * correct / total))


print("Pruning fc2.................")
from collections import defaultdict
nodes=defaultdict(list)
for j in range(len(net.state_dict()["fc2.weight"])):  #Means j is a node of next layer, L2.
    nodes[j]=net.state_dict()["fc2.weight"][j]

# print(len(nodes), " ", len(nodes[0]))


distances=[]
for i in nodes.keys():
    for j in nodes.keys():
        if(i!=j and j>i):
            distances.append(euclidean_distance(nodes[i], nodes[j]))
            
distances.sort()
# print(len(distances))

print("Finding clusters of nodes.................")
setpoints=findsets(len_2, nodes, distances)

# print(net.state_dict()["fc2.weight"].shape)
temp_weight=[]
for key in setpoints.keys():
    row=torch.zeros(len(net.state_dict()["fc2.weight"][0]), dtype=torch.float)
    for points in setpoints[key]:
        row+=net.state_dict()["fc2.weight"][points]
    row=row/len(setpoints[key])
    temp_weight.append(row)
# print(len(temp_weight), temp_weight[0].shape)

# print(net.state_dict()["fc2.bias"].shape)
temp_bias=torch.zeros(len(temp_weight), dtype=torch.float)
i=0
for key in setpoints.keys():
    for points in setpoints[key]:
        temp_bias[i]+=net.state_dict()["fc2.bias"][points]
    temp_bias[i]/=len(setpoints[key])
    i+=1
# print(temp_bias.shape)

print("Updating fc2.................")
net_3 = changenet(net, "fc2", temp_weight, temp_bias)
net=copy.deepcopy(net_3)

mat=net.state_dict()["fc3.weight"].t()
# print(mat.shape)

temp_weight=[]
for key in setpoints.keys():
    row=torch.zeros(len(mat[0]), dtype=torch.float)
    for points in setpoints[key]:
        row+=mat[points]
    temp_weight.append(row)
# print(len(temp_weight), temp_weight[0].shape)

newmat=torch.stack(temp_weight, dim=0)
newmat=newmat.t()
# print(newmat.shape)

print("Adjusting fc3 accordingly.................")
net_4 = changenet(net, "fc3", newmat, net.state_dict()["fc3.bias"])
net=copy.deepcopy(net_4)

print(net.state_dict()["fc1.weight"].shape)
print(net.state_dict()["fc2.weight"].shape)
print(net.state_dict()["fc3.weight"].shape)

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.fc1 = nn.Linear(16 * 4 * 4, len(net.state_dict()["fc1.weight"]))
        self.fc2 = nn.Linear(len(net.state_dict()["fc2.weight"][0]), len(net.state_dict()["fc2.weight"]))
        self.fc3 = nn.Linear(len(net.state_dict()["fc3.weight"][0]), 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)
    
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the test images: %f %%' % (
    100 * correct / total))

i=0
for parameter in net.parameters():
    i+=1
    if(i<5):
        parameter.requires_grad=False

transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.1307,), (0.3081,))])
trainset = torchvision.datasets.MNIST(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=200,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.MNIST(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=200,
                                         shuffle=False, num_workers=2)

classes = ('0', '1', '2', '3',
           '4', '5', '6', '7', '8', '9')

# import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

for epoch in range(20):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        # print(inputs.shape)
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 20 == 19:    # print every 2000 mini-batches
            print("[{}, {}] loss: {}".format
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Reraining')


from torchsummary import summary
device=torch.device("cpu")
model=Net().to(device)
summary(model, input_size=(1, 28, 28), device="cpu")

correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the test images: %f %%' % (
    100 * correct / total))

Pruning fc1.................
Finding clusters of nodes.................
desired len= 150
cut_off= 0.9020356482103368
iteration= 1  length= 1  difference= -149  change rate= 0.0009020356482103368
cut_off= 0.7676323366269966
iteration= 2  length= 26  difference= -124  change rate= 0.0009020356482103368
cut_off= 0.6557799162489149
iteration= 3  length= 159  difference= 9  change rate= 0.0009020356482103368
cut_off= 0.6638982370828079
iteration= 4  length= 157  difference= 7  change rate= 0.0009020356482103368
cut_off= 0.6702124866202803
iteration= 5  length= 154  difference= 4  change rate= 0.0009020356482103368
cut_off= 0.6738206292131217
iteration= 6  length= 151  difference= 1  change rate= 0.0009020356482103368
cut_off= 0.674722664861332
iteration= 7  length= 151  difference= 1  change rate= 0.0009020356482103368
cut_off= 0.6756247005095424
iteration= 8  length= 151  difference= 1  change rate= 0.0009020356482103368
cut_off= 0.6765267361577527
iteration= 9  length= 151  difference= 1 

In [32]:
torch.save(net.state_dict(), "LeNET_150_50_MNIST_Model_My_Exiperiment_4_Fine_Tuned")

In [33]:
len_1=120
len_2=40

print("Pruning fc1.................")

nodes=defaultdict(list)
for j in range(len(net.state_dict()["fc1.weight"])):  #Means j is a node of next layer, L2. Fc1 is connecting L1 and L2 layer.
    nodes[j]=net.state_dict()["fc1.weight"][j]

# print(len(nodes), " ", len(nodes[0]))

# from math import sqrt
distances=[]
for i in nodes.keys():
    for j in nodes.keys():
        if(i!=j and j>i):
            distances.append(euclidean_distance(nodes[i], nodes[j]))

distances.sort()
            
print("Finding clusters of nodes.................")
setpoints=findsets(len_1, nodes, distances)

# print(net.state_dict()["fc1.weight"].shape)
temp_weights=[]
for key in setpoints.keys():
    row=torch.zeros(len(net.state_dict()["fc1.weight"][0]), dtype=torch.float)
    for points in setpoints[key]:
        row+=net.state_dict()["fc1.weight"][points]
    row=row/len(setpoints[key])
    temp_weights.append(row)
# print(len(temp_weights), temp_weights[0].shape)

# print(net.state_dict()["fc1.bias"].shape)
temp_bias=torch.zeros(len(temp_weights), dtype=torch.float)
i=0
for key in setpoints.keys():
    for points in setpoints[key]:
        temp_bias[i]+=net.state_dict()["fc1.bias"][points]
    temp_bias[i]/=len(setpoints[key])
    i+=1
# print(temp_bias.shape)

print("Updating fc1.................")
net_1 = changenet(net, "fc1", temp_weights, temp_bias)

print(net.state_dict()["fc1.bias"].shape, " -> ", net_1.state_dict()["fc1.bias"].shape)
print(net.state_dict()["fc1.weight"].shape, " -> ", net_1.state_dict()["fc1.weight"].shape)

net=copy.deepcopy(net_1)
# print(net.state_dict()["fc1.weight"].shape)
# print(net.state_dict()["fc1.bias"].shape)

#just to check if the copy has been done correctly
# print(temp_weights[0],"\n\n",net.state_dict()["fc1.weight"][0])    


### Now, we have to change FC2 accordingly:
# print(net.state_dict()["fc2.weight"].shape)
print("Adjusting fc2 accordingly.................")
mat=net.state_dict()["fc2.weight"].t()
# print(mat.shape)

temp_weight=[]
for key in setpoints.keys():
    row=torch.zeros(len(mat[0]), dtype=torch.float)
    for points in setpoints[key]:
        row+=mat[points]
    temp_weight.append(row)
# print(len(temp_weight), temp_weight[0].shape)

newmat=torch.stack(temp_weight, dim=0)
newmat=newmat.t()
# print(newmat.shape)

# print(net.state_dict()["fc2.bias"].shape)

net_2 = changenet(net, "fc2", newmat, net.state_dict()["fc2.bias"])
net=copy.deepcopy(net_2)

print(net.state_dict()["fc1.weight"].shape)
print(net.state_dict()["fc2.weight"].shape)
print(net.state_dict()["fc3.weight"].shape)

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.fc1 = nn.Linear(16 * 4 * 4, len(net.state_dict()["fc1.weight"]))
        self.fc2 = nn.Linear(len(net.state_dict()["fc2.weight"][0]), len(net.state_dict()["fc2.weight"]))
        self.fc3 = nn.Linear(len(net.state_dict()["fc3.weight"][0]), 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)

    
from torchsummary import summary
device=torch.device("cpu")
model=Net().to(device)
summary(model, input_size=(1, 28, 28), device="cpu")

correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the test images: %f %%' % (
    100 * correct / total))


print("Pruning fc2.................")
from collections import defaultdict
nodes=defaultdict(list)
for j in range(len(net.state_dict()["fc2.weight"])):  #Means j is a node of next layer, L2.
    nodes[j]=net.state_dict()["fc2.weight"][j]

# print(len(nodes), " ", len(nodes[0]))


distances=[]
for i in nodes.keys():
    for j in nodes.keys():
        if(i!=j and j>i):
            distances.append(euclidean_distance(nodes[i], nodes[j]))
            
distances.sort()
# print(len(distances))

print("Finding clusters of nodes.................")
setpoints=findsets(len_2, nodes, distances)

# print(net.state_dict()["fc2.weight"].shape)
temp_weight=[]
for key in setpoints.keys():
    row=torch.zeros(len(net.state_dict()["fc2.weight"][0]), dtype=torch.float)
    for points in setpoints[key]:
        row+=net.state_dict()["fc2.weight"][points]
    row=row/len(setpoints[key])
    temp_weight.append(row)
# print(len(temp_weight), temp_weight[0].shape)

# print(net.state_dict()["fc2.bias"].shape)
temp_bias=torch.zeros(len(temp_weight), dtype=torch.float)
i=0
for key in setpoints.keys():
    for points in setpoints[key]:
        temp_bias[i]+=net.state_dict()["fc2.bias"][points]
    temp_bias[i]/=len(setpoints[key])
    i+=1
# print(temp_bias.shape)

print("Updating fc2.................")
net_3 = changenet(net, "fc2", temp_weight, temp_bias)
net=copy.deepcopy(net_3)

mat=net.state_dict()["fc3.weight"].t()
# print(mat.shape)

temp_weight=[]
for key in setpoints.keys():
    row=torch.zeros(len(mat[0]), dtype=torch.float)
    for points in setpoints[key]:
        row+=mat[points]
    temp_weight.append(row)
# print(len(temp_weight), temp_weight[0].shape)

newmat=torch.stack(temp_weight, dim=0)
newmat=newmat.t()
# print(newmat.shape)

print("Adjusting fc3 accordingly.................")
net_4 = changenet(net, "fc3", newmat, net.state_dict()["fc3.bias"])
net=copy.deepcopy(net_4)

print(net.state_dict()["fc1.weight"].shape)
print(net.state_dict()["fc2.weight"].shape)
print(net.state_dict()["fc3.weight"].shape)

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.fc1 = nn.Linear(16 * 4 * 4, len(net.state_dict()["fc1.weight"]))
        self.fc2 = nn.Linear(len(net.state_dict()["fc2.weight"][0]), len(net.state_dict()["fc2.weight"]))
        self.fc3 = nn.Linear(len(net.state_dict()["fc3.weight"][0]), 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)
    
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the test images: %f %%' % (
    100 * correct / total))

i=0
for parameter in net.parameters():
    i+=1
    if(i<5):
        parameter.requires_grad=False

transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.1307,), (0.3081,))])
trainset = torchvision.datasets.MNIST(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=200,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.MNIST(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=200,
                                         shuffle=False, num_workers=2)

classes = ('0', '1', '2', '3',
           '4', '5', '6', '7', '8', '9')

# import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

for epoch in range(20):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        # print(inputs.shape)
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 20 == 19:    # print every 2000 mini-batches
            print("[{}, {}] loss: {}".format
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Reraining')


from torchsummary import summary
device=torch.device("cpu")
model=Net().to(device)
summary(model, input_size=(1, 28, 28), device="cpu")

correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the test images: %f %%' % (
    100 * correct / total))

Pruning fc1.................
Finding clusters of nodes.................
desired len= 120
cut_off= 0.9369274934462453
iteration= 1  length= 1  difference= -119  change rate= 0.0009369274934462453
cut_off= 0.825433121726142
iteration= 2  length= 1  difference= -119  change rate= 0.0009369274934462453
cut_off= 0.7139387500060388
iteration= 3  length= 26  difference= -94  change rate= 0.0009369274934462453
cut_off= 0.6258675656220918
iteration= 4  length= 142  difference= 22  change rate= 0.0009369274934462453
cut_off= 0.6464799704779092
iteration= 5  length= 121  difference= 1  change rate= 0.0009369274934462453
cut_off= 0.6474168979713554
iteration= 6  length= 119  difference= -1  change rate= 0.0008432347441016208
cut_off= 0.6465736632272537
iteration= 7  length= 121  difference= 1  change rate= 0.0007589112696914588
cut_off= 0.6473325744969451
iteration= 8  length= 119  difference= -1  change rate= 0.0006830201427223129
cut_off= 0.6466495543542228
iteration= 9  length= 120  difference=

In [34]:
torch.save(net.state_dict(), "LeNET_120_40_MNIST_Model_My_Exiperiment_4_Fine_Tuned")

In [35]:
len_1=90
len_2=30

print("Pruning fc1.................")

nodes=defaultdict(list)
for j in range(len(net.state_dict()["fc1.weight"])):  #Means j is a node of next layer, L2. Fc1 is connecting L1 and L2 layer.
    nodes[j]=net.state_dict()["fc1.weight"][j]

# print(len(nodes), " ", len(nodes[0]))

# from math import sqrt
distances=[]
for i in nodes.keys():
    for j in nodes.keys():
        if(i!=j and j>i):
            distances.append(euclidean_distance(nodes[i], nodes[j]))

distances.sort()
            
print("Finding clusters of nodes.................")
setpoints=findsets(len_1, nodes, distances)

# print(net.state_dict()["fc1.weight"].shape)
temp_weights=[]
for key in setpoints.keys():
    row=torch.zeros(len(net.state_dict()["fc1.weight"][0]), dtype=torch.float)
    for points in setpoints[key]:
        row+=net.state_dict()["fc1.weight"][points]
    row=row/len(setpoints[key])
    temp_weights.append(row)
# print(len(temp_weights), temp_weights[0].shape)

# print(net.state_dict()["fc1.bias"].shape)
temp_bias=torch.zeros(len(temp_weights), dtype=torch.float)
i=0
for key in setpoints.keys():
    for points in setpoints[key]:
        temp_bias[i]+=net.state_dict()["fc1.bias"][points]
    temp_bias[i]/=len(setpoints[key])
    i+=1
# print(temp_bias.shape)

print("Updating fc1.................")
net_1 = changenet(net, "fc1", temp_weights, temp_bias)

print(net.state_dict()["fc1.bias"].shape, " -> ", net_1.state_dict()["fc1.bias"].shape)
print(net.state_dict()["fc1.weight"].shape, " -> ", net_1.state_dict()["fc1.weight"].shape)

net=copy.deepcopy(net_1)
# print(net.state_dict()["fc1.weight"].shape)
# print(net.state_dict()["fc1.bias"].shape)

#just to check if the copy has been done correctly
# print(temp_weights[0],"\n\n",net.state_dict()["fc1.weight"][0])    


### Now, we have to change FC2 accordingly:
# print(net.state_dict()["fc2.weight"].shape)
print("Adjusting fc2 accordingly.................")
mat=net.state_dict()["fc2.weight"].t()
# print(mat.shape)

temp_weight=[]
for key in setpoints.keys():
    row=torch.zeros(len(mat[0]), dtype=torch.float)
    for points in setpoints[key]:
        row+=mat[points]
    temp_weight.append(row)
# print(len(temp_weight), temp_weight[0].shape)

newmat=torch.stack(temp_weight, dim=0)
newmat=newmat.t()
# print(newmat.shape)

# print(net.state_dict()["fc2.bias"].shape)

net_2 = changenet(net, "fc2", newmat, net.state_dict()["fc2.bias"])
net=copy.deepcopy(net_2)

print(net.state_dict()["fc1.weight"].shape)
print(net.state_dict()["fc2.weight"].shape)
print(net.state_dict()["fc3.weight"].shape)

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.fc1 = nn.Linear(16 * 4 * 4, len(net.state_dict()["fc1.weight"]))
        self.fc2 = nn.Linear(len(net.state_dict()["fc2.weight"][0]), len(net.state_dict()["fc2.weight"]))
        self.fc3 = nn.Linear(len(net.state_dict()["fc3.weight"][0]), 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)

    
from torchsummary import summary
device=torch.device("cpu")
model=Net().to(device)
summary(model, input_size=(1, 28, 28), device="cpu")

correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the test images: %f %%' % (
    100 * correct / total))


print("Pruning fc2.................")
from collections import defaultdict
nodes=defaultdict(list)
for j in range(len(net.state_dict()["fc2.weight"])):  #Means j is a node of next layer, L2.
    nodes[j]=net.state_dict()["fc2.weight"][j]

# print(len(nodes), " ", len(nodes[0]))


distances=[]
for i in nodes.keys():
    for j in nodes.keys():
        if(i!=j and j>i):
            distances.append(euclidean_distance(nodes[i], nodes[j]))
            
distances.sort()
# print(len(distances))

print("Finding clusters of nodes.................")
setpoints=findsets(len_2, nodes, distances)

# print(net.state_dict()["fc2.weight"].shape)
temp_weight=[]
for key in setpoints.keys():
    row=torch.zeros(len(net.state_dict()["fc2.weight"][0]), dtype=torch.float)
    for points in setpoints[key]:
        row+=net.state_dict()["fc2.weight"][points]
    row=row/len(setpoints[key])
    temp_weight.append(row)
# print(len(temp_weight), temp_weight[0].shape)

# print(net.state_dict()["fc2.bias"].shape)
temp_bias=torch.zeros(len(temp_weight), dtype=torch.float)
i=0
for key in setpoints.keys():
    for points in setpoints[key]:
        temp_bias[i]+=net.state_dict()["fc2.bias"][points]
    temp_bias[i]/=len(setpoints[key])
    i+=1
# print(temp_bias.shape)

print("Updating fc2.................")
net_3 = changenet(net, "fc2", temp_weight, temp_bias)
net=copy.deepcopy(net_3)

mat=net.state_dict()["fc3.weight"].t()
# print(mat.shape)

temp_weight=[]
for key in setpoints.keys():
    row=torch.zeros(len(mat[0]), dtype=torch.float)
    for points in setpoints[key]:
        row+=mat[points]
    temp_weight.append(row)
# print(len(temp_weight), temp_weight[0].shape)

newmat=torch.stack(temp_weight, dim=0)
newmat=newmat.t()
# print(newmat.shape)

print("Adjusting fc3 accordingly.................")
net_4 = changenet(net, "fc3", newmat, net.state_dict()["fc3.bias"])
net=copy.deepcopy(net_4)

print(net.state_dict()["fc1.weight"].shape)
print(net.state_dict()["fc2.weight"].shape)
print(net.state_dict()["fc3.weight"].shape)

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.fc1 = nn.Linear(16 * 4 * 4, len(net.state_dict()["fc1.weight"]))
        self.fc2 = nn.Linear(len(net.state_dict()["fc2.weight"][0]), len(net.state_dict()["fc2.weight"]))
        self.fc3 = nn.Linear(len(net.state_dict()["fc3.weight"][0]), 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)
    
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the test images: %f %%' % (
    100 * correct / total))

i=0
for parameter in net.parameters():
    i+=1
    if(i<5):
        parameter.requires_grad=False

transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.1307,), (0.3081,))])
trainset = torchvision.datasets.MNIST(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=200,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.MNIST(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=200,
                                         shuffle=False, num_workers=2)

classes = ('0', '1', '2', '3',
           '4', '5', '6', '7', '8', '9')

# import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

for epoch in range(20):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        # print(inputs.shape)
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 20 == 19:    # print every 2000 mini-batches
            print("[{}, {}] loss: {}".format
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Reraining')


from torchsummary import summary
device=torch.device("cpu")
model=Net().to(device)
summary(model, input_size=(1, 28, 28), device="cpu")

correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the test images: %f %%' % (
    100 * correct / total))

Pruning fc1.................
Finding clusters of nodes.................
desired len= 90
cut_off= 0.9729651454689077
iteration= 1  length= 1  difference= -89  change rate= 0.0009729651454689077
cut_off= 0.8863712475221749
iteration= 2  length= 1  difference= -89  change rate= 0.0009729651454689077
cut_off= 0.7997773495754421
iteration= 3  length= 7  difference= -83  change rate= 0.0009729651454689077
cut_off= 0.7190212425015228
iteration= 4  length= 31  difference= -59  change rate= 0.0009729651454689077
cut_off= 0.6616162989188572
iteration= 5  length= 102  difference= 12  change rate= 0.0009729651454689077
cut_off= 0.6732918806644841
iteration= 6  length= 91  difference= 1  change rate= 0.0009729651454689077
cut_off= 0.674264845809953
iteration= 7  length= 91  difference= 1  change rate= 0.0009729651454689077
cut_off= 0.675237810955422
iteration= 8  length= 91  difference= 1  change rate= 0.0009729651454689077
cut_off= 0.676210776100891
iteration= 9  length= 90  difference= 1  change 

In [36]:
torch.save(net.state_dict(), "LeNET_90_30_MNIST_Model_My_Exiperiment_4_Fine_Tuned")

In [37]:
len_1=60
len_2=20

print("Pruning fc1.................")

nodes=defaultdict(list)
for j in range(len(net.state_dict()["fc1.weight"])):  #Means j is a node of next layer, L2. Fc1 is connecting L1 and L2 layer.
    nodes[j]=net.state_dict()["fc1.weight"][j]

# print(len(nodes), " ", len(nodes[0]))

# from math import sqrt
distances=[]
for i in nodes.keys():
    for j in nodes.keys():
        if(i!=j and j>i):
            distances.append(euclidean_distance(nodes[i], nodes[j]))

distances.sort()
            
print("Finding clusters of nodes.................")
setpoints=findsets(len_1, nodes, distances)

# print(net.state_dict()["fc1.weight"].shape)
temp_weights=[]
for key in setpoints.keys():
    row=torch.zeros(len(net.state_dict()["fc1.weight"][0]), dtype=torch.float)
    for points in setpoints[key]:
        row+=net.state_dict()["fc1.weight"][points]
    row=row/len(setpoints[key])
    temp_weights.append(row)
# print(len(temp_weights), temp_weights[0].shape)

# print(net.state_dict()["fc1.bias"].shape)
temp_bias=torch.zeros(len(temp_weights), dtype=torch.float)
i=0
for key in setpoints.keys():
    for points in setpoints[key]:
        temp_bias[i]+=net.state_dict()["fc1.bias"][points]
    temp_bias[i]/=len(setpoints[key])
    i+=1
# print(temp_bias.shape)

print("Updating fc1.................")
net_1 = changenet(net, "fc1", temp_weights, temp_bias)

print(net.state_dict()["fc1.bias"].shape, " -> ", net_1.state_dict()["fc1.bias"].shape)
print(net.state_dict()["fc1.weight"].shape, " -> ", net_1.state_dict()["fc1.weight"].shape)

net=copy.deepcopy(net_1)
# print(net.state_dict()["fc1.weight"].shape)
# print(net.state_dict()["fc1.bias"].shape)

#just to check if the copy has been done correctly
# print(temp_weights[0],"\n\n",net.state_dict()["fc1.weight"][0])    


### Now, we have to change FC2 accordingly:
# print(net.state_dict()["fc2.weight"].shape)
print("Adjusting fc2 accordingly.................")
mat=net.state_dict()["fc2.weight"].t()
# print(mat.shape)

temp_weight=[]
for key in setpoints.keys():
    row=torch.zeros(len(mat[0]), dtype=torch.float)
    for points in setpoints[key]:
        row+=mat[points]
    temp_weight.append(row)
# print(len(temp_weight), temp_weight[0].shape)

newmat=torch.stack(temp_weight, dim=0)
newmat=newmat.t()
# print(newmat.shape)

# print(net.state_dict()["fc2.bias"].shape)

net_2 = changenet(net, "fc2", newmat, net.state_dict()["fc2.bias"])
net=copy.deepcopy(net_2)

print(net.state_dict()["fc1.weight"].shape)
print(net.state_dict()["fc2.weight"].shape)
print(net.state_dict()["fc3.weight"].shape)

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.fc1 = nn.Linear(16 * 4 * 4, len(net.state_dict()["fc1.weight"]))
        self.fc2 = nn.Linear(len(net.state_dict()["fc2.weight"][0]), len(net.state_dict()["fc2.weight"]))
        self.fc3 = nn.Linear(len(net.state_dict()["fc3.weight"][0]), 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)

    
from torchsummary import summary
device=torch.device("cpu")
model=Net().to(device)
summary(model, input_size=(1, 28, 28), device="cpu")

correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the test images: %f %%' % (
    100 * correct / total))


print("Pruning fc2.................")
from collections import defaultdict
nodes=defaultdict(list)
for j in range(len(net.state_dict()["fc2.weight"])):  #Means j is a node of next layer, L2.
    nodes[j]=net.state_dict()["fc2.weight"][j]

# print(len(nodes), " ", len(nodes[0]))


distances=[]
for i in nodes.keys():
    for j in nodes.keys():
        if(i!=j and j>i):
            distances.append(euclidean_distance(nodes[i], nodes[j]))
            
distances.sort()
# print(len(distances))

print("Finding clusters of nodes.................")
setpoints=findsets(len_2, nodes, distances)

# print(net.state_dict()["fc2.weight"].shape)
temp_weight=[]
for key in setpoints.keys():
    row=torch.zeros(len(net.state_dict()["fc2.weight"][0]), dtype=torch.float)
    for points in setpoints[key]:
        row+=net.state_dict()["fc2.weight"][points]
    row=row/len(setpoints[key])
    temp_weight.append(row)
# print(len(temp_weight), temp_weight[0].shape)

# print(net.state_dict()["fc2.bias"].shape)
temp_bias=torch.zeros(len(temp_weight), dtype=torch.float)
i=0
for key in setpoints.keys():
    for points in setpoints[key]:
        temp_bias[i]+=net.state_dict()["fc2.bias"][points]
    temp_bias[i]/=len(setpoints[key])
    i+=1
# print(temp_bias.shape)

print("Updating fc2.................")
net_3 = changenet(net, "fc2", temp_weight, temp_bias)
net=copy.deepcopy(net_3)

mat=net.state_dict()["fc3.weight"].t()
# print(mat.shape)

temp_weight=[]
for key in setpoints.keys():
    row=torch.zeros(len(mat[0]), dtype=torch.float)
    for points in setpoints[key]:
        row+=mat[points]
    temp_weight.append(row)
# print(len(temp_weight), temp_weight[0].shape)

newmat=torch.stack(temp_weight, dim=0)
newmat=newmat.t()
# print(newmat.shape)

print("Adjusting fc3 accordingly.................")
net_4 = changenet(net, "fc3", newmat, net.state_dict()["fc3.bias"])
net=copy.deepcopy(net_4)

print(net.state_dict()["fc1.weight"].shape)
print(net.state_dict()["fc2.weight"].shape)
print(net.state_dict()["fc3.weight"].shape)

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.fc1 = nn.Linear(16 * 4 * 4, len(net.state_dict()["fc1.weight"]))
        self.fc2 = nn.Linear(len(net.state_dict()["fc2.weight"][0]), len(net.state_dict()["fc2.weight"]))
        self.fc3 = nn.Linear(len(net.state_dict()["fc3.weight"][0]), 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)
    
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the test images: %f %%' % (
    100 * correct / total))

i=0
for parameter in net.parameters():
    i+=1
    if(i<5):
        parameter.requires_grad=False

transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.1307,), (0.3081,))])
trainset = torchvision.datasets.MNIST(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=200,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.MNIST(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=200,
                                         shuffle=False, num_workers=2)

classes = ('0', '1', '2', '3',
           '4', '5', '6', '7', '8', '9')

# import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

for epoch in range(20):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        # print(inputs.shape)
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 20 == 19:    # print every 2000 mini-batches
            print("[{}, {}] loss: {}".format
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Reraining')


from torchsummary import summary
device=torch.device("cpu")
model=Net().to(device)
summary(model, input_size=(1, 28, 28), device="cpu")

correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the test images: %f %%' % (
    100 * correct / total))

Pruning fc1.................
Finding clusters of nodes.................
desired len= 60
cut_off= 1.0209020409849774
iteration= 1  length= 1  difference= -59  change rate= 0.0010209020409849773
cut_off= 0.9606688205668638
iteration= 2  length= 1  difference= -59  change rate= 0.0010209020409849773
cut_off= 0.9004356001487501
iteration= 3  length= 1  difference= -59  change rate= 0.0010209020409849773
cut_off= 0.8402023797306365
iteration= 4  length= 8  difference= -52  change rate= 0.0010209020409849773
cut_off= 0.7871154735994177
iteration= 5  length= 21  difference= -39  change rate= 0.0010209020409849773
cut_off= 0.7473002940010036
iteration= 6  length= 59  difference= -1  change rate= 0.0010209020409849773
cut_off= 0.7462793919600187
iteration= 7  length= 59  difference= -1  change rate= 0.0010209020409849773
cut_off= 0.7452584899190338
iteration= 8  length= 61  difference= 1  change rate= 0.0009188118368864796
cut_off= 0.7461773017559202
iteration= 9  length= 61  difference= 1  cha

In [38]:
torch.save(net.state_dict(), "LeNET_60_20_MNIST_Model_My_Exiperiment_4_Fine_Tuned")

In [39]:
len_1=30
len_2=10

print("Pruning fc1.................")

nodes=defaultdict(list)
for j in range(len(net.state_dict()["fc1.weight"])):  #Means j is a node of next layer, L2. Fc1 is connecting L1 and L2 layer.
    nodes[j]=net.state_dict()["fc1.weight"][j]

# print(len(nodes), " ", len(nodes[0]))

# from math import sqrt
distances=[]
for i in nodes.keys():
    for j in nodes.keys():
        if(i!=j and j>i):
            distances.append(euclidean_distance(nodes[i], nodes[j]))

distances.sort()
            
print("Finding clusters of nodes.................")
setpoints=findsets(len_1, nodes, distances)

# print(net.state_dict()["fc1.weight"].shape)
temp_weights=[]
for key in setpoints.keys():
    row=torch.zeros(len(net.state_dict()["fc1.weight"][0]), dtype=torch.float)
    for points in setpoints[key]:
        row+=net.state_dict()["fc1.weight"][points]
    row=row/len(setpoints[key])
    temp_weights.append(row)
# print(len(temp_weights), temp_weights[0].shape)

# print(net.state_dict()["fc1.bias"].shape)
temp_bias=torch.zeros(len(temp_weights), dtype=torch.float)
i=0
for key in setpoints.keys():
    for points in setpoints[key]:
        temp_bias[i]+=net.state_dict()["fc1.bias"][points]
    temp_bias[i]/=len(setpoints[key])
    i+=1
# print(temp_bias.shape)

print("Updating fc1.................")
net_1 = changenet(net, "fc1", temp_weights, temp_bias)

print(net.state_dict()["fc1.bias"].shape, " -> ", net_1.state_dict()["fc1.bias"].shape)
print(net.state_dict()["fc1.weight"].shape, " -> ", net_1.state_dict()["fc1.weight"].shape)

net=copy.deepcopy(net_1)
# print(net.state_dict()["fc1.weight"].shape)
# print(net.state_dict()["fc1.bias"].shape)

#just to check if the copy has been done correctly
# print(temp_weights[0],"\n\n",net.state_dict()["fc1.weight"][0])    


### Now, we have to change FC2 accordingly:
# print(net.state_dict()["fc2.weight"].shape)
print("Adjusting fc2 accordingly.................")
mat=net.state_dict()["fc2.weight"].t()
# print(mat.shape)

temp_weight=[]
for key in setpoints.keys():
    row=torch.zeros(len(mat[0]), dtype=torch.float)
    for points in setpoints[key]:
        row+=mat[points]
    temp_weight.append(row)
# print(len(temp_weight), temp_weight[0].shape)

newmat=torch.stack(temp_weight, dim=0)
newmat=newmat.t()
# print(newmat.shape)

# print(net.state_dict()["fc2.bias"].shape)

net_2 = changenet(net, "fc2", newmat, net.state_dict()["fc2.bias"])
net=copy.deepcopy(net_2)

print(net.state_dict()["fc1.weight"].shape)
print(net.state_dict()["fc2.weight"].shape)
print(net.state_dict()["fc3.weight"].shape)

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.fc1 = nn.Linear(16 * 4 * 4, len(net.state_dict()["fc1.weight"]))
        self.fc2 = nn.Linear(len(net.state_dict()["fc2.weight"][0]), len(net.state_dict()["fc2.weight"]))
        self.fc3 = nn.Linear(len(net.state_dict()["fc3.weight"][0]), 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)

    
from torchsummary import summary
device=torch.device("cpu")
model=Net().to(device)
summary(model, input_size=(1, 28, 28), device="cpu")

correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the test images: %f %%' % (
    100 * correct / total))


print("Pruning fc2.................")
from collections import defaultdict
nodes=defaultdict(list)
for j in range(len(net.state_dict()["fc2.weight"])):  #Means j is a node of next layer, L2.
    nodes[j]=net.state_dict()["fc2.weight"][j]

# print(len(nodes), " ", len(nodes[0]))


distances=[]
for i in nodes.keys():
    for j in nodes.keys():
        if(i!=j and j>i):
            distances.append(euclidean_distance(nodes[i], nodes[j]))
            
distances.sort()
# print(len(distances))

print("Finding clusters of nodes.................")
setpoints=findsets(len_2, nodes, distances)

# print(net.state_dict()["fc2.weight"].shape)
temp_weight=[]
for key in setpoints.keys():
    row=torch.zeros(len(net.state_dict()["fc2.weight"][0]), dtype=torch.float)
    for points in setpoints[key]:
        row+=net.state_dict()["fc2.weight"][points]
    row=row/len(setpoints[key])
    temp_weight.append(row)
# print(len(temp_weight), temp_weight[0].shape)

# print(net.state_dict()["fc2.bias"].shape)
temp_bias=torch.zeros(len(temp_weight), dtype=torch.float)
i=0
for key in setpoints.keys():
    for points in setpoints[key]:
        temp_bias[i]+=net.state_dict()["fc2.bias"][points]
    temp_bias[i]/=len(setpoints[key])
    i+=1
# print(temp_bias.shape)

print("Updating fc2.................")
net_3 = changenet(net, "fc2", temp_weight, temp_bias)
net=copy.deepcopy(net_3)

mat=net.state_dict()["fc3.weight"].t()
# print(mat.shape)

temp_weight=[]
for key in setpoints.keys():
    row=torch.zeros(len(mat[0]), dtype=torch.float)
    for points in setpoints[key]:
        row+=mat[points]
    temp_weight.append(row)
# print(len(temp_weight), temp_weight[0].shape)

newmat=torch.stack(temp_weight, dim=0)
newmat=newmat.t()
# print(newmat.shape)

print("Adjusting fc3 accordingly.................")
net_4 = changenet(net, "fc3", newmat, net.state_dict()["fc3.bias"])
net=copy.deepcopy(net_4)

print(net.state_dict()["fc1.weight"].shape)
print(net.state_dict()["fc2.weight"].shape)
print(net.state_dict()["fc3.weight"].shape)

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.fc1 = nn.Linear(16 * 4 * 4, len(net.state_dict()["fc1.weight"]))
        self.fc2 = nn.Linear(len(net.state_dict()["fc2.weight"][0]), len(net.state_dict()["fc2.weight"]))
        self.fc3 = nn.Linear(len(net.state_dict()["fc3.weight"][0]), 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)
    
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the test images: %f %%' % (
    100 * correct / total))

i=0
for parameter in net.parameters():
    i+=1
    if(i<5):
        parameter.requires_grad=False

transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.1307,), (0.3081,))])
trainset = torchvision.datasets.MNIST(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=200,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.MNIST(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=200,
                                         shuffle=False, num_workers=2)

classes = ('0', '1', '2', '3',
           '4', '5', '6', '7', '8', '9')

# import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

for epoch in range(20):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        # print(inputs.shape)
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 20 == 19:    # print every 2000 mini-batches
            print("[{}, {}] loss: {}".format
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Reraining')


from torchsummary import summary
device=torch.device("cpu")
model=Net().to(device)
summary(model, input_size=(1, 28, 28), device="cpu")

correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the test images: %f %%' % (
    100 * correct / total))

Pruning fc1.................
Finding clusters of nodes.................
desired len= 30
cut_off= 1.0944377371834768
iteration= 1  length= 11  difference= -19  change rate= 0.0010944377371834768
cut_off= 1.0736434201769907
iteration= 2  length= 13  difference= -17  change rate= 0.0010944377371834768
cut_off= 1.0550379786448716
iteration= 3  length= 16  difference= -14  change rate= 0.0010944377371834768
cut_off= 1.039715850324303
iteration= 4  length= 17  difference= -13  change rate= 0.0010944377371834768
cut_off= 1.0254881597409178
iteration= 5  length= 20  difference= -10  change rate= 0.0010944377371834768
cut_off= 1.014543782369083
iteration= 6  length= 21  difference= -9  change rate= 0.0010944377371834768
cut_off= 1.0046938427344319
iteration= 7  length= 25  difference= -5  change rate= 0.0010944377371834768
cut_off= 0.9992216540485145
iteration= 8  length= 25  difference= -5  change rate= 0.0010944377371834768
cut_off= 0.9937494653625971
iteration= 9  length= 28  difference= -2 

In [40]:
torch.save(net.state_dict(), "LeNET_30_10_MNIST_Model_My_Exiperiment_4_Fine_Tuned")

In [41]:
len_1=15
len_2=5

print("Pruning fc1.................")

nodes=defaultdict(list)
for j in range(len(net.state_dict()["fc1.weight"])):  #Means j is a node of next layer, L2. Fc1 is connecting L1 and L2 layer.
    nodes[j]=net.state_dict()["fc1.weight"][j]

# print(len(nodes), " ", len(nodes[0]))

# from math import sqrt
distances=[]
for i in nodes.keys():
    for j in nodes.keys():
        if(i!=j and j>i):
            distances.append(euclidean_distance(nodes[i], nodes[j]))

distances.sort()
            
print("Finding clusters of nodes.................")
setpoints=findsets(len_1, nodes, distances)

# print(net.state_dict()["fc1.weight"].shape)
temp_weights=[]
for key in setpoints.keys():
    row=torch.zeros(len(net.state_dict()["fc1.weight"][0]), dtype=torch.float)
    for points in setpoints[key]:
        row+=net.state_dict()["fc1.weight"][points]
    row=row/len(setpoints[key])
    temp_weights.append(row)
# print(len(temp_weights), temp_weights[0].shape)

# print(net.state_dict()["fc1.bias"].shape)
temp_bias=torch.zeros(len(temp_weights), dtype=torch.float)
i=0
for key in setpoints.keys():
    for points in setpoints[key]:
        temp_bias[i]+=net.state_dict()["fc1.bias"][points]
    temp_bias[i]/=len(setpoints[key])
    i+=1
# print(temp_bias.shape)

print("Updating fc1.................")
net_1 = changenet(net, "fc1", temp_weights, temp_bias)

print(net.state_dict()["fc1.bias"].shape, " -> ", net_1.state_dict()["fc1.bias"].shape)
print(net.state_dict()["fc1.weight"].shape, " -> ", net_1.state_dict()["fc1.weight"].shape)

net=copy.deepcopy(net_1)
# print(net.state_dict()["fc1.weight"].shape)
# print(net.state_dict()["fc1.bias"].shape)

#just to check if the copy has been done correctly
# print(temp_weights[0],"\n\n",net.state_dict()["fc1.weight"][0])    


### Now, we have to change FC2 accordingly:
# print(net.state_dict()["fc2.weight"].shape)
print("Adjusting fc2 accordingly.................")
mat=net.state_dict()["fc2.weight"].t()
# print(mat.shape)

temp_weight=[]
for key in setpoints.keys():
    row=torch.zeros(len(mat[0]), dtype=torch.float)
    for points in setpoints[key]:
        row+=mat[points]
    temp_weight.append(row)
# print(len(temp_weight), temp_weight[0].shape)

newmat=torch.stack(temp_weight, dim=0)
newmat=newmat.t()
# print(newmat.shape)

# print(net.state_dict()["fc2.bias"].shape)

net_2 = changenet(net, "fc2", newmat, net.state_dict()["fc2.bias"])
net=copy.deepcopy(net_2)

print(net.state_dict()["fc1.weight"].shape)
print(net.state_dict()["fc2.weight"].shape)
print(net.state_dict()["fc3.weight"].shape)

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.fc1 = nn.Linear(16 * 4 * 4, len(net.state_dict()["fc1.weight"]))
        self.fc2 = nn.Linear(len(net.state_dict()["fc2.weight"][0]), len(net.state_dict()["fc2.weight"]))
        self.fc3 = nn.Linear(len(net.state_dict()["fc3.weight"][0]), 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)

    
from torchsummary import summary
device=torch.device("cpu")
model=Net().to(device)
summary(model, input_size=(1, 28, 28), device="cpu")

correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the test images: %f %%' % (
    100 * correct / total))


print("Pruning fc2.................")
from collections import defaultdict
nodes=defaultdict(list)
for j in range(len(net.state_dict()["fc2.weight"])):  #Means j is a node of next layer, L2.
    nodes[j]=net.state_dict()["fc2.weight"][j]

# print(len(nodes), " ", len(nodes[0]))


distances=[]
for i in nodes.keys():
    for j in nodes.keys():
        if(i!=j and j>i):
            distances.append(euclidean_distance(nodes[i], nodes[j]))
            
distances.sort()
# print(len(distances))

print("Finding clusters of nodes.................")
setpoints=findsets(len_2, nodes, distances)

# print(net.state_dict()["fc2.weight"].shape)
temp_weight=[]
for key in setpoints.keys():
    row=torch.zeros(len(net.state_dict()["fc2.weight"][0]), dtype=torch.float)
    for points in setpoints[key]:
        row+=net.state_dict()["fc2.weight"][points]
    row=row/len(setpoints[key])
    temp_weight.append(row)
# print(len(temp_weight), temp_weight[0].shape)

# print(net.state_dict()["fc2.bias"].shape)
temp_bias=torch.zeros(len(temp_weight), dtype=torch.float)
i=0
for key in setpoints.keys():
    for points in setpoints[key]:
        temp_bias[i]+=net.state_dict()["fc2.bias"][points]
    temp_bias[i]/=len(setpoints[key])
    i+=1
# print(temp_bias.shape)

print("Updating fc2.................")
net_3 = changenet(net, "fc2", temp_weight, temp_bias)
net=copy.deepcopy(net_3)

mat=net.state_dict()["fc3.weight"].t()
# print(mat.shape)

temp_weight=[]
for key in setpoints.keys():
    row=torch.zeros(len(mat[0]), dtype=torch.float)
    for points in setpoints[key]:
        row+=mat[points]
    temp_weight.append(row)
# print(len(temp_weight), temp_weight[0].shape)

newmat=torch.stack(temp_weight, dim=0)
newmat=newmat.t()
# print(newmat.shape)

print("Adjusting fc3 accordingly.................")
net_4 = changenet(net, "fc3", newmat, net.state_dict()["fc3.bias"])
net=copy.deepcopy(net_4)

print(net.state_dict()["fc1.weight"].shape)
print(net.state_dict()["fc2.weight"].shape)
print(net.state_dict()["fc3.weight"].shape)

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.fc1 = nn.Linear(16 * 4 * 4, len(net.state_dict()["fc1.weight"]))
        self.fc2 = nn.Linear(len(net.state_dict()["fc2.weight"][0]), len(net.state_dict()["fc2.weight"]))
        self.fc3 = nn.Linear(len(net.state_dict()["fc3.weight"][0]), 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)
    
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the test images: %f %%' % (
    100 * correct / total))

i=0
for parameter in net.parameters():
    i+=1
    if(i<5):
        parameter.requires_grad=False

transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.1307,), (0.3081,))])
trainset = torchvision.datasets.MNIST(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=200,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.MNIST(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=200,
                                         shuffle=False, num_workers=2)

classes = ('0', '1', '2', '3',
           '4', '5', '6', '7', '8', '9')

# import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

for epoch in range(20):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        # print(inputs.shape)
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 20 == 19:    # print every 2000 mini-batches
            print("[{}, {}] loss: {}".format
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Reraining')


from torchsummary import summary
device=torch.device("cpu")
model=Net().to(device)
summary(model, input_size=(1, 28, 28), device="cpu")

correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the test images: %f %%' % (
    100 * correct / total))

Pruning fc1.................
Finding clusters of nodes.................
desired len= 15
cut_off= 1.2030386460265161
iteration= 1  length= 9  difference= -6  change rate= 0.0012030386460265162
cut_off= 1.195820414150357
iteration= 2  length= 10  difference= -5  change rate= 0.0012030386460265162
cut_off= 1.1898052209202243
iteration= 3  length= 10  difference= -5  change rate= 0.0012030386460265162
cut_off= 1.1837900276900917
iteration= 4  length= 10  difference= -5  change rate= 0.0012030386460265162
cut_off= 1.177774834459959
iteration= 5  length= 10  difference= -5  change rate= 0.0012030386460265162
cut_off= 1.1717596412298263
iteration= 6  length= 10  difference= -5  change rate= 0.0012030386460265162
cut_off= 1.1657444479996937
iteration= 7  length= 10  difference= -5  change rate= 0.0012030386460265162
cut_off= 1.159729254769561
iteration= 8  length= 10  difference= -5  change rate= 0.0012030386460265162
cut_off= 1.1537140615394283
iteration= 9  length= 12  difference= -3  change

In [42]:
torch.save(net.state_dict(), "LeNET_15_5_MNIST_Model_My_Exiperiment_4_Fine_Tuned")

Resources:
[https://towardsdatascience.com/how-to-cluster-in-high-dimensions-4ef693bacc6] [1/11/2019]