In [None]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import torch.optim as optim
import torch.nn.utils.prune as prune

from copy import deepcopy

In [None]:
# Load the California housing dataset (feature matrix + regression target).
housing = fetch_california_housing()
y = housing.target

# Split the RAW features first: fitting the scaler on the whole dataset would
# let test-set statistics (mean / variance) leak into the training inputs.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    housing.data, y, test_size=0.2, random_state=42
)

# Standardisation statistics come from the training split only and are then
# applied unchanged to every other split.
scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# `X` is kept so that any cell relying on the fully scaled matrix still works.
X = scaler.transform(housing.data)

# Targets get a trailing dimension so they match the model's (batch, 1) output.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)

X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Only the training batches are shuffled; evaluation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)


In [None]:
class SimpleFCN(nn.Module):
    """Three-layer fully connected regressor: input_size -> 16 -> 8 -> 1.

    ReLU is applied after the first two layers; the last layer is linear.
    """

    def __init__(self, input_size=8):
        """Create the three linear layers and the shared ReLU activation.

        Args:
            input_size: number of input features (defaults to 8).
        """
        super().__init__()
        self.fc1 = nn.Linear(in_features=input_size, out_features=16)
        self.fc2 = nn.Linear(in_features=16, out_features=8)
        self.fc3 = nn.Linear(in_features=8, out_features=1)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Map a batch of feature rows to one prediction per row."""
        hidden = self.relu(self.fc1(x))
        hidden = self.relu(self.fc2(hidden))
        return self.fc3(hidden)

# One network input per feature column of the training matrix.
input_size = X_train.shape[-1]
model = SimpleFCN(input_size=input_size)

In [None]:
# Mean-squared-error objective, optimised with AdamW over all model parameters.
criterion = nn.MSELoss()
optimizer = optim.AdamW(params=model.parameters(), lr=1e-3)

def train_model(model, train_loader, criterion, optimizer, num_epochs=20):
    """Train ``model`` in place and print the mean batch loss after each epoch.

    Args:
        model: module to optimise; it is switched to training mode.
        train_loader: sized iterable yielding ``(inputs, targets)`` batches.
        criterion: callable ``criterion(outputs, targets)`` returning a scalar loss.
        optimizer: optimizer already bound to the model's parameters.
        num_epochs: number of full passes over ``train_loader``.
    """
    model.train()
    for epoch_number in range(1, num_epochs + 1):
        epoch_loss_sum = 0.0
        for batch_inputs, batch_targets in train_loader:
            optimizer.zero_grad()

            batch_loss = criterion(model(batch_inputs), batch_targets)
            batch_loss.backward()
            optimizer.step()

            epoch_loss_sum += batch_loss.item()

        # Average of the per-batch losses, one line per epoch.
        mean_batch_loss = epoch_loss_sum / len(train_loader)
        print(f'Epoch [{epoch_number}/{num_epochs}], Loss: {mean_batch_loss:.4f}')

def evaluate_model(model, test_loader, criterion):
    """Print and return the mean per-batch loss of ``model`` over ``test_loader``.

    The model is switched to evaluation mode and gradients are disabled for
    the duration of the pass.

    Args:
        model: module to evaluate.
        test_loader: sized iterable yielding ``(inputs, targets)`` batches.
        criterion: callable ``criterion(outputs, targets)`` returning a scalar loss.

    Returns:
        The sum of the batch losses divided by ``len(test_loader)``.
    """
    model.eval()
    loss_sum = 0.0
    with torch.no_grad():
        for batch_inputs, batch_targets in test_loader:
            loss_sum += criterion(model(batch_inputs), batch_targets).item()

    avg_loss = loss_sum / len(test_loader)
    print(f'Evaluation Loss: {avg_loss:.4f}')
    return avg_loss

# Train the dense baseline, then report its loss on the held-out split.
# The evaluation call stays last so the notebook displays its return value.
num_epochs = 20
train_model(
    model=model,
    train_loader=train_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=num_epochs,
)
evaluate_model(model=model, test_loader=test_loader, criterion=criterion)

Epoch [1/20], Loss: 2.2121
Epoch [2/20], Loss: 0.6129
Epoch [3/20], Loss: 0.4882
Epoch [4/20], Loss: 0.4379
Epoch [5/20], Loss: 0.4162
Epoch [6/20], Loss: 0.4020
Epoch [7/20], Loss: 0.3917
Epoch [8/20], Loss: 0.3831
Epoch [9/20], Loss: 0.3732
Epoch [10/20], Loss: 0.3655
Epoch [11/20], Loss: 0.3556
Epoch [12/20], Loss: 0.3501
Epoch [13/20], Loss: 0.3425
Epoch [14/20], Loss: 0.3375
Epoch [15/20], Loss: 0.3326
Epoch [16/20], Loss: 0.3296
Epoch [17/20], Loss: 0.3268
Epoch [18/20], Loss: 0.3220
Epoch [19/20], Loss: 0.3197
Epoch [20/20], Loss: 0.3171
Evaluation Loss: 0.3235


0.3234672683935899

In [None]:
# Tiny positive one-element tensor, used as a floor so that normalising by a
# maximum never divides by zero.
eps = torch.tensor((1e-10,))

In [None]:
@torch.enable_grad()
def evaluate_sensitivity(model, dataloader, loss_function):
    """Score every weight by the average magnitude of its loss gradient.

    For each batch the loss is back-propagated and ``|grad|`` of every
    parameter whose name contains ``"weight"`` is accumulated. The sums are
    averaged over the batches and each tensor is rescaled so that its largest
    score is 1.

    Gradients are enabled for the whole call (decorator), so the function can
    be used from inside a ``torch.no_grad()`` context.

    Args:
        model: module whose weights are scored. Its gradients are cleared
            before every batch and once more before returning.
        dataloader: iterable yielding ``(data, target)`` batches.
        loss_function: callable ``loss_function(output, target)`` returning a
            scalar loss.

    Returns:
        dict mapping parameter name -> CPU tensor of scores with the same
        shape as the parameter. Empty when ``dataloader`` yields no batches.
    """
    # Lower bound for the normaliser: all-zero gradients must not divide by 0.
    min_scale = 1e-10

    sensitivity = {}
    num_batches = 0

    for data, target in dataloader:
        model.zero_grad()

        loss = loss_function(model(data), target)
        loss.backward()
        num_batches += 1

        for param_name, p in model.named_parameters():
            # Parameters that did not take part in the graph have no gradient.
            if "weight" not in param_name or p.grad is None:
                continue

            # The gradient (not the weight value) carries the dependence on
            # the data; |weight| alone would be identical for every batch.
            batch_score = p.grad.detach().abs().cpu()
            if param_name in sensitivity:
                sensitivity[param_name] += batch_score
            else:
                sensitivity[param_name] = batch_score

    for param_name in sensitivity:
        # Count the batches actually seen so iterables without len() work too.
        sensitivity[param_name] /= num_batches
        sensitivity[param_name] /= sensitivity[param_name].max().clamp(min=min_scale)

    # Do not leave the last batch's gradients behind for the next optimiser step.
    model.zero_grad()

    return sensitivity


In [None]:
def do_pruning(model, pruning_type="Random", **kwargs):
    """Return a pruned deep copy of ``model``; ``model`` itself is not modified.

    The ``weight`` of every ``nn.Linear`` layer in the copy is pruned with the
    selected strategy. Afterwards the pruning re-parametrisation is removed,
    so the returned model has ordinary ``weight`` parameters with the pruned
    entries set to zero.

    Args:
        model: module to copy and prune.
        pruning_type: one of
            ``"L1"`` (``prune.l1_unstructured``),
            ``"Random"`` (``prune.random_unstructured``),
            ``"SensitivityBased"`` (lowest scores from ``sensitivity``),
            ``"Remove"`` (only fold existing masks into the weights).
            Any other value returns an unpruned copy.
        **kwargs:
            amount (default ``0.3``): forwarded to the torch pruning functions;
                for ``"SensitivityBased"`` it is the fraction of each layer's
                weights to prune.
            logs (default ``False``): print per-layer progress and masks.
            sensitivity (default ``{}``): mapping from parameter name as
                produced by ``model.named_parameters()`` (e.g. ``"fc1.weight"``)
                to a score tensor with as many elements as that weight.
                Layers without an entry are left unpruned.

    Returns:
        The pruned copy of ``model``.

    Raises:
        ValueError: if a sensitivity tensor does not have the same number of
            elements as the weight it is meant to score.
    """
    prune_model = deepcopy(model)

    amount = kwargs.pop('amount', 0.3)
    logs = kwargs.pop('logs', False)
    sensitivity = kwargs.pop('sensitivity', {})

    # Dotted path of every sub-module, so sensitivity scores are looked up by
    # parameter name. Matching by position breaks as soon as the dict skips a
    # layer or contains weights of non-Linear modules.
    module_paths = {id(module): path for path, module in prune_model.named_modules()}

    def apply_l1_pruning(module):
        if isinstance(module, nn.Linear):
            prune.l1_unstructured(module, name='weight', amount=amount)
            if logs:
                print(f"Применен L1 прунинг к слою: {module}")
                print(f"Маска:\n{module.weight_mask}\n")

    def apply_random_pruning(module):
        if isinstance(module, nn.Linear):
            prune.random_unstructured(module, name='weight', amount=amount)
            if logs:
                print(f"Применен Random прунинг к слою: {module}")
                print(f"Маска:\n{module.weight_mask}\n")

    def remove_pruning(module):
        # Makes the mask permanent and restores a plain `weight` parameter.
        if isinstance(module, nn.Linear) and hasattr(module, 'weight_mask'):
            prune.remove(module, 'weight')
            if logs:
                print(f"Удалена маска прунинга в слое: {module}")

    def zero_weights_by_mask(module):
        if isinstance(module, nn.Linear) and hasattr(module, 'weight_mask'):
            with torch.no_grad():
                module.weight.data *= module.weight_mask
                if logs:
                    print(f"Обнулены веса по маске в слое: {module}")
                    print(f"Текущие веса:\n{module.weight.data}\n")

    def apply_sensitivity_pruning(module):
        if not isinstance(module, nn.Linear):
            return

        path = module_paths.get(id(module), "")
        param_name = f"{path}.weight" if path else "weight"
        if param_name not in sensitivity:
            return

        flat_sensitivity = sensitivity[param_name].reshape(-1)
        if flat_sensitivity.numel() != module.weight.numel():
            raise ValueError(
                f"sensitivity[{param_name!r}] has {flat_sensitivity.numel()} elements, "
                f"but the layer weight has {module.weight.numel()}"
            )

        # Prune the `amount` fraction of weights with the lowest scores.
        num_params_to_prune = int(amount * flat_sensitivity.numel())
        _, indices = torch.topk(flat_sensitivity, k=num_params_to_prune, largest=False)

        # The mask lives on the weight's device even if the scores are on CPU.
        mask = torch.ones_like(module.weight)
        mask.view(-1)[indices.to(mask.device)] = 0

        prune.custom_from_mask(module, name='weight', mask=mask)

        if logs:
            print(f"Применен Sensitivity-based прунинг к слою: {module}")
            print(f"Маска:\n{mask.view_as(module.weight)}\n")

    strategies = {
        "L1": apply_l1_pruning,
        "Random": apply_random_pruning,
        "Remove": remove_pruning,
        "SensitivityBased": apply_sensitivity_pruning,
    }
    apply_func = strategies.get(pruning_type)

    if apply_func is not None:
        # Three passes: create masks, zero the masked weights, then drop the
        # re-parametrisation so only plain parameters remain.
        prune_model.apply(apply_func)
        prune_model.apply(zero_weights_by_mask)
        prune_model.apply(remove_pruning)

    return prune_model


In [None]:
# L1 (magnitude) pruning of 75% of each Linear layer's weights, on a copy of the model.
pruned_model = do_pruning(model, "L1", amount=0.75, logs=False)

In [None]:
# Held-out loss of the pruned copy, before any fine-tuning.
evaluate_model(model=pruned_model, test_loader=test_loader, criterion=criterion)

Evaluation Loss: 4.3000


4.299985500482412

In [None]:
def dense_to_sparse(dense_tensor):
    """Build a sparse COO tensor holding only the non-zero entries of ``dense_tensor``.

    Uses ``torch.sparse_coo_tensor`` rather than the legacy
    ``torch.sparse.FloatTensor`` constructor, so the result keeps the dtype
    and device of the input instead of being tied to float32.

    Args:
        dense_tensor: dense tensor to convert.

    Returns:
        Sparse COO tensor with the same shape as the input. An all-zero input
        yields a sparse tensor with no stored values.
    """
    # One index tensor per dimension, listing the coordinates of the non-zeros.
    nonzero_coords = dense_tensor.nonzero(as_tuple=True)
    values = dense_tensor[nonzero_coords]
    # COO layout expects indices as a single (ndim, nnz) tensor.
    indices = torch.stack(nonzero_coords)

    return torch.sparse_coo_tensor(indices, values, dense_tensor.size())

In [None]:
class SparseLinear(nn.Module):
    """Linear layer whose weight and bias are kept in sparse COO form.

    Only the stored (non-zero) values are trainable parameters
    (``weight_values`` / ``bias_values``); the sparsity pattern is fixed at
    construction time. The index tensors are registered as buffers so they
    follow the module across ``.to(device)`` calls and are included in
    ``state_dict()`` together with the values they belong to.
    """

    def __init__(self, weight, bias):
        """Split the sparse tensors into fixed indices and trainable values.

        Args:
            weight: sparse COO tensor of shape ``(out_features, in_features)``.
            bias: sparse COO tensor of shape ``(out_features,)``.
        """
        super().__init__()

        # Coalesce once; indices()/values() are only defined on coalesced tensors.
        weight = weight.coalesce()
        bias = bias.coalesce()

        self.register_buffer("weight_indices", weight.indices())
        self.weight_values = nn.Parameter(weight.values())
        self.weight_size = list(weight.size())

        self.register_buffer("bias_indices", bias.indices())
        self.bias_values = nn.Parameter(bias.values())
        self.bias_size = list(bias.size())

    def forward(self, input):
        """Compute ``input @ W.T + b`` for a 2-D ``input`` of shape (batch, in_features)."""
        # Rebuild the sparse tensors from the current parameter values so that
        # gradients flow back to `weight_values` / `bias_values`.
        sparse_weight = torch.sparse_coo_tensor(
            self.weight_indices, self.weight_values, self.weight_size
        )
        dense_bias = torch.sparse_coo_tensor(
            self.bias_indices, self.bias_values, self.bias_size
        ).to_dense()

        # sparse.mm needs the sparse operand first: (out, in) @ (in, batch).
        output = torch.sparse.mm(sparse_weight, input.t()).t()

        return output + dense_bias.unsqueeze(0)

In [None]:
# import torch
# import torch.nn as nn

# class SparseLinear(nn.Module):
#     def __init__(self, weight, bias):
#         super(SparseLinear, self).__init__()

#         self.weight = nn.Parameter(weight.to_dense())
#         # self.bias = nn.Parameter(bias)

#     def forward(self, input):

#         output = torch.sparse.mm(self.weight, input.t()).t()
#         # output += self.bias

#         return output

In [None]:
def convert_dense_to_sparse_network(model):
    """Return a copy of ``model`` in which every ``nn.Linear`` is a ``SparseLinear``.

    The copy is produced with ``deepcopy`` instead of re-instantiating each
    module class, so models whose constructors take arguments are supported
    and the state of non-Linear sub-modules is preserved. ``model`` itself is
    not modified.

    Args:
        model: module to convert. If it is itself an ``nn.Linear``, a single
            ``SparseLinear`` is returned.

    Returns:
        A module with the same structure as ``model`` in which the Linear
        layers, at any nesting depth, are replaced by ``SparseLinear`` layers
        holding only the non-zero weights and biases.
    """
    def _to_sparse_linear(linear):
        # A Linear built with bias=False has `bias is None`; use an all-zero
        # bias so the sparse layer still receives a tensor of the right shape.
        if linear.bias is None:
            bias_data = torch.zeros(
                linear.out_features,
                dtype=linear.weight.dtype,
                device=linear.weight.device,
            )
        else:
            bias_data = linear.bias.data
        return SparseLinear(dense_to_sparse(linear.weight.data), dense_to_sparse(bias_data))

    def _sparsify_children(parent):
        # Snapshot the children first: the loop replaces entries while iterating.
        for name, child in list(parent.named_children()):
            if isinstance(child, nn.Linear):
                setattr(parent, name, _to_sparse_linear(child))
            else:
                _sparsify_children(child)

    if isinstance(model, nn.Linear):
        return _to_sparse_linear(model)

    new_model = deepcopy(model)
    _sparsify_children(new_model)
    return new_model

In [None]:
# Swap the pruned model's Linear layers for their sparse counterparts.
sparse_model = convert_dense_to_sparse_network(model=pruned_model)

In [None]:
# Trainable tensors of the sparse model: only the stored non-zero values.
for param in sparse_model.named_parameters():
    param_name, param_tensor = param
    print(param_name, param_tensor.shape)

fc1.weight_values torch.Size([32])
fc1.bias_values torch.Size([16])
fc2.weight_values torch.Size([32])
fc2.bias_values torch.Size([8])
fc3.weight_values torch.Size([2])
fc3.bias_values torch.Size([1])


In [None]:
# Same listing for the dense model, for comparison with the sparse one.
for param in model.named_parameters():
    param_name, param_tensor = param
    print(param_name, param_tensor.shape)

fc1.weight torch.Size([16, 8])
fc1.bias torch.Size([16])
fc2.weight torch.Size([8, 16])
fc2.bias torch.Size([8])
fc3.weight torch.Size([1, 8])
fc3.bias torch.Size([1])


In [None]:
# Fine-tune only the surviving (non-zero) values of the sparse model.
num_epochs = 20
criterion = nn.MSELoss()
optimizer = optim.AdamW(params=sparse_model.parameters(), lr=1e-3)
# Alternative that was considered:
# optimizer = optim.SparseAdam(sparse_model.parameters(), lr=0.001)

train_model(
    model=sparse_model,
    train_loader=train_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=num_epochs,
)
evaluate_model(model=sparse_model, test_loader=test_loader, criterion=criterion)

Epoch [1/20], Loss: 3.5410
Epoch [2/20], Loss: 2.7627
Epoch [3/20], Loss: 2.3026
Epoch [4/20], Loss: 1.9679
Epoch [5/20], Loss: 1.7301
Epoch [6/20], Loss: 1.5679


KeyboardInterrupt: 

In [None]:
# Show the stored pattern of every sparse layer: indices, values, dense shape.
for name, module in sparse_model.named_children():
    if not isinstance(module, SparseLinear):
        continue
    for stored in (module.weight_indices, module.weight_values, module.weight_size):
        print(stored)

tensor([[ 0,  0,  1,  1,  1,  2,  2,  3,  3,  3,  4,  5,  5,  6,  7,  8,  9,  9,
         10, 10, 11, 11, 11, 11, 12, 12, 13, 13, 14, 15, 15, 15],
        [ 0,  7,  5,  6,  7,  6,  7,  2,  3,  5,  2,  1,  3,  1,  2,  5,  0,  5,
          2,  5,  2,  3,  5,  6,  0,  2,  3,  7,  1,  5,  6,  7]])
Parameter containing:
tensor([-0.5490, -0.0094, -1.4751, -0.2817, -0.6310, -1.0393, -0.7908, -1.0391,
         0.4169, -0.4048,  0.4887,  0.3577, -0.3371,  0.4536,  0.5027, -1.5247,
         0.4702, -1.1093,  0.2888, -0.6178,  0.0719, -0.0153, -1.4159, -0.4777,
        -0.5847, -0.4861,  0.3875, -0.3778, -0.3593, -2.6192, -0.5686, -0.4481],
       requires_grad=True)
torch.Size([16, 8])
tensor([[ 0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,
          3,  3,  3,  4,  4,  5,  5,  5,  5,  7,  7,  7,  7,  7],
        [ 0,  1,  3,  5,  7,  8, 10, 15,  0,  1,  8, 11, 15,  2,  3, 10, 15,  0,
          6, 11, 15,  0,  9,  1,  3, 10, 15,  1,  2,  8,  9, 15]])
Parameter containing

In [None]:
# Define a simple sparse neural network
class SparseNet(nn.Module):
    """Two-layer network (10 -> 5 -> 1) whose first layer starts out sparse."""

    def __init__(self):
        """Create both layers and zero roughly 80% of the first layer's weights."""
        super().__init__()
        self.fc1 = nn.Linear(10, 5)  # Input layer
        self.fc2 = nn.Linear(5, 1)    # Output layer

        # Sparsify with a plain Bernoulli keep-mask. Dropout in training mode
        # would also multiply the surviving weights by 1 / (1 - p) (5x for
        # p = 0.8), inflating the initialisation instead of only zeroing entries.
        drop_probability = 0.8
        with torch.no_grad():
            keep_mask = torch.rand_like(self.fc1.weight) >= drop_probability
            self.fc1.weight.mul_(keep_mask.to(self.fc1.weight.dtype))

    def forward(self, x):
        """Apply fc1 + ReLU followed by the linear output layer."""
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Sparse Adam optimizer
class SparseAdam(optim.Adam):
    """Adam subclass kept as an extension point for sparse-aware updates.

    At the moment it adds no behaviour of its own: construction and ``step``
    are forwarded to ``optim.Adam`` with only ``lr`` being configurable.
    """

    def __init__(self, params, lr=1e-3):
        """Initialise plain Adam over ``params`` with learning rate ``lr``."""
        super().__init__(params, lr=lr)

    def step(self, closure=None):
        """Run one Adam update and return whatever the parent ``step`` returns."""
        # Hook for custom sparse update logic; currently a direct pass-through.
        return super().step(closure)

# Training loop
def train(model, optimizer, criterion, data_loader, epochs=5):
    """Train ``model`` in place and print the mean batch loss of every epoch.

    The reported value is the average over all batches of the epoch rather
    than the loss of whichever batch happened to come last. An epoch without
    batches is reported as ``nan`` instead of failing on an unbound ``loss``.

    Args:
        model: module to optimise; it is switched to training mode.
        optimizer: optimizer bound to the model's parameters.
        criterion: callable ``criterion(outputs, targets)`` returning a scalar loss.
        data_loader: iterable yielding ``(inputs, targets)`` batches.
        epochs: number of passes over ``data_loader``.
    """
    model.train()
    for epoch in range(epochs):
        epoch_loss = 0.0
        num_batches = 0
        for inputs, targets in data_loader:
            optimizer.zero_grad()  # Clear previous gradients
            outputs = model(inputs)  # Forward pass
            loss = criterion(outputs, targets)  # Compute loss
            loss.backward()  # Backward pass
            optimizer.step()  # Update weights

            epoch_loss += loss.item()
            num_batches += 1

        mean_loss = epoch_loss / num_batches if num_batches else float('nan')
        print(f'Epoch [{epoch + 1}/{epochs}], Loss: {mean_loss:.4f}')

# Example usage
# Demo on random data. NOTE: in a notebook kernel `__name__` is "__main__", so
# this block runs and rebinds the notebook-level names `model`, `optimizer`
# and `criterion` that earlier cells defined.
if __name__ == "__main__":
    # Synthetic regression problem: 100 samples, 10 features, 1 target.
    inputs = torch.randn(100, 10)
    targets = torch.randn(100, 1)

    # Shuffled mini-batches of 16 samples.
    dataset = TensorDataset(inputs, targets)
    data_loader = DataLoader(dataset, batch_size=16, shuffle=True)

    # Model first (its construction draws random numbers), then loss and optimizer.
    model = SparseNet()
    criterion = nn.MSELoss()
    optimizer = SparseAdam(model.parameters(), lr=0.001)

    train(model, optimizer, criterion, data_loader, epochs=5)


Epoch [1/5], Loss: 0.9319
Epoch [2/5], Loss: 1.7572
Epoch [3/5], Loss: 0.4295
Epoch [4/5], Loss: 1.8499
Epoch [5/5], Loss: 1.3668
