<a href="https://colab.research.google.com/github/TheCaveOfAdullam/study3/blob/main/1016test1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install torch_pruning

Collecting torch_pruning
  Downloading torch_pruning-1.4.3-py3-none-any.whl.metadata (29 kB)
Downloading torch_pruning-1.4.3-py3-none-any.whl (62 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/62.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.9/62.9 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch_pruning
Successfully installed torch_pruning-1.4.3


In [3]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.preprocessing import LabelEncoder
import torch.nn.utils.prune as prune
import torch_pruning as tp

In [4]:
# 기본 경로 설정
base_dir = '/content/drive/MyDrive/ship_motor10'
categories = ['normal', 'fault_BB', 'fault_RI', 'fault_SM']
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 데이터 로드 및 전처리 함수 정의
class VibrationDataset(Dataset):
    def __init__(self, base_dir, split, categories, transform=None):
        self.X = []
        self.y = []
        self.transform = transform
        split_dir = os.path.join(base_dir, split)
        for category in categories:
            category_dir = os.path.join(split_dir, category)
            files = os.listdir(category_dir)
            for file in files:
                file_path = os.path.join(category_dir, file)
                data = pd.read_csv(file_path, header=None).values
                data = pd.to_numeric(data.flatten(), errors='coerce').reshape(-1, data.shape[1])
                data = np.nan_to_num(data).astype('float32')
                self.X.append(data)
                self.y.append(category)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        X = self.X[idx]
        y = self.y[idx]
        X = X.reshape(-1)
        return torch.tensor(X, dtype=torch.float32), y

# 레이블 인코딩
label_encoder = LabelEncoder()

# 데이터셋 준비
train_dataset = VibrationDataset(base_dir, 'train', categories)
val_dataset = VibrationDataset(base_dir, 'validation', categories)
test_dataset = VibrationDataset(base_dir, 'test', categories)

# 레이블 인코딩
y_train_encoded = label_encoder.fit_transform([y for _, y in train_dataset])
y_val_encoded = label_encoder.transform([y for _, y in val_dataset])
y_test_encoded = label_encoder.transform([y for _, y in test_dataset])

# 레이블 인코딩된 데이터셋 업데이트
train_dataset.y = y_train_encoded
val_dataset.y = y_val_encoded
test_dataset.y = y_test_encoded

# 데이터 로더
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [22]:
# CNN 모델 정의
class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=64, kernel_size=16, stride=16)
        self.pool1 = nn.MaxPool1d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv1d(in_channels=64, out_channels=32, kernel_size=3, stride=1)
        self.conv3 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=5, stride=1)
        self.conv4 = nn.Conv1d(in_channels=64, out_channels=128, kernel_size=5, stride=1)
        self.pool2 = nn.MaxPool1d(kernel_size=2, stride=2)

        conv1_output_size = (24002 - 16) // 16 + 1
        pool1_output_size = conv1_output_size // 2
        conv2_output_size = (pool1_output_size - 3) // 1 + 1
        conv3_output_size = (conv2_output_size - 5) // 1 + 1
        conv4_output_size = (conv3_output_size - 5) // 1 + 1
        pool2_output_size = conv4_output_size // 2

        self.fc1 = nn.Linear(128 * pool2_output_size, 5000)
        self.fc2 = nn.Linear(5000, 1000)
        self.fc3 = nn.Linear(1000, len(categories))

    def forward(self, x):
        x = self.conv1(x)
        x = torch.relu(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = torch.relu(x)
        x = self.conv3(x)
        x = torch.relu(x)
        x = self.conv4(x)
        x = torch.relu(x)

        x = self.pool2(x)
        x = x.view(x.size(0), -1)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# 손실 함수 및 옵티마이저 설정
model = CNNModel().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

In [23]:
# 모델 훈련 함수
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10):
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        for inputs, labels in train_loader:
            inputs, labels = inputs.unsqueeze(1).to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        val_loss, val_accuracy = evaluate_model(model, val_loader, criterion)
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/total:.4f}, Accuracy: {100 * correct/total:.2f}%, Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.2f}%')

# 모델 평가 함수
def evaluate_model(model, loader, criterion):
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.unsqueeze(1).to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    return val_loss / total, 100 * correct / total

In [24]:
# import torch
# import torch.nn as nn
# import torch.nn.utils.prune as prune
# import numpy as np
# import torchprune as tp

# # 비구조적 프루닝 적용
# def apply_unstructured_pruning(model, amount=0.5):
#     for name, module in model.named_modules():
#         if isinstance(module, nn.Conv1d) or isinstance(module, nn.Linear):
#             prune.l1_unstructured(module, name='weight', amount=amount)
#             prune.remove(module, 'weight')  # 비구조적 프루닝 후 마스크 제거
#     print(f"Unstructured pruning applied with {amount * 100}% of weights pruned.")
#     return model

# # 0 가중치 필터 감지 (최소 한 개의 필터는 남기도록 조정)
# def detect_filters_to_prune(model, threshold=0.7):
#     filters_to_prune = []
#     for name, module in model.named_modules():
#         if isinstance(module, nn.Conv1d):
#             weight_data = module.weight.detach().cpu().numpy()
#             filter_zero_percentage = np.mean(weight_data == 0, axis=(1, 2))  # 각 필터의 0 비율 계산
#             prune_indices = np.where(filter_zero_percentage >= threshold)[0]  # 70% 이상인 필터의 인덱스

#             # 최소 한 개의 필터는 남기기 위한 조건 추가
#             if len(prune_indices) > 0 and len(prune_indices) < module.weight.shape[0]:
#                 filters_to_prune.append((module, prune_indices))
#             else:
#                 print(f"Skipping pruning for {name} as it would remove all filters.")
#     return filters_to_prune

# # 모델의 예시 입력 정의 (필요한 입력의 모양 제공)
# example_inputs = torch.randn(1, 1, 24002).to(device)  # (배치 크기, 채널 수, 시퀀스 길이)

# # 구조적 프루닝 적용 및 채널 불일치 해결
# def apply_structural_pruning_with_torchprune(model, filters_to_prune):
#     # DependencyGraph에서 종속성 그래프를 생성하고 예시 입력을 통해 분석
#     DG = tp.DependencyGraph().build_dependency(model, example_inputs=example_inputs)

#     for module, prune_indices in filters_to_prune:
#         # get_pruning_group을 사용하여 프루닝 그룹을 가져옵니다.
#         pruning_group = DG.get_pruning_group(module, tp.prune_conv_out_channels, idxs=prune_indices)

#         # 프루닝 그룹을 실행하여 프루닝 적용
#         if pruning_group is not None:
#             pruning_group.prune()
#             print(f"Pruned {len(prune_indices)} filters from module {module}.")
#         else:
#             print(f"Skipping module {module} as no pruning group was generated.")

#     print(f"Structural pruning applied. {len(filters_to_prune)} filters pruned.")
#     return model

# # 채널 불일치를 해결하기 위한 1x1 컨볼루션 레이어 추가
# def add_1x1_conv_if_needed(model):
#     new_layers = []
#     for name, module in model.named_children():
#         new_layers.append(module)
#         if isinstance(module, nn.Conv1d):
#             out_channels = module.out_channels
#             in_channels = module.in_channels
#             # 출력 채널이 입력 채널과 다를 때 1x1 컨볼루션 추가
#             if out_channels != in_channels:
#                 new_layers.append(nn.Conv1d(out_channels, out_channels, kernel_size=1))
#                 print(f"Added 1x1 Conv1d to match channels after layer {name}.")
#     return nn.Sequential(*new_layers)

# # 모델 프루닝 통합 함수
# def prune_model(model, unstructured_amount=0.5, zero_threshold=0.7):
#     model = apply_unstructured_pruning(model, amount=unstructured_amount)
#     filters_to_prune = detect_filters_to_prune(model, threshold=zero_threshold)
#     if filters_to_prune:
#         model = apply_structural_pruning_with_torchprune(model, filters_to_prune)
#         model = add_1x1_conv_if_needed(model)  # 프루닝 후 채널 불일치 해결
#     else:
#         print("No filters meet the zero weight threshold for pruning.")
#     return model

# # 모델 훈련 함수
# def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10):
#     for epoch in range(num_epochs):
#         model.train()
#         for inputs, labels in train_loader:
#             inputs, labels = inputs.unsqueeze(1).to(device), labels.to(device)
#             optimizer.zero_grad()
#             outputs = model(inputs)
#             loss = criterion(outputs, labels)
#             loss.backward()
#             optimizer.step()

#         # 검증 로직을 여기에 추가할 수 있습니다.
#         print(f"Epoch {epoch+1}/{num_epochs} completed.")
#     return model

In [30]:
import torch
import torch.nn as nn
import torch.nn.utils.prune as prune
import numpy as np
import torch_pruning as tp  # torch_pruning 라이브러리 사용

# 비구조적 프루닝 적용
def apply_unstructured_pruning(model, amount=0.5):
    for name, module in model.named_modules():
        if isinstance(module, nn.Conv1d) or isinstance(module, nn.Linear):
            prune.l1_unstructured(module, name='weight', amount=amount)
            prune.remove(module, 'weight')  # 비구조적 프루닝 후 마스크 제거
    print(f"Unstructured pruning applied with {amount * 100}% of weights pruned.")
    return model

# 0 가중치 필터 감지 (최소 한 개의 필터는 남기도록 조정)
def detect_filters_to_prune(model, threshold=0.7):
    filters_to_prune = []
    for name, module in model.named_modules():
        if isinstance(module, nn.Conv1d):
            weight_data = module.weight.detach().cpu().numpy()
            filter_zero_percentage = np.mean(weight_data == 0, axis=(1, 2))  # 각 필터의 0 비율 계산
            prune_indices = np.where(filter_zero_percentage >= threshold)[0]  # 70% 이상인 필터의 인덱스

            # 최소 한 개의 필터는 남기기 위한 조건 추가
            if len(prune_indices) > 0 and len(prune_indices) < module.weight.shape[0]:
                filters_to_prune.append((module, prune_indices))
            else:
                print(f"Skipping pruning for {name} as it would remove all filters.")
    return filters_to_prune

# 모델의 예시 입력 정의 (필요한 입력의 모양 제공)
example_inputs = torch.randn(1, 1, 24002).to(device)  # (배치 크기, 채널 수, 시퀀스 길이)

# 구조적 프루닝 적용 및 채널 불일치 해결 (torch_pruning 활용)
def apply_structural_pruning_with_torchprune(model, filters_to_prune):
    # DependencyGraph에서 종속성 그래프를 생성하고 예시 입력을 통해 분석
    DG = tp.DependencyGraph().build_dependency(model, example_inputs=example_inputs)

    for module, prune_indices in filters_to_prune:
        # get_pruning_group을 사용하여 프루닝 그룹을 가져옵니다.
        pruning_group = DG.get_pruning_group(module, tp.prune_conv_out_channels, idxs=prune_indices)

        # 프루닝 그룹을 실행하여 프루닝 적용 (pruning_group 객체의 prune 메서드 사용)
        if pruning_group is not None:
            pruning_group.prune()  # 프루닝 그룹의 prune 메서드 호출
            print(f"Pruned {len(prune_indices)} filters from module {module}.")
        else:
            print(f"Skipping module {module} as no pruning group was generated.")

    print(f"Structural pruning applied. {len(filters_to_prune)} filters pruned.")
    return model

# 모델 프루닝 통합 함수
def prune_model(model, unstructured_amount=0.5, zero_threshold=0.7):
    model = apply_unstructured_pruning(model, amount=unstructured_amount)
    filters_to_prune = detect_filters_to_prune(model, threshold=zero_threshold)
    if filters_to_prune:
        model = apply_structural_pruning_with_torchprune(model, filters_to_prune)
    else:
        print("No filters meet the zero weight threshold for pruning.")
    return model

# 모델 훈련 함수
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10):
    for epoch in range(num_epochs):
        model.train()
        for inputs, labels in train_loader:
            inputs, labels = inputs.unsqueeze(1).to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        # 검증 로직을 여기에 추가할 수 있습니다.
        print(f"Epoch {epoch+1}/{num_epochs} completed.")
    return model

In [26]:
# 모델 훈련 및 프루닝 테스트
train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=7)

Epoch 1/7 completed.
Epoch 2/7 completed.
Epoch 3/7 completed.
Epoch 4/7 completed.
Epoch 5/7 completed.
Epoch 6/7 completed.
Epoch 7/7 completed.


CNNModel(
  (conv1): Conv1d(1, 64, kernel_size=(16,), stride=(16,))
  (pool1): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv1d(64, 32, kernel_size=(3,), stride=(1,))
  (conv3): Conv1d(32, 64, kernel_size=(5,), stride=(1,))
  (conv4): Conv1d(64, 128, kernel_size=(5,), stride=(1,))
  (pool2): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=47360, out_features=5000, bias=True)
  (fc2): Linear(in_features=5000, out_features=1000, bias=True)
  (fc3): Linear(in_features=1000, out_features=4, bias=True)
)

In [31]:
pruned_model = prune_model(model, unstructured_amount=0.8, zero_threshold=0.70)

Unstructured pruning applied with 80.0% of weights pruned.
Skipping pruning for conv4 as it would remove all filters.
Pruned 48 filters from module Conv1d(1, 16, kernel_size=(16,), stride=(16,)).
Pruned 30 filters from module Conv1d(16, 2, kernel_size=(3,), stride=(1,)).
Pruned 61 filters from module Conv1d(2, 3, kernel_size=(5,), stride=(1,)).
Structural pruning applied. 3 filters pruned.


In [32]:
train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10)

Epoch 1/10 completed.
Epoch 2/10 completed.
Epoch 3/10 completed.
Epoch 4/10 completed.
Epoch 5/10 completed.
Epoch 6/10 completed.
Epoch 7/10 completed.
Epoch 8/10 completed.
Epoch 9/10 completed.
Epoch 10/10 completed.


CNNModel(
  (conv1): Conv1d(1, 16, kernel_size=(16,), stride=(16,))
  (pool1): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv1d(16, 2, kernel_size=(3,), stride=(1,))
  (conv3): Conv1d(2, 3, kernel_size=(5,), stride=(1,))
  (conv4): Conv1d(3, 128, kernel_size=(5,), stride=(1,))
  (pool2): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=47360, out_features=5000, bias=True)
  (fc2): Linear(in_features=5000, out_features=1000, bias=True)
  (fc3): Linear(in_features=1000, out_features=4, bias=True)
)

In [33]:
# 테스트 성능 확인
test_loss, test_accuracy = evaluate_model(pruned_model, test_loader, criterion)
print(f'Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.2f}%')

Test Loss: 0.0201, Test Accuracy: 83.33%


In [34]:
# 비제로 가중치 계산 함수
def count_nonzero_weights(model):
    nonzero_count = 0
    total_count = 0
    for name, param in model.named_parameters():
        if param.requires_grad:
            nonzero_count += torch.sum(param != 0).item()  # 0이 아닌 가중치 수 계산
            total_count += param.numel()  # 전체 가중치 수 계산
    return nonzero_count, total_count

# 비제로 가중치 수 계산
nonzero_weights, total_weights = count_nonzero_weights(model)
print(f"Number of non-zero weights: {nonzero_weights}")
print(f"Total number of weights: {total_weights}")
print(f"Percentage of non-zero weights: {100 * nonzero_weights / total_weights:.2f}%")

Number of non-zero weights: 58937307
Total number of weights: 241812455
Percentage of non-zero weights: 24.37%
