<a href="https://colab.research.google.com/github/TheCaveOfAdullam/study3/blob/main/1002Test1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [18]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [19]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.preprocessing import LabelEncoder
import torch.nn.utils.prune as prune

In [20]:
# 기본 경로 설정
base_dir = '/content/drive/MyDrive/ship_motor10'
categories = ['normal', 'fault_BB', 'fault_RI', 'fault_SM']
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 데이터 로드 및 전처리 함수 정의
class VibrationDataset(Dataset):
    def __init__(self, base_dir, split, categories, transform=None):
        self.X = []
        self.y = []
        self.transform = transform
        split_dir = os.path.join(base_dir, split)
        for category in categories:
            category_dir = os.path.join(split_dir, category)
            files = os.listdir(category_dir)
            for file in files:
                file_path = os.path.join(category_dir, file)
                data = pd.read_csv(file_path, header=None).values
                data = pd.to_numeric(data.flatten(), errors='coerce').reshape(-1, data.shape[1])
                data = np.nan_to_num(data).astype('float32')  # NaN 값을 0으로 대체하고, float32로 변환
                self.X.append(data)
                self.y.append(category)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        X = self.X[idx]
        y = self.y[idx]
        X = X.reshape(-1)  # 마지막 차원을 시퀀스에 병합하여 [sequence_length]로 변환
        return torch.tensor(X, dtype=torch.float32), y

# 레이블 인코딩
label_encoder = LabelEncoder()

# 데이터셋 준비
train_dataset = VibrationDataset(base_dir, 'train', categories)
val_dataset = VibrationDataset(base_dir, 'validation', categories)
test_dataset = VibrationDataset(base_dir, 'test', categories)

# 레이블 인코딩 및 원-핫 인코딩
y_train_encoded = label_encoder.fit_transform([y for _, y in train_dataset])
y_val_encoded = label_encoder.transform([y for _, y in val_dataset])
y_test_encoded = label_encoder.transform([y for _, y in test_dataset])

# 데이터셋에 레이블 추가
train_dataset.y = y_train_encoded
val_dataset.y = y_val_encoded
test_dataset.y = y_test_encoded

# 데이터 로더
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [21]:
class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=64, kernel_size=16, stride=16)
        self.pool1 = nn.MaxPool1d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv1d(in_channels=64, out_channels=32, kernel_size=3, stride=1)
        self.conv3 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=5, stride=1)
        self.conv4 = nn.Conv1d(in_channels=64, out_channels=128, kernel_size=5, stride=1)
        self.pool2 = nn.MaxPool1d(kernel_size=2, stride=2)

        # 계산된 출력 크기
        conv1_output_size = (24002 - 16) // 16 + 1
        pool1_output_size = conv1_output_size // 2
        conv2_output_size = (pool1_output_size - 3) // 1 + 1
        conv3_output_size = (conv2_output_size - 5) // 1 + 1
        conv4_output_size = (conv3_output_size - 5) // 1 + 1
        pool2_output_size = conv4_output_size // 2

        self.fc1 = nn.Linear(128 * pool2_output_size, 5000)
        self.fc2 = nn.Linear(5000, 1000)
        self.fc3 = nn.Linear(1000, len(categories))

    def forward(self, x):
        x = self.conv1(x)
        x = torch.relu(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = torch.relu(x)
        x = self.conv3(x)
        x = torch.relu(x)
        x = self.conv4(x)
        x = torch.relu(x)
        x = self.pool2(x)
        x = x.view(x.size(0), -1)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# 모델 초기화
model = CNNModel().to(device)

# 손실 함수 및 옵티마이저
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

In [22]:
# 모델 훈련
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10):
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        for inputs, labels in train_loader:
            inputs, labels = inputs.unsqueeze(1).to(device), labels.to(device)  # 입력 형태를 [batch_size, 1, sequence_length]로 만듦
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        val_loss, val_accuracy = evaluate_model(model, val_loader, criterion)
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/total:.4f}, Accuracy: {100 * correct/total:.2f}%, Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.2f}%')

In [23]:
# 모델 평가
def evaluate_model(model, loader, criterion):
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.unsqueeze(1).to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    return val_loss / total, 100 * correct / total

In [24]:
# 매그니튜드 기반 프루닝 함수
def prune_by_magnitude(model, threshold):
    for name, module in model.named_modules():
        if isinstance(module, nn.Conv1d) or isinstance(module, nn.Linear):
            prune.l1_unstructured(module, name='weight', amount=threshold)
            prune.remove(module, 'weight')  # 프루닝 마스크 제거하고 실제 파라미터에 반영
    print(f'Magnitude-based pruning with threshold: {threshold} applied.')

# 1차 테일러 전개 기반 프루닝 함수
def prune_by_taylor(model, threshold):
    for name, module in model.named_modules():
        if isinstance(module, nn.Conv1d) or isinstance(module, nn.Linear):
            importance = torch.abs(module.weight * module.weight.grad)
            mask = importance > threshold
            with torch.no_grad():
                module.weight[~mask] = 0  # 중요하지 않은 파라미터는 0으로 설정
    print(f'Taylor expansion-based pruning with threshold: {threshold} applied.')

In [25]:
# 모델 재훈련 (파인튜닝) 함수
def fine_tune_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=5):
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        for inputs, labels in train_loader:
            inputs, labels = inputs.unsqueeze(1).to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        val_loss, val_accuracy = evaluate_model(model, val_loader, criterion)
        print(f'Fine-tuning Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/total:.4f}, Accuracy: {100 * correct/total:.2f}%, Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.2f}%')

In [26]:
# 초기 훈련
train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10)

Epoch [1/10], Loss: 0.0391, Accuracy: 49.92%, Val Loss: 0.0394, Val Accuracy: 50.00%
Epoch [2/10], Loss: 0.0390, Accuracy: 50.00%, Val Loss: 0.0392, Val Accuracy: 50.00%
Epoch [3/10], Loss: 0.0272, Accuracy: 65.22%, Val Loss: 0.0056, Val Accuracy: 94.15%
Epoch [4/10], Loss: 0.0051, Accuracy: 95.52%, Val Loss: 0.0015, Val Accuracy: 99.81%
Epoch [5/10], Loss: 0.0008, Accuracy: 99.83%, Val Loss: 0.0004, Val Accuracy: 100.00%
Epoch [6/10], Loss: 0.0034, Accuracy: 97.44%, Val Loss: 0.0141, Val Accuracy: 76.56%
Epoch [7/10], Loss: 0.0011, Accuracy: 99.02%, Val Loss: 0.0002, Val Accuracy: 100.00%
Epoch [8/10], Loss: 0.0004, Accuracy: 99.78%, Val Loss: 0.0002, Val Accuracy: 100.00%
Epoch [9/10], Loss: 0.0028, Accuracy: 97.66%, Val Loss: 0.0018, Val Accuracy: 99.00%
Epoch [10/10], Loss: 0.0011, Accuracy: 99.14%, Val Loss: 0.0060, Val Accuracy: 91.59%


In [27]:
# 매그니튜드 기반 프루닝 적용
magnitude_threshold = 0.1  # 매그니튜드 프루닝 임계값 설정
prune_by_magnitude(model, magnitude_threshold)

Magnitude-based pruning with threshold: 0.1 applied.


In [28]:
# 매그니튜드 기반 프루닝 후 파인튜닝
optimizer = optim.Adam(model.parameters(), lr=0.0001)
fine_tune_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=5)

Fine-tuning Epoch [1/5], Loss: 0.0025, Accuracy: 97.99%, Val Loss: 0.0012, Val Accuracy: 99.04%
Fine-tuning Epoch [2/5], Loss: 0.0005, Accuracy: 99.70%, Val Loss: 0.0002, Val Accuracy: 99.93%
Fine-tuning Epoch [3/5], Loss: 0.0001, Accuracy: 99.98%, Val Loss: 0.0001, Val Accuracy: 100.00%
Fine-tuning Epoch [4/5], Loss: 0.0000, Accuracy: 100.00%, Val Loss: 0.0000, Val Accuracy: 100.00%
Fine-tuning Epoch [5/5], Loss: 0.0006, Accuracy: 99.33%, Val Loss: 0.0000, Val Accuracy: 100.00%


In [29]:
# 1차 테일러 전개 기반 프루닝 적용
taylor_threshold = 0.00001  # 테일러 전개 프루닝 임계값 설정
prune_by_taylor(model, taylor_threshold)

Taylor expansion-based pruning with threshold: 1e-06 applied.


In [30]:
# 테일러 전개 기반 프루닝 후 파인튜닝
optimizer = optim.Adam(model.parameters(), lr=0.0001)
fine_tune_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=5)

Fine-tuning Epoch [1/5], Loss: 0.0117, Accuracy: 83.46%, Val Loss: 0.0004, Val Accuracy: 100.00%
Fine-tuning Epoch [2/5], Loss: 0.0020, Accuracy: 99.21%, Val Loss: 0.1421, Val Accuracy: 50.00%
Fine-tuning Epoch [3/5], Loss: 0.0021, Accuracy: 98.48%, Val Loss: 0.0003, Val Accuracy: 100.00%
Fine-tuning Epoch [4/5], Loss: 0.0002, Accuracy: 100.00%, Val Loss: 0.0004, Val Accuracy: 99.56%
Fine-tuning Epoch [5/5], Loss: 0.0059, Accuracy: 95.47%, Val Loss: 0.0022, Val Accuracy: 99.30%


In [31]:
# 테스트 성능 확인
test_loss, test_accuracy = evaluate_model(model, test_loader, criterion)
print(f'Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.2f}%')

Test Loss: 0.0020, Test Accuracy: 99.96%


In [32]:
# 모델 저장
torch.save(model.state_dict(), 'model.pth')

In [33]:
# 모델 사이즈 확인
model_size = os.path.getsize('model.pth') / (1024 * 1024)
print(f"Model Size: {model_size:.2f} MB")

Model Size: 922.66 MB


In [34]:
# 파라미터 수 확인
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

total_params = count_parameters(model)
print(f"Total number of trainable parameters: {total_params}")

Total number of trainable parameters: 241868660


In [35]:
# 비제로 가중치 계산 함수
def count_nonzero_weights(model):
    nonzero_count = 0
    total_count = 0
    for name, param in model.named_parameters():
        if param.requires_grad:
            nonzero_count += torch.sum(param != 0).item()  # 0이 아닌 가중치 수 계산
            total_count += param.numel()  # 전체 가중치 수 계산
    return nonzero_count, total_count

# 비제로 가중치 수 계산
nonzero_weights, total_weights = count_nonzero_weights(model)
print(f"Number of non-zero weights: {nonzero_weights}")
print(f"Total number of weights: {total_weights}")
print(f"Percentage of non-zero weights: {100 * nonzero_weights / total_weights:.2f}%")

Number of non-zero weights: 48741362
Total number of weights: 241868660
Percentage of non-zero weights: 20.15%
