<a href="https://colab.research.google.com/github/TheCaveOfAdullam/study3/blob/main/GaussianMagnitudeFusionPruning1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install tensorflow_model_optimization

Collecting tensorflow_model_optimization
  Downloading tensorflow_model_optimization-0.8.0-py2.py3-none-any.whl.metadata (904 bytes)
Downloading tensorflow_model_optimization-0.8.0-py2.py3-none-any.whl (242 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m242.5/242.5 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tensorflow_model_optimization
Successfully installed tensorflow_model_optimization-0.8.0


In [3]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.preprocessing import LabelEncoder
from scipy.stats import norm
import tensorflow_model_optimization as tfmot

In [4]:
# 기본 경로 설정
base_dir = '/content/drive/MyDrive/ship_motor10'
categories = ['normal', 'fault_BB', 'fault_RI', 'fault_SM']

# 데이터 로드 및 전처리 함수 정의
def load_data(base_dir, split):
    X = []
    y = []
    split_dir = os.path.join(base_dir, split)
    for category in categories:
        category_dir = os.path.join(split_dir, category)
        for file in os.listdir(category_dir):
            file_path = os.path.join(category_dir, file)
            data = pd.read_csv(file_path, header=None).values
            data = pd.to_numeric(data.flatten(), errors='coerce').reshape(-1, data.shape[1])
            data = np.nan_to_num(data).astype('float32')  # NaN 값을 0으로 대체하고, float32로 변환
            X.append(data)
            y.append(category)
    return np.array(X), np.array(y)

# 데이터 로드
X_train, y_train = load_data(base_dir, 'train')
X_val, y_val = load_data(base_dir, 'validation')
X_test, y_test = load_data(base_dir, 'test')

# 데이터 차원 변경 (CNN 입력 형식에 맞게)
X_train = np.expand_dims(X_train, axis=-1)
X_val = np.expand_dims(X_val, axis=-1)
X_test = np.expand_dims(X_test, axis=-1)

# 레이블 인코딩
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_val_encoded = label_encoder.transform(y_val)
y_test_encoded = label_encoder.transform(y_test)

# 원-핫 인코딩
y_train_categorical = tf.keras.utils.to_categorical(y_train_encoded, num_classes=len(categories))
y_val_categorical = tf.keras.utils.to_categorical(y_val_encoded, num_classes=len(categories))
y_test_categorical = tf.keras.utils.to_categorical(y_test_encoded, num_classes=len(categories))


In [6]:
# 모델 정의
model = models.Sequential()

# Conv1 레이어
model.add(layers.Conv1D(filters=64, kernel_size=16, strides=16, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])))
# Pool1 레이어
model.add(layers.MaxPooling1D(pool_size=2, strides=2))

# Conv2 레이어
model.add(layers.Conv1D(filters=32, kernel_size=3, strides=1, activation='relu'))

# Conv3 레이어
model.add(layers.Conv1D(filters=64, kernel_size=5, strides=1, activation='relu'))

# Conv4 레이어
model.add(layers.Conv1D(filters=128, kernel_size=5, strides=1, activation='relu'))

# Pool2 레이어
model.add(layers.MaxPooling1D(pool_size=2, strides=2))

# Flatten 레이어
model.add(layers.Flatten())

# FC1 레이어
model.add(layers.Dense(units=5000, activation='relu'))

# FC2 레이어
model.add(layers.Dense(units=1000, activation='relu'))

# Output 레이어
model.add(layers.Dense(len(categories), activation='softmax'))

# 아담 옵티마이저 학습률 조정
# optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001, clipnorm=1.0)

# 모델 컴파일
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# 모델 요약 출력
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_4 (Conv1D)           (None, 750, 64)           2112      
                                                                 
 max_pooling1d_2 (MaxPoolin  (None, 375, 64)           0         
 g1D)                                                            
                                                                 
 conv1d_5 (Conv1D)           (None, 373, 32)           6176      
                                                                 
 conv1d_6 (Conv1D)           (None, 369, 64)           10304     
                                                                 
 conv1d_7 (Conv1D)           (None, 365, 128)          41088     
                                                                 
 max_pooling1d_3 (MaxPoolin  (None, 182, 128)          0         
 g1D)                                                 

In [7]:
# 모델 훈련
history = model.fit(X_train, y_train_categorical, epochs=10, validation_data=(X_val, y_val_categorical))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [8]:
# 모델 평가
test_loss, test_accuracy = model.evaluate(X_test, y_test_categorical)
print(f'Test accuracy: {test_accuracy}')

Test accuracy: 0.9911110997200012


In [9]:
# 원본 모델 저장
model.save('model.h5')

# 모델 사이즈 측정
model_size = os.path.getsize('model.h5') / (1024 * 1024)
print(f"Model Size: {model_size:.2f} MB")

  saving_api.save_model(


Model Size: 1391.09 MB


In [10]:
# 가중치 비율 계산 함수
def calculate_non_zero_weights(model):
    total_weights = 0
    non_zero_weights = 0
    for layer in model.layers:
        if hasattr(layer, 'kernel'):
            weights = layer.get_weights()[0]
            total_weights += weights.size
            non_zero_weights += np.count_nonzero(weights)
    return non_zero_weights, total_weights

# 프루닝 전 가중치 비율 계산
non_zero_weights_before, total_weights_before = calculate_non_zero_weights(model)
print(f"Before pruning: Non-zero weights = {non_zero_weights_before}, Total weights = {total_weights_before}")

Before pruning: Non-zero weights = 121543379, Total weights = 121543392


In [11]:
# 1단계: 매그니튜드 기반 프루닝
def magnitude_based_pruning(layer, sparsity):
    """매그니튜드 기반 프루닝"""
    if hasattr(layer, 'kernel'):
        weights, biases = layer.get_weights()
        threshold = np.percentile(np.abs(weights), sparsity * 100)
        pruned_weights = np.where(np.abs(weights) < threshold, 0, weights)
        layer.set_weights([pruned_weights, biases])

# 2단계: 가우시안 기반 프루닝
def gaussian_importance(weights):
    """가중치의 중요도를 가우시안 분포로 계산"""
    mean = np.mean(weights)
    std = np.std(weights)
    importance = norm.pdf(weights, loc=mean, scale=std)
    return importance

def gaussian_pruning(layer, threshold_ratio=0.1):
    """가우시안 기반 프루닝"""
    if hasattr(layer, 'kernel'):
        weights, biases = layer.get_weights()
        importance = gaussian_importance(weights)
        threshold = np.percentile(importance, threshold_ratio * 100)
        pruned_weights = np.where(importance < threshold, 0, weights)
        layer.set_weights([pruned_weights, biases])

# 프루닝 수행: 먼저 매그니튜드 기반 프루닝, 그 다음 가우시안 기반 프루닝
def combined_pruning(model, magnitude_sparsity=0.5, gaussian_threshold_ratio=0.1):
    for layer in model.layers:
        if hasattr(layer, 'kernel'):
            # 1단계: 매그니튜드 기반 프루닝
            magnitude_based_pruning(layer, magnitude_sparsity)
            # 2단계: 가우시안 기반 프루닝
            gaussian_pruning(layer, gaussian_threshold_ratio)

# 모델의 각 레이어에 대해 프루닝 수행
combined_pruning(model, magnitude_sparsity=0.5, gaussian_threshold_ratio=0.1)

In [12]:
# 프루닝 후 가중치 비율 계산
non_zero_weights_after, total_weights_after = calculate_non_zero_weights(model)
print(f"After pruning: Non-zero weights = {non_zero_weights_after}, Total weights = {total_weights_after}")

# 최종 가중치 비율 계산
final_weight_ratio = non_zero_weights_after / total_weights_after
print(f"Final non-zero weight ratio: {final_weight_ratio:.4f}")

After pruning: Non-zero weights = 48617360, Total weights = 121543392
Final non-zero weight ratio: 0.4000


In [13]:
# 이후 모델 재학습 및 평가 수행
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
history = model.fit(X_train, y_train_categorical, epochs=10, validation_data=(X_val, y_val_categorical))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [14]:
# 최종 평가
test_loss, test_accuracy = model.evaluate(X_test, y_test_categorical)
print(f'Test accuracy after combined pruning: {test_accuracy}')

Test accuracy after combined pruning: 0.996666669845581


In [15]:
# 스트립 프루닝 적용
model_stripped = tfmot.sparsity.keras.strip_pruning(model)

# 스트립 프루닝 후 모델 저장
model_stripped.save('pruned_model_stripped.h5')

# 스트립 프루닝 후 모델 사이즈 측정
pruned_model_size = os.path.getsize('pruned_model_stripped.h5') / (1024 * 1024)
print(f"Stripped Pruned Model Size: {pruned_model_size:.2f} MB")



Stripped Pruned Model Size: 463.71 MB
