In [8]:
import random
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, BatchNormalization, ReLU
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np

# train 데이터 불러오기
data = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/open/train.csv')
feature_names = data.iloc[0, 2:-1].index

# 초기 피처 수
feature_num = len(data.columns) - 3
initial_feature_count = feature_num
num_classes = 1

# 이전 모델의 헤드 부분 정의
input_head = (feature_num,)  # 헤드 부분의 입력 형태 (피처 수에 따라 조정)
head_model = create_mobilenetv3_like_model(input_head, num_classes)
prev_model_weights = None

# MobileNetV3-like 모델 정의
def create_mobilenetv3_like_model(input_shape, num_classes):
    input_tensor = Input(shape=input_shape)
    print("input_shape = ", input_shape)
    # Fully Connected Layer (Dense Layer)
    x = Dense(16, activation='relu')(input_tensor)
    # Depthwise Separable Convolution Blocks
    for _ in range(3):
        x = depthwise_conv_block(x, 16)
    x = Dense(32, activation='relu')(x)
    for _ in range(3):
        x = depthwise_conv_block(x, 32)
    x = Dense(64, activation='relu')(x)
    for _ in range(3):
        x = depthwise_conv_block(x, 64)
    x = Dense(128, activation='relu')(x)
    for _ in range(3):
        x = depthwise_conv_block(x, 128)
    # Classifier
    x = Dense(256, activation='relu')(x)
    x = Dense(64, activation='relu')(x)
    x = Dense(16, activation='relu')(x)
    # Output Layer
    output_tensor = Dense(num_classes, activation='sigmoid')(x)
    model = Model(inputs=input_tensor, outputs=output_tensor)
    return model

# Depthwise Separable Convolution Block
def depthwise_conv_block(x, filters):
    x = BatchNormalization()(x)
    x = ReLU()(x)
    x = Dense(filters, activation='relu')(x)
    return x

# 피처 수 줄이기 반복 (랜덤 선택)
final_feature_count = 10
loop_num = 1
while initial_feature_count > final_feature_count:

    # 새로운 피처 수로 모델 재정의
    model = create_mobilenetv3_like_model(input_head, num_classes)
    optimizer=Adam(learning_rate=0.01)

    # 이전 모델의 가중치 불러오기
    if prev_model_weights != None:
        model.load_weights(prev_model_weights)
        optimizer = Adam(learning_rate=0.0001*loop_num)  # 작은 학습률로 조정
        loop_num /= 10

    # 모델 컴파일 (학습률을 조정하여 컴파일)
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

    # train 데이터 불러오기
    changable_data = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/open/train.csv')
    changable_data = changable_data.fillna(0)
    # 랜덤하게 특성 선택
    selected_features = random.sample(range(2, len(changable_data.columns) - 1), initial_feature_count)

    # 선택된 특성 이외의 특성을 0으로 설정
    for column in changable_data.columns[2:-1]:
        if changable_data.columns.get_loc(column) not in selected_features:
            changable_data[column] = 0

    # 입력 데이터 갱신
    scaler = StandardScaler()
    X = changable_data.iloc[1:, 2:-1].values  # 입력 데이터
    y = changable_data.iloc[1:, -1].values   # 출력 데이터 (0 또는 1)
    X = scaler.fit_transform(X)

    # 데이터 전처리
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # 입력 데이터를 TensorFlow Tensor로 변환
    X_train = tf.convert_to_tensor(X_train, dtype=tf.float32)
    X_test = tf.convert_to_tensor(X_test, dtype=tf.float32)
    y_train = tf.convert_to_tensor(y_train, dtype=tf.float32)
    y_test = tf.convert_to_tensor(y_test, dtype=tf.float32)

    batch_size = 32
    epochs = 10

    # 모델 재학습
    history = model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(X_test, y_test))

    print(f"Training accuracy with {initial_feature_count} features:", max(history.history['accuracy']))
    print(f"Validation accuracy with {initial_feature_count} features:", max(history.history['val_accuracy']))
    initial_feature_count -= 10
    model.save_weights(f'/content/drive/MyDrive/Colab Notebooks/open/strawberry_pirates_{initial_feature_count}.h5')
    prev_model_weights = f'/content/drive/MyDrive/Colab Notebooks/open/strawberry_pirates_{initial_feature_count}.h5'

print("Final Training accuracy:", max(history.history['accuracy']))
print("Final Validation accuracy:", max(history.history['val_accuracy']))

input_shape =  (51,)
input_shape =  (51,)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training accuracy with 51 features: 0.8388469815254211
Validation accuracy with 51 features: 0.8428520560264587
input_shape =  (51,)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training accuracy with 41 features: 0.8526829481124878
Validation accuracy with 41 features: 0.8453352451324463
input_shape =  (51,)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training accuracy with 31 features: 0.8274944424629211
Validation accuracy with 31 features: 0.828662633895874
input_shape =  (51,)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training accuracy with 21 features: 0.8335254788398743
Validation accuracy with 21 features: 0.8275984525680542
inpu

#테스트 파일을 학습 파일에 맞춰 저장

In [9]:
import pandas as pd

# 데이터 파일 경로 설정
train_data_path = '/content/drive/MyDrive/Colab Notebooks/open/train.csv'
test_data_path = '/content/drive/MyDrive/Colab Notebooks/open/test.csv'

# CSV 파일을 데이터프레임으로 읽어옴
changable_train_data = pd.read_csv(train_data_path)
test_data = pd.read_csv(test_data_path)

# 학습 데이터와 테스트 데이터에서 피쳐 이름을 추출
changable_train_feature = changable_train_data.columns[2:-1]
test_feature = test_data.columns[2:-1]

# 테스트 데이터의 길이에 맞게 학습 데이터를 잘라냄
changable_train_data = changable_train_data.iloc[:len(test_data)]

# 테스트 데이터의 피쳐를 기반으로 학습 데이터를 업데이트
for name in changable_train_feature:
    if name in test_feature:
        # 테스트 데이터의 해당 열을 가져와서 학습 데이터에 덮어쓰기
        changable_train_data[name] = test_data[name].values
    else:
        # 테스트 데이터에 해당 열이 없으면 0으로 채우기
        changable_train_data[name] = 0

# 업데이트된 학습 데이터를 새로운 CSV 파일로 저장
changable_train_data.to_csv('/content/drive/MyDrive/Colab Notebooks/open/updated_train.csv', index=False)


#테스트 파일 동작시키기

In [10]:
t_model = create_mobilenetv3_like_model(input_head, num_classes)
# 모델 컴파일
t_model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])
t_model.load_weights(prev_model_weights)


test_data = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/open/updated_train.csv', skiprows=[0])  # 데이터 파일 경로 지정, 첫 번째 행 스킵
test_data = test_data.fillna(0)
num_rows = test_data.shape[0]
test_X = test_data.iloc[:, 2:-1].values  # 입력 데이터
scaler = StandardScaler()
test_X = scaler.fit_transform(test_X)
len_data = len(test_data.columns)
for i in range(1000):
    sample_data = test_X[i].reshape(1, -1)  # 모델 입력 형태로 변환 (1개의 샘플 데이터)
    model_output = model.predict(sample_data)
    if model_output > 0.5:
        print("Model Output:", model_output)

input_shape =  (51,)
Model Output: [[0.9999689]]
Model Output: [[0.6328907]]
Model Output: [[0.69648814]]
Model Output: [[0.6911472]]
Model Output: [[0.84487516]]
Model Output: [[0.50335956]]
Model Output: [[0.818527]]
Model Output: [[0.5631576]]
Model Output: [[0.85211146]]
Model Output: [[0.50096166]]
Model Output: [[0.7537071]]
Model Output: [[0.77827305]]
Model Output: [[0.75152063]]
Model Output: [[0.595521]]
Model Output: [[0.8140946]]
Model Output: [[0.5477645]]
Model Output: [[0.7144611]]
Model Output: [[0.508875]]
Model Output: [[0.6477358]]
Model Output: [[0.5068868]]
Model Output: [[0.8573276]]
Model Output: [[0.5180998]]
Model Output: [[0.78080994]]
Model Output: [[0.56661433]]
Model Output: [[0.9055718]]
Model Output: [[0.8191481]]
Model Output: [[0.81047976]]
Model Output: [[0.6220533]]
Model Output: [[0.64321077]]
Model Output: [[0.6869841]]
Model Output: [[0.5233979]]
Model Output: [[0.6328108]]
Model Output: [[0.8583185]]
Model Output: [[0.5856075]]
Model Output: [[0.7