In [2]:
import tensorflow as tf
import pandas as pd

# 열 이름
column_names = ["A", "P", "C", "LK", "WK", "A_Coef", "LKG", "target"]

# 파싱 함수
def parse_line_to_list(line):
    line = tf.strings.strip(line)
    parts = tf.strings.split(line, sep="\t")
    parts = tf.strings.to_number(parts, out_type=tf.float32)
    return parts

# 필터: 유효한 줄만 통과
def is_valid_line(line):
    line = tf.strings.strip(line)
    parts = tf.strings.split(line, sep="\t")
    return tf.equal(tf.size(parts), 8)

# Dataset 로딩
dataset = tf.data.TextLineDataset("./seeds_dataset.txt") \
            .filter(is_valid_line) \
            .map(parse_line_to_list)

# 리스트로 변환
data_list = [list(row.numpy()) for row in dataset]

# DataFrame으로 변환
df = pd.DataFrame(data_list, columns=column_names)

df

Unnamed: 0,A,P,C,LK,WK,A_Coef,LKG,target
0,15.260000,14.84,0.8710,5.763,3.312,2.221,5.220,1.0
1,14.880000,14.57,0.8811,5.554,3.333,1.018,4.956,1.0
2,14.290000,14.09,0.9050,5.291,3.337,2.699,4.825,1.0
3,13.840000,13.94,0.8955,5.324,3.379,2.259,4.805,1.0
4,16.139999,14.99,0.9034,5.658,3.562,1.355,5.175,1.0
...,...,...,...,...,...,...,...,...
194,12.190000,13.20,0.8783,5.137,2.981,3.631,4.870,3.0
195,11.230000,12.88,0.8511,5.140,2.795,4.325,5.003,3.0
196,13.200000,13.66,0.8883,5.236,3.232,8.315,5.056,3.0
197,11.840000,13.21,0.8521,5.175,2.836,3.598,5.044,3.0


In [3]:
print(df.isnull().sum())  

A         0
P         0
C         0
LK        0
WK        0
A_Coef    0
LKG       0
target    0
dtype: int64


In [4]:
X = df.drop('target',axis=1)
X.head()

Unnamed: 0,A,P,C,LK,WK,A_Coef,LKG
0,15.26,14.84,0.871,5.763,3.312,2.221,5.22
1,14.88,14.57,0.8811,5.554,3.333,1.018,4.956
2,14.29,14.09,0.905,5.291,3.337,2.699,4.825
3,13.84,13.94,0.8955,5.324,3.379,2.259,4.805
4,16.139999,14.99,0.9034,5.658,3.562,1.355,5.175


In [9]:
y = df["target"].astype(int) - 1

print(y.value_counts())

target
1    68
0    66
2    65
Name: count, dtype: int64


In [11]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test=train_test_split(X,
                                                  y,
                                                  test_size=0.2,
                                                  shuffle=True,
                                                  random_state=12)
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(159, 7) (159,)
(40, 7) (40,)


In [21]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(64, activation='relu', input_shape=(7,)))
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dense(32, activation='relu'))
model.add(tf.keras.layers.Dense(16, activation='relu'))
model.add(tf.keras.layers.Dense(8, activation='sigmoid'))
model.add(tf.keras.layers.Dense(3, activation='softmax'))


# 모델 요약 보기
model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [22]:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy

model.compile(
    optimizer=Adam(),
    loss=SparseCategoricalCrossentropy(),
    metrics=['accuracy']
)

# 학습 (y_train은 반드시 0부터 시작하는 정수여야 함!)
model.fit(X_train, y_train, epochs=30, batch_size=8, validation_split=0.1)


Epoch 1/30
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - accuracy: 0.3587 - loss: 1.0924 - val_accuracy: 0.2500 - val_loss: 1.0724
Epoch 2/30
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.4581 - loss: 1.0424 - val_accuracy: 0.6875 - val_loss: 0.9904
Epoch 3/30
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6050 - loss: 0.9631 - val_accuracy: 0.5625 - val_loss: 0.9204
Epoch 4/30
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6166 - loss: 0.9090 - val_accuracy: 0.5625 - val_loss: 0.8199
Epoch 5/30
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5939 - loss: 0.8240 - val_accuracy: 0.5625 - val_loss: 0.7678
Epoch 6/30
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5469 - loss: 0.8125 - val_accuracy: 0.5625 - val_loss: 0.7334
Epoch 7/30
[1m18/18[0m [32m━━━━━━━━━

<keras.src.callbacks.history.History at 0x1fcba8bb520>