In [24]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
import tensorflow as tf
from tensorflow import keras

df = pd.read_csv("C:/dataset/abalone.csv",)
df

Unnamed: 0,id,Sex,Length,Diameter,Height,Whole_weight,Shucked_weight,Viscera_weight,Shell_weight,Rings
0,0,M,0.455,0.365,0.095,0.5140,0.2245,0.1010,0.1500,15
1,1,M,0.350,0.265,0.090,0.2255,0.0995,0.0485,0.0700,7
2,2,F,0.530,0.420,0.135,0.6770,0.2565,0.1415,0.2100,9
3,3,M,0.440,0.365,0.125,0.5160,0.2155,0.1140,0.1550,10
4,4,I,0.330,0.255,0.080,0.2050,0.0895,0.0395,0.0550,7
...,...,...,...,...,...,...,...,...,...,...
4172,4172,F,0.565,0.450,0.165,0.8870,0.3700,0.2390,0.2490,11
4173,4173,M,0.590,0.440,0.135,0.9660,0.4390,0.2145,0.2605,10
4174,4174,M,0.600,0.475,0.205,1.1760,0.5255,0.2875,0.3080,9
4175,4175,F,0.625,0.485,0.150,1.0945,0.5310,0.2610,0.2960,10


In [4]:
print(df.head())
print(df.info())

   id Sex  Length  Diameter  Height  Whole_weight  Shucked_weight  \
0   0   M   0.455     0.365   0.095        0.5140          0.2245   
1   1   M   0.350     0.265   0.090        0.2255          0.0995   
2   2   F   0.530     0.420   0.135        0.6770          0.2565   
3   3   M   0.440     0.365   0.125        0.5160          0.2155   
4   4   I   0.330     0.255   0.080        0.2050          0.0895   

   Viscera_weight  Shell_weight  Rings  
0          0.1010         0.150     15  
1          0.0485         0.070      7  
2          0.1415         0.210      9  
3          0.1140         0.155     10  
4          0.0395         0.055      7  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4177 entries, 0 to 4176
Data columns (total 10 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   id              4177 non-null   int64  
 1   Sex             4177 non-null   object 
 2   Length          4177 non-null   float64
 3   Di

In [38]:
# 데이터의 상위 5개 샘플 확인
print(df.head())

# 결측치 확인
print(df.isnull().sum())

# Ring 컬럼은 분류와 회귀에서 모두 라벨로 사용
# 설명 변수와 타겟 변수 분리
X = df.drop(columns=['Rings'])  # 설명 변수
y = df['Rings']  # 타겟 변수

# 범주형 변수 'Sex'를 더미 변수로 변환 (원-핫 인코딩)
X = pd.get_dummies(X, columns=['Sex'], drop_first=True)

# 데이터셋을 학습용과 테스트용으로 분리
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 데이터셋 정보 출력
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

   id Sex  Length  Diameter  Height  Whole_weight  Shucked_weight  \
0   0   M   0.455     0.365   0.095        0.5140          0.2245   
1   1   M   0.350     0.265   0.090        0.2255          0.0995   
2   2   F   0.530     0.420   0.135        0.6770          0.2565   
3   3   M   0.440     0.365   0.125        0.5160          0.2155   
4   4   I   0.330     0.255   0.080        0.2050          0.0895   

   Viscera_weight  Shell_weight  Rings  
0          0.1010         0.150     15  
1          0.0485         0.070      7  
2          0.1415         0.210      9  
3          0.1140         0.155     10  
4          0.0395         0.055      7  
id                0
Sex               0
Length            0
Diameter          0
Height            0
Whole_weight      0
Shucked_weight    0
Viscera_weight    0
Shell_weight      0
Rings             0
dtype: int64
(3341, 10) (836, 10) (3341,) (836,)


In [40]:
# 머신러닝 분류 모델: 랜덤 포레스트
rfc = RandomForestClassifier(n_estimators=100, random_state=42)
rfc.fit(X_train, y_train)

# 분류 모델 성능 평가
y_pred_class = rfc.predict(X_test)
print(f"Random Forest Classifier 정확도: {rfc.score(X_test, y_test)}")

Random Forest Classifier 정확도: 0.2727272727272727


In [42]:
# 머신러닝 회귀 모델: 랜덤 포레스트 회귀
rfr = RandomForestRegressor(n_estimators=100, random_state=42)
rfr.fit(X_train, y_train)

# 회귀 모델 성능 평가 (평균 제곱 오차)
y_pred_reg = rfr.predict(X_test)
mse = np.mean((y_test - y_pred_reg) ** 2)
print(f"Random Forest Regressor 평균 제곱 오차: {mse}")

Random Forest Regressor 평균 제곱 오차: 3.842545454545455


In [44]:
# 신경망 분류 모델
model_class = keras.Sequential([
    keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid')  # 이진 분류를 가정한 모델
])

model_class.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model_class.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

# 분류 신경망 모델 성능 평가
test_loss, test_acc = model_class.evaluate(X_test, y_test)
print(f"신경망 분류 모델 정확도: {test_acc}")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 11ms/step - accuracy: 1.0372e-04 - loss: -5503.8428 - val_accuracy: 0.0000e+00 - val_loss: -37142.0547
Epoch 2/10
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 3.6221e-05 - loss: -64974.1211 - val_accuracy: 0.0000e+00 - val_loss: -217145.8125
Epoch 3/10
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 1.0372e-04 - loss: -305996.7500 - val_accuracy: 0.0000e+00 - val_loss: -736235.5000
Epoch 4/10
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 1.8647e-04 - loss: -935538.7500 - val_accuracy: 0.0000e+00 - val_loss: -1799651.8750
Epoch 5/10
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 3.6221e-05 - loss: -2201880.5000 - val_accuracy: 0.0000e+00 - val_loss: -3606123.2500
Epoch 6/10
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 3.33

In [46]:
# 신경망 회귀 모델
model_reg = keras.Sequential([
    keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dense(1)  # 출력 노드 1개 (연속 값)
])

model_reg.compile(optimizer='adam', loss='mse')
model_reg.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

# 회귀 신경망 모델 성능 평가
test_loss = model_reg.evaluate(X_test, y_test)
print(f"신경망 회귀 모델 평균 제곱 오차: {test_loss}")

Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - loss: 3815.3123 - val_loss: 34.0023
Epoch 2/10
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 37.1285 - val_loss: 33.2085
Epoch 3/10
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 35.5217 - val_loss: 32.4016
Epoch 4/10
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 35.1536 - val_loss: 29.4140
Epoch 5/10
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 34.0311 - val_loss: 36.3630
Epoch 6/10
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 30.7166 - val_loss: 18.4246
Epoch 7/10
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 17.7239 - val_loss: 13.5893
Epoch 8/10
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 13.1993 - val_loss: 12.6353
Epoch 9/10
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━