In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

# CUDA 사용 가능 여부 확인 및 장치 설정
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"현재 사용중인 Device : {device}")

현재 사용중인 Device : cuda


In [None]:
# 타겟 인코딩
target_encoder = LabelEncoder()
train['Fertilizer Name'] = target_encoder.fit_transform(train['Fertilizer Name'])

# 전처리: 피처 추출
X = train.drop(['id', 'Fertilizer Name'], axis=1).values
y = train['Fertilizer Name'].values
X_test = test.drop('id', axis=1).values

# 데이터 정규화
scaler = StandardScaler()
X = scaler.fit_transform(X)
X_test = scaler.transform(X_test)

# PyTorch tensor로 변환
X_tensor = torch.tensor(X, dtype=torch.float32).to(device)
y_tensor = torch.tensor(y, dtype=torch.long).to(device)

# 훈련 및 검증 세트 분리
X_train, X_val, y_train, y_val = train_test_split(X_tensor, y_tensor, test_size=0.2, random_state=42)

# 모델 정의
class NeuralNet(nn.Module):
    def __init__(self, input_size, num_classes):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.3)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 32)
        self.fc4 = nn.Linear(32, num_classes)

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.relu(self.fc3(x))
        x = self.fc4(x)
        return x

num_classes = len(target_encoder.classes_)
model = NeuralNet(X.shape[1], num_classes).to(device)

# 손실 함수 및 옵티마이저 설정
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0005)

# 모델 훈련
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    outputs = model(X_train)
    loss = criterion(outputs, y_train)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch+1) % 10 == 0:
        model.eval()
        with torch.no_grad():
            val_outputs = model(X_val)
            val_loss = criterion(val_outputs, y_val)
            print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}, Validation Loss: {val_loss.item():.4f}')

# 테스트 데이터 예측
test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
model.eval()
with torch.no_grad():
    predictions = model(test_tensor)
    top3_predictions = torch.topk(predictions, 3, dim=1).indices.cpu().numpy()

# 제출 파일 생성
submission = pd.DataFrame({
    'id': test['id'],
    'Fertilizer Name': [' '.join(target_encoder.inverse_transform(pred)) for pred in top3_predictions]
})
submission.to_csv('submission.csv', index=False)


In [2]:
# 데이터 로드
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

In [3]:
train.head()

Unnamed: 0,id,Temparature,Humidity,Moisture,Soil Type,Crop Type,Nitrogen,Potassium,Phosphorous,Fertilizer Name
0,0,37,70,36,Clayey,Sugarcane,36,4,5,28-28
1,1,27,69,65,Sandy,Millets,30,6,18,28-28
2,2,29,63,32,Sandy,Millets,24,12,16,17-17-17
3,3,35,62,54,Sandy,Barley,39,12,4,10-26-26
4,4,35,58,43,Red,Paddy,37,2,16,DAP


In [5]:
# 각 컬럼에 대해 encoder 만들고 변환
soil_encoder = LabelEncoder()
crop_encoder = LabelEncoder()

train['Soil Type'] = soil_encoder.fit_transform(train['Soil Type'])
test['Soil Type'] = soil_encoder.transform(test['Soil Type'])

train['Crop Type'] = crop_encoder.fit_transform(train['Crop Type'])
test['Crop Type'] = crop_encoder.transform(test['Crop Type'])

In [6]:
train.head()

Unnamed: 0,id,Temparature,Humidity,Moisture,Soil Type,Crop Type,Nitrogen,Potassium,Phosphorous,Fertilizer Name
0,0,37,70,36,1,8,36,4,5,28-28
1,1,27,69,65,4,4,30,6,18,28-28
2,2,29,63,32,4,4,24,12,16,17-17-17
3,3,35,62,54,4,0,39,12,4,10-26-26
4,4,35,58,43,3,6,37,2,16,DAP


In [10]:
# 타겟 인코딩
target_encoder = LabelEncoder()
train['Fertilizer Name (encoded)'] = target_encoder.fit_transform(train['Fertilizer Name'])

In [12]:
print(train[['Fertilizer Name', 'Fertilizer Name (encoded)']].head(10))
print('클래스별 매핑:', dict(zip(target_encoder.classes_, target_encoder.transform(target_encoder.classes_))))
print('전체 클래스:', target_encoder.classes_)
print('라벨 개수:', len(target_encoder.classes_))

  Fertilizer Name  Fertilizer Name (encoded)
0           28-28                          4
1           28-28                          4
2        17-17-17                          2
3        10-26-26                          0
4             DAP                          5
5           20-20                          3
6           28-28                          4
7        14-35-14                          1
8        17-17-17                          2
9           20-20                          3
클래스별 매핑: {'10-26-26': np.int64(0), '14-35-14': np.int64(1), '17-17-17': np.int64(2), '20-20': np.int64(3), '28-28': np.int64(4), 'DAP': np.int64(5), 'Urea': np.int64(6)}
전체 클래스: ['10-26-26' '14-35-14' '17-17-17' '20-20' '28-28' 'DAP' 'Urea']
라벨 개수: 7


In [1]:
# 필요한 라이브러리 임포트
import torch  # PyTorch 딥러닝 프레임워크
import torch.nn as nn  # 신경망 모듈
import torch.optim as optim  # 최적화 알고리즘
from sklearn.preprocessing import LabelEncoder, StandardScaler  # 데이터 전처리 도구
from sklearn.model_selection import train_test_split  # 데이터 분할
import pandas as pd  # 데이터 처리
import matplotlib.pyplot as plt

In [2]:
def mapk(actual, predicted, k=3):
    """
    actual: (N,) numpy array, 정답 인덱스
    predicted: (N, k) numpy array, 각 row별로 상위 k개 예측 인덱스
    """
    score = 0.0
    for a, p in zip(actual, predicted):
        try:
            idx = list(p).index(a)
            score += 1.0 / (idx + 1)
        except ValueError:
            continue
    return score / len(actual)

In [3]:
# 데이터 로드
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

In [4]:
train.head(1)

Unnamed: 0,id,Temparature,Humidity,Moisture,Soil Type,Crop Type,Nitrogen,Potassium,Phosphorous,Fertilizer Name
0,0,37,70,36,Clayey,Sugarcane,36,4,5,28-28


In [5]:
train['Fertilizer Name'].value_counts() # 다중분류모형

Fertilizer Name
14-35-14    114436
10-26-26    113887
17-17-17    112453
28-28       111158
20-20       110889
DAP          94860
Urea         92317
Name: count, dtype: int64

In [24]:
# 1. 인코더 생성
le_soil = LabelEncoder()
le_crop = LabelEncoder()
le_target = LabelEncoder()

# 2. fit_transform할 때 각각 다르게 써야 함
train['Soil Type'] = le_soil.fit_transform(train['Soil Type'])
train['Crop Type'] = le_crop.fit_transform(train['Crop Type'])
train['Fertilizer Name'] = le_target.fit_transform(train['Fertilizer Name'])

train.head()

Unnamed: 0,id,Temparature,Humidity,Moisture,Soil Type,Crop Type,Nitrogen,Potassium,Phosphorous,Fertilizer Name
0,0,37,70,36,1,8,36,4,5,4
1,1,27,69,65,4,4,30,6,18,4
2,2,29,63,32,4,4,24,12,16,2
3,3,35,62,54,4,0,39,12,4,0
4,4,35,58,43,3,6,37,2,16,5


In [25]:
# 입력 특성과 타겟 변수 분리
features = train.drop(columns = ['id', 'Fertilizer Name']) # id와 타겟변수 제외한 특성
target = train['Fertilizer Name'] # 타겟변수

# 특성 스케일링 (정규화)
scaler = StandardScaler() # 표준화 스케일러 생성
features_scaled = scaler.fit_transform(features) # 특성 정규화

In [26]:
# 학습/검증 데이터 분할 (80:20)
X_train, X_test, y_train, y_test = train_test_split(features_scaled, target, test_size=0.2, random_state=42)

In [27]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((600000, 8), (150000, 8), (600000,), (150000,))

# 텐서변환 & GPU 사용

In [28]:
# GPU 사용 가능 여부에 따라 device 설정
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# 데이터를 PyTorch 텐서로 변환하고 device로 이동
X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)  # 학습 데이터
y_train_tensor = torch.tensor(y_train.values, dtype=torch.long).to(device)  # 학습 타겟
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)  # 검증 데이터
y_test_tensor = torch.tensor(y_test.values, dtype=torch.long).to(device)  # 검증 타겟

# 신경망 모델 정의

In [34]:
class FertilizerNet(nn.Module):
    def __init__(self, input_dim, ioutput_dim):
        super(FertilizerNet, self).__init__
        self.fc1 = nn.Linear(input_dim, 32)  # 첫번째 연결층
        self.relu = nn.ReLU()                # 활성화 함수
        self.fc2 = nn.Linear(32, 16)         # 두번쨰 연결층
        self.out = nn.Linear(16, output_dim) # 출력층

    def forward(self, x):
        x = self.relu(self.fc1(x))           # 첫번째 층 통과
        x = self.relu(self.fc12(x))          # 두번째 층 통과
        x = self.out(x)                      # 출력층 통과
        return x

In [35]:
# 모델 초기화
input_dim = X_train_tensor.shape[1]          # 입력 특성의 차원
output_dim = len(le_target.classes_)          # 출력 클래스의 수

# input_dim, output_dim
model = FertilizerNet(input_dim, output_dim).to(device) # 모델 생성, GPU로 연산 준비
criterion = nn.CrossEntropyLoss()                       # 교차 엔트로피 손실 함수
optimizer = optim.Adam(model.parameters(), lr = 0.01)   # Adam

AttributeError: cannot assign module before Module.__init__() call

## 모델학습

In [32]:
epochs = 100 # 전체 학습 횟수
train_losses = []
val_losses = []
train_mpas = []
val_maps = []

for epoch in range(epochs):
    model.train()                             # 학습 모드
    outputs = model(X_train_tensor)           # 순전파
    loss = criterion(outputs, y_train_tensor) # 손실계산
    optimizer.zero_grad()                     # 그래디언트 초기화
    loss.backward()                           # 역전파
    optimizer.step()                          # 파라미터 업데이트
    train_losses.append(loss.item())          # 손실계산 집계

    # MAP@3 
    # train MAP@3
    with torch.no_grad():
        train_probs = torch.softmax(outputs, dim=1)
        # 각 샘플에 대해 상위 3개 클래스의 인덱스를 추출하고 CPU로 이동한 후 NumPy 배열로 변환
        train_top3 = torch.topk(train_probs, k=3, dim=1).indices.cpu().numpy()
        train_map = mapk(y_train_tensor.cpu().numpy(), train_top3, k=3)
        train_maps.append(train_map)

    # validation
    model.eval()
    with torch.no_grad():
        val_outputs = model(X_test_tensor)
        val_loss = criterion(val_outputs, y_test_tensor)
        val_losses.append(val_loss.item())
        val_probs = torch.softmax(val_outputs, dim=1)
        val_top3 = torch.topk(val_probs, 3, dim=1).indices.cpu().numpy()
        val_map = mapk(y_test_tensor.cpu().numpy(), val_top3, k=3)
        val_maps.append(val_map)

    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}, Val Loss: {val_loss.item():.4f}, Train MAP@3: {train_map:.4f}, Val MAP@3: {val_map:.4f}")

NameError: name 'model' is not defined