## 오버피팅 문제와 해결 방법


In [44]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import numpy as np

# 실험용 무작위 데이터 생성
class RandomDataset(Dataset):
    """In-memory dataset of random feature vectors with random class labels.

    All samples are generated once, at construction time, from NumPy's global
    random state. Features are float32 values drawn by ``np.random.rand``;
    labels are int64 values drawn by ``np.random.randint(num_classes)``.

    Args:
        num_samples: Number of (features, label) pairs to generate.
        input_size: Length of each feature vector.
        num_classes: Exclusive upper bound for the integer labels.
    """

    def __init__(self, num_samples=1000, input_size=784, num_classes=10):
        self.num_samples = num_samples
        self.input_size = input_size
        self.num_classes = num_classes
        features, targets = self.generate_data()
        self.data = features
        self.labels = targets

    def generate_data(self):
        """Return a ``(features, labels)`` tensor pair for the whole dataset."""
        # Features are drawn before labels so the global RNG stream is
        # consumed in a fixed order.
        feature_array = np.random.rand(self.num_samples, self.input_size)
        label_array = np.random.randint(self.num_classes, size=self.num_samples)
        features = torch.from_numpy(feature_array.astype(np.float32))
        targets = torch.as_tensor(label_array, dtype=torch.long)
        return features, targets

    def __len__(self):
        """Return the number of samples requested at construction."""
        return self.num_samples

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at position ``idx``."""
        sample = self.data[idx]
        target = self.labels[idx]
        return sample, target

# Build the random dataset and wrap it in a shuffling DataLoader
experiment_dataset = RandomDataset()
experiment_dataloader = DataLoader(
    dataset=experiment_dataset,
    batch_size=32,
    shuffle=True,
)

# 모델 정의
class CustomModel(nn.Module):
    """Fully connected classifier with dropout after every hidden layer.

    Maps 784 input features through four 512-unit ReLU hidden layers to
    10 output logits.

    Args:
        drop_prob: Dropout probability passed to ``nn.Dropout``. When omitted,
            a module-level ``drop_prob`` is used if one is defined; otherwise
            0.5 (the ``nn.Dropout`` default). Previously the global was read
            unconditionally, so constructing the model raised ``NameError``
            whenever it was missing.
    """

    def __init__(self, drop_prob=None):
        super().__init__()
        if drop_prob is None:
            # Stay compatible with notebooks that set `drop_prob` in an
            # earlier cell, without requiring that global to exist.
            drop_prob = globals().get("drop_prob", 0.5)
        self.linear1 = nn.Linear(784, 512)
        self.linear2 = nn.Linear(512, 512)
        self.linear3 = nn.Linear(512, 512)
        self.linear4 = nn.Linear(512, 512)
        self.linear5 = nn.Linear(512, 10)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=drop_prob)

    def forward(self, x):
        """Return the 10 raw class logits for each row of ``x``."""
        for hidden in (self.linear1, self.linear2, self.linear3, self.linear4):
            x = self.dropout(self.relu(hidden(x)))
        # No activation on the output layer: CrossEntropyLoss expects logits.
        return self.linear5(x)

def train_model(model, dataloader, num_epochs=1000, learning_rate=0.01, log_interval=100):
    """Train ``model`` with plain SGD and cross-entropy loss.

    Args:
        model: Module called as ``model(inputs)``; its parameters are updated
            in place.
        dataloader: Re-iterable yielding ``(inputs, labels)`` pairs. It is
            iterated once per epoch and does not need to support ``len()``.
        num_epochs: Number of passes over ``dataloader``.
        learning_rate: Learning rate handed to ``torch.optim.SGD``.
        log_interval: Print the mean batch loss every ``log_interval`` epochs,
            starting with epoch 0. ``0`` or ``None`` disables printing.

    Returns:
        A list with one entry per epoch holding that epoch's mean batch loss
        (``nan`` for an epoch in which the dataloader yielded no batches).
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
    epoch_losses = []

    for epoch in range(num_epochs):
        model.train()  # switch to training mode (enables dropout)
        running_loss = 0.0
        num_batches = 0

        for inputs, labels in dataloader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_batches += 1

        # Count batches ourselves instead of calling len(dataloader): this
        # works for loaders without __len__ and avoids dividing by zero.
        mean_loss = running_loss / num_batches if num_batches else float('nan')
        epoch_losses.append(mean_loss)

        # Report the mean loss only on every `log_interval`-th epoch
        if log_interval and epoch % log_interval == 0:
            print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch, num_epochs, mean_loss))

    return epoch_losses


# Prepare the data and the model for training
experiment_dataset = RandomDataset()
experiment_dataloader = DataLoader(
    dataset=experiment_dataset,
    batch_size=32,
    shuffle=True,
)

# Instantiate the model
model = CustomModel()

# Train the model
train_model(
    model=model,
    dataloader=experiment_dataloader,
    num_epochs=100,
    learning_rate=0.01,
)

def calculate_accuracy(model, dataloader):
    """Return the fraction of samples in ``dataloader`` that ``model`` classifies correctly.

    The model is switched to evaluation mode and is left in that mode.
    Gradients are not tracked while predicting.

    Args:
        model: Module called as ``model(inputs)`` that returns one score per
            class along dimension 1.
        dataloader: Iterable yielding ``(inputs, labels)`` pairs.

    Returns:
        ``correct / total`` as a float in ``[0, 1]``, or ``0.0`` when the
        dataloader yields no samples.
    """
    model.eval()  # switch to evaluation mode (disables dropout)
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in dataloader:
            # The predicted class is the index of the largest score per row.
            predicted = model(inputs).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        # Nothing was evaluated; avoid ZeroDivisionError.
        return 0.0
    return correct / total

# Build a fresh random dataset and DataLoader to evaluate on
experiment_dataset = RandomDataset()
experiment_dataloader = DataLoader(experiment_dataset, batch_size=32, shuffle=True)

# Evaluate the `model` that was trained earlier in this file. Re-creating
# CustomModel() here would throw away the trained weights and measure the
# accuracy of an untrained network instead.
accuracy = calculate_accuracy(model, experiment_dataloader)
print("accuracy:",accuracy*100,"%")

Epoch [0/100], Loss: 2.3011
accuracy: 10.299999999999999 %
