In [19]:
import torch
import torch.nn as nn
import torch.optim as optim

import pandas as pd

In [20]:
df_all = pd.read_csv("data_by_all_20230514.csv")
df_states = pd.read_csv("data_by_states_20230514.csv")

In [39]:
# ID를 기준으로 dataframe 머지.
df = pd.merge(df_states, df_all, on='ID')

In [40]:
# 작물이름에 따라 groupby
grouped = df.groupby('crop')

group_dict = {}  # 그룹을 저장할 딕셔너리

for group_id, group_df in grouped:
		if group_id != 'Wheat':
				# 아래 column들 삭제 및 column 명 변경
				group_df = group_df.drop(
						['productions_winter', 'harvested_winter', 'cultivated_winter'], axis=1)
				group_df.rename(
						columns={'productions_spring': 'productions', 'harvested_spring': 'harvested', 'cultivated_spring': 'cultivated'})
				
				# 결측치 있는 행 삭제
				group_df = group_df.dropna()
				group_dict[group_id] = group_df
		else:
				# productions_spring 열과 productions_winter 열이 모두 빈 행 삭제
				group_df = group_df.dropna(
						subset=['productions_spring', 'productions_winter'], how='all')
				
				# 겨울철 밀 데이터
				winter = group_df.copy()
				winter = winter.drop(
						['productions_spring', 'harvested_spring', 'cultivated_spring'], axis=1)
				winter.rename(
						columns={'productions_winter': 'productions', 'harvested_winter': 'harvested', 'harvested_winter': 'cultivated'})
				
				# 봄철 밀 데이터
				spring = group_df.copy()
				spring = spring.drop(
						['productions_winter', 'harvested_winter', 'cultivated_winter'], axis=1)
				spring.rename(
						columns={'productions_spring': 'productions', 'harvested_spring': 'harvested', 'cultivated_spring': 'cultivated'})
				
				winter.dropna()
				spring.dropna()
				group_dict['Winter_wheat'] = winter
				group_dict['Spring_wheat'] = spring

		


In [37]:
print(len(group_dict['Corn']))
print(len(group_dict['Rice']))
print(len(group_dict['Wheat']))


3456
3348
3756


In [None]:
class FieldCropsPredictionModel(nn.Module):
		def __init__(self, input_dim, hidden_dim, layer_num=2):
				super(FieldCropsPredictionModel, self).__init__()
				self.filter_layer = nn.Linear(input_dim, 1, bias=True)
				self.hidden_layers = nn.ModuleList()
				self.output_layers = nn.Linear(hidden_dim, 1)

				for i in range(layer_num):
					self.hidden_layers.append(nn.Linear(12 if i == 0 else hidden_dim, hidden_dim))
				
		def forward(self, input):
			input = input.t()
			# 배치를 사용할 때
			# input = input.transpose(1, 2)
			out = self.filter_layer(input)
			for i, layer in enumerate(self.hidden_layers):
				out = layer(out)
			
			out = self.output_layers(out)

			return out

In [None]:
def train(model, dataloader, criterion, optim, scheduler, num_epochs):
		model.train()
		train_loss_list = []
		for epoch in range(num_epochs):
				running_loss = 0.0
				
				for inputs, labels in dataloader:
						outputs = model(inputs)
						loss = criterion(outputs, labels)

						optim.zero_grad()
						loss.backward()
						optim.step()

						running_loss += loss
				
				scheduler.step()

				epoch_loss = running_loss / len(dataloader.dataset)
				train_loss_list.append(epoch_loss)
		
		return model, train_loss_list

In [None]:
def validation(model, dataloader, criterion, num_epochs):
		model.eval()
		val_loss_list = []
		for epoch in range(num_epochs):
				running_loss = 0.0
				
				for inputs, labels in dataloader:
						outputs = model(inputs)
						loss = criterion(outputs, labels)

						running_loss += loss
				

				epoch_loss = running_loss / len(dataloader.dataset)
				val_loss_list.append(epoch_loss)
		
		return model, val_loss_list

In [None]:
model = FieldCropsPredictionModel(10, 15, 2)
optimizer_ft = optim.Adam(model.parameters(), lr=0.001)
exp_lr_scheduler = optim.lr_scheduler.StepLR(
    optimizer_ft, step_size=7, gamma=0.1)
criterion = nn.MSELoss()

In [None]:
model_train, train_loss = train(
    model, train_loader, criterion, optimizer_ft, exp_lr_scheduler, 50)

model_val, val_loss = validation(
    model, val_loader, criterion, 50)