In [338]:
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

df = pd.read_csv('./output/colors_2024-03-20T19-42-53.csv')


In [328]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 196 entries, 0 to 195
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   word    196 non-null    object
 1   r       196 non-null    int64 
 2   g       196 non-null    int64 
 3   b       196 non-null    int64 
dtypes: int64(3), object(1)
memory usage: 6.3+ KB


In [329]:
# turn loaded dataframe into torch tensors
from word_encoding import WordEncoder

we = WordEncoder()

inputs = we.encode_list(df['word'])
for i in range(len(input)):
    inputs[i]= torch.transpose(inputs[i], 0, 1)
    i+=1
print(inputs[0].shape)
torch.save(inputs, 'inputs0.pt')

torch.Size([2, 45])


In [330]:
outputs = torch.tensor(df.drop(['word'],axis='columns').values)
torch.save(outputs, 'outputs0.pt')

In [331]:
# create dataset class to take inputs & outputs
class Data(Dataset):
    # Constructor
    def __init__(self, inputs, outputs):
        self.x = inputs
        self.y = outputs
        self.len = len(inputs)
    
    # Getter
    def __getitem__(self, index):
        return self.x[index], self.y[index]
    
    # Get number of samples
    def __len__(self):
        return self.len

In [332]:
# Making Model
class ColorPredictor(torch.nn.Module):
    #Constructor
    def __init__(self):
        super(ColorPredictor, self).__init__()
        self.linear = torch.nn.Linear(45, 3, device= device) #length of encoded word vectors & size of r,g,b vectors
        
    # Prediction
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        y_pred = self.linear(x)
        return y_pred

In [333]:
# Create model

model = ColorPredictor()


In [334]:
# create train/test split
train_size = int(0.8 * len(inputs))
train_data = Data(inputs[:train_size], outputs[:train_size])
test_data = Data(inputs[train_size:], outputs[train_size:])

In [335]:
# create DataLoaders
train_loader = DataLoader(dataset = train_data, batch_size=2)
test_loader = DataLoader(dataset = test_data, batch_size=2)

In [336]:
# criterion & optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.AdamW(model.parameters())

In [341]:
# train & test loops
torch.manual_seed(42)

epochs = 200

epoch_count = []
loss_values = []
test_loss_values = []

for epoch in range(epochs):
    # Train
    model.train()
    for x,y in train_loader:
        y_pred = model(x)
        loss = criterion(y_pred.float(), y.float())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    # test
    model.eval()
    with torch.inference_mode():
        for x,y in test_loader:
            test_pred = model(x)
            test_loss = criterion(test_pred, y)
        
    if epoch % 10 == 0:
        epoch_count.append(epoch)
        loss_values.append(loss)
        test_loss_values.append(test_loss)
        print(f'Epoch: {epoch} | Loss: {loss:.4f} | Test Loss: {test_loss:.4f}')

  return F.mse_loss(input, target, reduction=self.reduction)


KeyboardInterrupt: 