In [38]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

# mps = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
mps = torch.device("cpu")
print('Using device: ', mps)

df = pd.read_csv('./output/colors_2024-03-22T18-04-05.csv')


Using device:  cpu


In [39]:
df.info()
display(df)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1080 entries, 0 to 1079
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   word    1080 non-null   object
 1   r       1080 non-null   int64 
 2   g       1080 non-null   int64 
 3   b       1080 non-null   int64 
dtypes: int64(3), object(1)
memory usage: 33.9+ KB


Unnamed: 0,word,r,g,b
0,A,113,221,120
1,B,53,191,116
2,C,181,61,238
3,D,84,179,60
4,E,22,64,85
...,...,...,...,...
1075,castle,181,61,238
1076,broad,53,191,116
1077,long,113,58,208
1078,could,181,61,238


In [40]:
# turn loaded dataframe into torch tensors
from word_encoding import WordEncoder

we = WordEncoder()

inputs_list = we.encode_list(df['word'])
inputs = torch.tensor((len(inputs_list),1,45), dtype=torch.float)
inputs = torch.cat(inputs_list, dim=0)
print(inputs.shape)
torch.save(inputs, 'inputs0.pt')
print(inputs[0].shape)

torch.Size([1080, 45])
torch.Size([45])


In [41]:
outputs = torch.tensor(df.drop(['word'],axis='columns').values)
print(outputs.shape)
torch.save(outputs, 'outputs0.pt')

torch.Size([1080, 3])


In [42]:
# create dataset class to take inputs & outputs
class Data(Dataset):
    # Constructor
    def __init__(self, inputs, outputs):
        self.x = inputs
        self.x = self.x.to(mps)
        self.y = outputs
        self.y = self.y.to(mps)
        self.len = len(inputs)
    
    # Getter
    def __getitem__(self, index):
        return self.x[index], self.y[index]
    
    # Get number of samples
    def __len__(self):
        return self.len

In [43]:
# Making Model
class ColorPredictor(torch.nn.Module):
    #Constructor
    def __init__(self):
        super(ColorPredictor, self).__init__()
        self.linear1 = torch.nn.Linear(45, 20, device= mps) #length of encoded word vectors
        self.linear2 = torch.nn.Linear(20,10, device= mps)
        self.linear3 = torch.nn.Linear(10,3, device=mps)
        self.reLU = torch.nn.ReLU()
        
    # Prediction
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        out = self.linear1(x)
        out = self.reLU(out)
        out = self.linear2(out)
        out = self.reLU(out)
        out = self.linear3(out)
        return out

In [44]:
# Create model

model = ColorPredictor()
model.to(mps)


ColorPredictor(
  (linear1): Linear(in_features=45, out_features=20, bias=True)
  (linear2): Linear(in_features=20, out_features=10, bias=True)
  (linear3): Linear(in_features=10, out_features=3, bias=True)
  (reLU): ReLU()
)

In [45]:
# create train/test split
train_size = int(0.8 * len(inputs))
print(train_size)
train_data = Data(inputs[:train_size], outputs[:train_size])
test_data = Data(inputs[train_size:], outputs[train_size:])

864


In [46]:
# create DataLoaders
train_loader = DataLoader(dataset = train_data, batch_size=2)
test_loader = DataLoader(dataset = test_data, batch_size=2)

In [47]:
# criterion & optimizer
criterion = nn.MSELoss()
#criterion = nn.L1Loss()
optimizer = torch.optim.AdamW(model.parameters())

In [48]:
# train & test loops
import copy
import numpy as np
torch.manual_seed(42)

epochs = 1000

epoch_count = []
loss_values = []
test_loss_values = []
best_mse = np.inf
best_weights = None
history = []


for epoch in range(epochs):
    # Train
    model.train()
    for x,y in train_loader:
        y_pred = model(x)
   #     y = torch.reshape(y, (2,1,3))
        loss = criterion(y_pred.float(), y.float())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        
     # test
    model.eval()
    for x,y in test_loader:
        test_pred = model(x)           
        test_loss = criterion(test_pred, y)
        history.append(test_loss)
        if test_loss < best_mse:
            best_mse = test_loss
            best_weights = copy.deepcopy(model.state_dict())
            

print(f'Best MSE: {best_mse}')
       
 
    

Best MSE: 545.8510131835938


In [49]:
print(f'Test Pred: {test_pred}')
print(f'Actual: {y}')

Test Pred: tensor([[126.1159, 166.8350, 144.2717],
        [101.4453, 133.5551, 115.0252]], grad_fn=<AddmmBackward0>)
Actual: tensor([[181,  61, 238],
        [231, 168,  70]])
