In [4]:
import json
from transformers import T5ForConditionalGeneration, T5Tokenizer
from torch.utils.data import DataLoader, Dataset
import torch
from torch.optim import AdamW

# Dummy data: one record per date, built from a compact row table.
# Row layout: (date, color, pattern, material, occasion, accessories, season).
dummy_data = [
    {
        "date": date,
        "outfit": {
            "color": color,
            "pattern": pattern,
            "material": material,
            "occasion": occasion,
            "accessories": accessories,
            "season": season,
        },
    }
    for date, color, pattern, material, occasion, accessories, season in (
        ("2023-12-25", "Grey", "Solid", "Fleece", "Casual", ["Sneakers", "Backpack"], "Winter"),
        ("2023-07-04", "Red", "Striped", "Cotton", "Formal", ["Hat", "Watch"], "Summer"),
        ("2023-11-23", "Brown", "Plaid", "Wool", "Casual", ["Scarf", "Boots"], "Fall"),
        ("2023-05-01", "Blue", "Floral", "Linen", "Casual", ["Sunglasses", "Sandals"], "Spring"),
        ("2023-09-10", "Green", "Camouflage", "Polyester", "Outdoor", ["Cap", "Boots"], "Fall"),
    )
]

# Save dummy data to a file (serialized first, then written in one call)
with open('fashion_trends_dummy.json', 'w') as f:
    f.write(json.dumps(dummy_data))

class FashionDataset(Dataset):
    """Dataset of (date -> outfit) pairs for sequence-to-sequence fine-tuning.

    The JSON file at ``data_path`` is expected to hold a list of records, each
    with a ``"date"`` string and an ``"outfit"`` object. The date is the model
    input; the outfit, serialized with ``json.dumps``, is the target text.

    Args:
        data_path: Path to the JSON file to load.
        tokenizer: Callable tokenizer returning ``input_ids`` and
            ``attention_mask`` with a leading batch axis, and exposing
            ``pad_token_id``.
        max_length: Length every input and target is padded/truncated to.
    """

    def __init__(self, data_path, tokenizer, max_length=512):
        # A context manager closes the handle right away instead of leaving it
        # open until the garbage collector gets to it.
        with open(data_path, 'r', encoding='utf-8') as f:
            self.data = json.load(f)
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Return the number of records loaded from the file."""
        return len(self.data)

    def _encode(self, text):
        """Tokenize ``text`` to a fixed ``max_length`` as PyTorch tensors."""
        return self.tokenizer(
            text,
            max_length=self.max_length,
            truncation=True,
            padding="max_length",
            return_tensors="pt",
        )

    def __getitem__(self, idx):
        """Return ``input_ids``, ``attention_mask`` and ``labels`` for one record."""
        item = self.data[idx]
        inputs = self._encode(item['date'])
        targets = self._encode(json.dumps(item['outfit']))

        # squeeze(0) removes only the batch axis. A bare squeeze() would also
        # collapse the sequence axis when max_length == 1, yielding 0-dim
        # tensors with a different rank than every other configuration.
        input_ids = inputs.input_ids.squeeze(0)
        attention_mask = inputs.attention_mask.squeeze(0)
        labels = targets.input_ids.squeeze(0)

        # Replace padding token ids with -100 so padded target positions are
        # excluded from the loss (the ignore index used by Hugging Face models).
        labels[labels == self.tokenizer.pad_token_id] = -100
        return {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "labels": labels
        }

def collate_fn(batch):
    """Combine a list of per-sample dicts into one dict of stacked tensors.

    For each of ``input_ids``, ``attention_mask`` and ``labels``, the tensors
    of all samples are stacked along a new leading axis.
    """
    fields = ("input_ids", "attention_mask", "labels")
    return {
        field: torch.stack([sample[field] for sample in batch])
        for field in fields
    }

def fine_tune_model(data_path, model_save_path, epochs=5, batch_size=4, lr=5e-5):
    """Fine-tune ``t5-small`` on the dataset at ``data_path`` and save it.

    Args:
        data_path: JSON file consumed by ``FashionDataset``.
        model_save_path: Directory the model and tokenizer are saved to.
        epochs: Number of full passes over the data.
        batch_size: Samples per optimizer step.
        lr: Learning rate passed to ``AdamW``.

    The loss of every batch is printed as training progresses.
    """
    pretrained_name = 't5-small'
    tokenizer = T5Tokenizer.from_pretrained(pretrained_name)
    model = T5ForConditionalGeneration.from_pretrained(pretrained_name)

    dataloader = DataLoader(
        FashionDataset(data_path, tokenizer),
        batch_size=batch_size,
        shuffle=True,
        collate_fn=collate_fn,
    )
    optimizer = AdamW(model.parameters(), lr=lr)

    model.train()
    for epoch in range(epochs):
        for batch in dataloader:
            # Clear gradients left over from the previous step.
            optimizer.zero_grad()
            loss = model(
                input_ids=batch['input_ids'],
                attention_mask=batch['attention_mask'],
                labels=batch['labels'],
            ).loss
            loss.backward()
            optimizer.step()
            print(f"Epoch: {epoch}, Loss: {loss.item()}")

    # Model first, then tokenizer, both into the same directory.
    for artifact in (model, tokenizer):
        artifact.save_pretrained(model_save_path)

def generate_outfit(model_path, date):
    """Generate an outfit for ``date`` with the fine-tuned model at ``model_path``.

    Args:
        model_path: Directory holding the saved model and tokenizer.
        date: Date string used as the model input.

    Returns:
        The decoded outfit as a dict, or ``None`` when the model output is not
        valid JSON or is valid JSON that is not an object. The raw output and
        the reason for any rejection are printed.
    """
    model = T5ForConditionalGeneration.from_pretrained(model_path)
    tokenizer = T5Tokenizer.from_pretrained(model_path)
    inputs = tokenizer(date, return_tensors="pt")
    output_sequences = model.generate(input_ids=inputs.input_ids, attention_mask=inputs.attention_mask, max_new_tokens=100)
    predicted_outfit = tokenizer.decode(output_sequences[0], skip_special_tokens=True)

    # Debugging: Print the raw output
    print(f"Raw model output: {predicted_outfit}")

    try:
        outfit = json.loads(predicted_outfit)
    except json.JSONDecodeError as e:
        print(f"JSON decode error: {e}")
        return None

    # Valid JSON is not necessarily an outfit: an output such as "2024" parses
    # as an int. Reject anything that is not a JSON object so callers only
    # ever receive a dict or None.
    if not isinstance(outfit, dict):
        print(f"Unexpected JSON type: expected an object, got {type(outfit).__name__}")
        return None
    return outfit

# Fine-tune on the dummy dataset file and save the result to the model directory.
data_path, model_save_path = 'fashion_trends_dummy.json', 'fashion_trend_model'
fine_tune_model(data_path, model_save_path)

# Query the saved model with a date that is not in the dummy dataset.
future_date = "2024-12-25"
predicted_outfit = generate_outfit(model_save_path, future_date)
print(predicted_outfit)



Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Epoch: 0, Loss: 6.041658401489258
Epoch: 0, Loss: 11.242891311645508
Epoch: 1, Loss: 7.000406742095947
Epoch: 1, Loss: 5.712445259094238
Epoch: 2, Loss: 5.989035606384277
Epoch: 2, Loss: 4.337033748626709
Epoch: 3, Loss: 5.586666107177734
Epoch: 3, Loss: 4.494311809539795
Epoch: 4, Loss: 4.783902645111084
Epoch: 4, Loss: 3.7604734897613525


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Raw model output: 2024-12-25
JSON decode error: Extra data: line 1 column 5 (char 4)
None
