In [1]:
from clean_tabular_data import *
import pandas as pd
# Read the raw product listings; rows in the CSV are terminated by "\n".
raw_products_df = pd.read_csv('Products.csv', lineterminator='\n')
# Run the project's cleaning helper over the raw frame
# (per the original note it drops nulls and casts pricing to numeric -- confirm in clean_tabular_data).
products_df = clean_product_data(raw_products_df)

In [2]:
def get_encoder_and_decoder(products_df):
    """Build lookup tables between label values and integer class ids.

    Ids are assigned by order of first appearance in the "label" column.

    Args:
        products_df: DataFrame containing a "label" column.

    Returns:
        A tuple ``(encoder, decoder)`` where ``encoder`` maps label -> id and
        ``decoder`` maps id -> label.
    """
    labels_in_order = list(products_df["label"].unique())
    # id -> label comes straight from enumerating the distinct labels ...
    decoder = dict(enumerate(labels_in_order))
    # ... and label -> id is simply the inverse mapping.
    encoder = {label: class_id for class_id, label in decoder.items()}
    return encoder, decoder

# Create the label column: the text before the first "/" in each category,
# with surrounding whitespace stripped.
products_df["label"] = products_df["category"].str.split(r"\/", expand=True)[0].str.strip()
# Build the label <-> integer id lookup tables from the new column.
encoder, decoder = get_encoder_and_decoder(products_df)
# Save the decoder to disk. A context manager guarantees the handle is closed
# even if the write raises.
# NOTE(review): despite the ".pkl" extension this writes the dict's text repr
# (str(decoder)), not a pickle; the format is kept so existing readers keep working.
with open("image_decoder.pkl", "w") as decoder_file:
    decoder_file.write(str(decoder))

In [3]:
images_df = pd.read_csv('Images.csv', lineterminator='\n')

# Attach each image's product label: left-join images to products on
# images.product_id == products.id. The clashing product-side "id" column
# receives the "_y" suffix.
images_with_labels = pd.merge(
    images_df,
    products_df[['id', 'label']],
    left_on='product_id',
    right_on='id',
    how='left',
    suffixes=('', '_y'),
)
# The join keys are no longer needed once the label is attached.
training_df = images_with_labels.drop(columns=['id_y', 'product_id'])
# Discard the first remaining column
# (presumably an unnamed index column carried over from the CSV -- confirm).
training_df = training_df.iloc[:, 1:]
# Swap the text labels for their integer ids.
training_df['label'] = training_df['label'].replace(encoder)
training_df.to_csv('training_data.csv')

In [1]:
from clean_images import *
# Folder holding the raw images. This absolute path is specific to the
# original author's local machine and must be adjusted elsewhere.
raw_images_dir = 'D:/Documents/AICore/images_fb/images'
clean_image_data(raw_images_dir)

In [71]:
import torch
import os
from datetime import datetime
from torch.utils.data import DataLoader, random_split
from FBMClassifier import FBMClassifier
from FBMDataset import FBMDataset
import torch.nn.functional as F
from torch.optim import SGD
from torch.utils.tensorboard import SummaryWriter


def create_model_dir_path():
    """Return the path of a timestamped weights directory under the cwd.

    The result has the form ``<cwd>/model_evaluation/<yymmddHHMMSS>/weights``.
    Nothing is created on disk; the caller is responsible for that.
    """
    timestamp = datetime.now().strftime('%y%m%d%H%M%S')
    return os.path.join(os.getcwd(), 'model_evaluation', timestamp, 'weights')

def train(model, epochs, learning_rate=0.001):
    """Train ``model`` and write a weights checkpoint after every epoch.

    Batches are read from the module-level ``train_loader`` and ``val_loader``,
    which must be defined before this function is called.

    Fixes relative to the previous version:
      * An SGD optimizer is created and stepped. Previously gradients were
        computed but never applied (and never zeroed), so the weights did not
        change.
      * Checkpoints are written to a file inside the weights directory;
        previously ``torch.save`` was handed the directory itself.
      * The validation average no longer relies on a loop variable that is
        undefined when ``val_loader`` yields nothing.
      * The TensorBoard writer is closed even if training is interrupted.

    Args:
        model: the network to train; assumed to be a ``torch.nn.Module``
            (it must provide ``parameters()``, ``train()``, ``eval()`` and
            ``state_dict()``).
        epochs: number of full passes over ``train_loader``.
        learning_rate: step size for the SGD optimizer.
    """
    writer = SummaryWriter()
    optimiser = SGD(model.parameters(), lr=learning_rate)
    batch_id = 0

    path = create_model_dir_path()
    # exist_ok guards against two runs starting within the same second.
    os.makedirs(path, exist_ok=True)

    try:
        for epoch in range(epochs):
            model.train(True)
            for features, labels in train_loader:
                # Clear gradients left over from the previous batch.
                optimiser.zero_grad()
                prediction = model(features)
                loss = F.cross_entropy(prediction, labels)
                # Back-propagate, then apply the update to the weights.
                loss.backward()
                optimiser.step()
                print(loss.item())
                writer.add_scalar('Loss', loss.item(), batch_id)
                batch_id += 1

            # Evaluation mode for the validation pass.
            model.eval()
            running_vloss = 0.0
            val_batches = 0

            with torch.no_grad():
                for vinputs, vlabels in val_loader:
                    voutputs = model(vinputs)
                    # .item() keeps the running total a plain float.
                    running_vloss += F.cross_entropy(voutputs, vlabels).item()
                    val_batches += 1

            if val_batches:
                avg_vloss = running_vloss / val_batches
                print(f'Average Loss: {avg_vloss}')

            # One checkpoint file per epoch inside the weights directory.
            torch.save(model.state_dict(), os.path.join(path, f'epoch_{epoch}.pt'))
    finally:
        # Flush and release the event file, including on KeyboardInterrupt.
        writer.close()

# Transfer-learning workflow: start from a pre-trained model, adapt its
# architecture to this problem, then fine-tune it on our data.
classifer = FBMClassifier()
dataset = FBMDataset("training_data.csv","../images_fb/images/cleaned_images")

# Randomly partition the dataset 70% / 15% / 15% into train / test / validation.
train_dataset, test_dataset, val_dataset = random_split(dataset, [0.7, 0.15, 0.15])
for subset in (train_dataset, test_dataset, val_dataset):
    print(len(subset))

# One shuffled, single-sample-batch loader per partition.
train_loader, test_loader, val_loader = (
    DataLoader(subset, batch_size=1, shuffle=True)
    for subset in (train_dataset, test_dataset, val_dataset)
)

# Run a single training epoch.
train(classifer, 1)


Using cache found in C:\Users\user/.cache\torch\hub\NVIDIA_DeepLearningExamples_torchhub


8823
1891
1890
2.4874212741851807
2.5207931995391846
2.814282178878784
2.479762554168701


KeyboardInterrupt: 