In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
import urllib.request, json 
#from tensorflow import keras

import pandas as pd
import json
import numpy as np
import os
import time

device = None
LongTensor = None
if torch.cuda.is_available(): # device agnostic code
    device = torch.device('cuda')
    LongTensor = torch.cuda.LongTensor
    print("using gpu")
else:                                                   #
    device = torch.device('cpu')
    LongTensor = torch.LongTensor
    print("using cpu")


using gpu


In [2]:
# Dataset Preparation
print ("Read Dataset ... ")
def read_dataset(path):
	return json.load(open(path)) 

def read_dataset_url(path):
  with urllib.request.urlopen(path) as url:
    data = json.load(url)#.read().decode())
    return data

try:
    train = read_dataset('../data/kaggle_cooking/train.json')
    test = read_dataset('../data/kaggle_cooking/test.json')
except:
    train = read_dataset_url('https://raw.githubusercontent.com/AssassinTee/Seminar/master/data/kaggle_cooking/train.json')
    test = read_dataset_url('https://raw.githubusercontent.com/AssassinTee/Seminar/master/data/kaggle_cooking/test.json')
    

Read Dataset ... 


In [3]:
print ("Prepare text data of Train and Test ... ")
def generate_text(data):
	text_data = [" ".join(doc['ingredients']).lower() for doc in data]
	return text_data

train_text = generate_text(train)
test_text = generate_text(test)
target = [doc['cuisine'] for doc in train]
#print(target)
#print(len(target))
target_set=set(target)
#print(target_set)
#print(len(target_set))

Prepare text data of Train and Test ... 


In [0]:
tfidf = TfidfVectorizer(binary=True)
def tfidf_features(txt, flag):
    if flag == "train":
        x = tfidf.fit_transform(txt)
    else:
        x = tfidf.transform(txt)
    x = x.astype('float16')
    return x

X = tfidf_features(train_text, flag="train")
X_test = tfidf_features(test_text, flag="test")

In [0]:
lb = LabelEncoder()
ylab = lb.fit_transform(target)
ylab = ylab.reshape((len(ylab), 1))

In [0]:
#Init Model Weights
def init_weights(m):
    if type(m) == nn.Linear:
        torch.nn.init.xavier_uniform_(m.weight)
        m.bias.data.fill_(0.01)
        
#Build Sequential Model        
model = nn.Sequential(nn.Linear(3010, 300), nn.ReLU(), nn.Dropout(p=0.21), nn.Linear(300, 300), nn.ReLU(), nn.Dropout(p=0.21), nn.Linear(300, 20), nn.Softmax())
model.apply(init_weights)

#Set loss
CEloss = nn.CrossEntropyLoss()

#CUDA related stuff...
if torch.cuda.is_available():
    model.cuda()
model.to(device)

#Set optimizer
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

#Convert target to tensor
ytargets = LongTensor(ylab)    

X = X.astype("float32").todense()

inputs = []
for i in range(X.shape[0]):
  inputs.append(torch.from_numpy(X[i]).to(device))
#print(inputs)

In [7]:
# Start Training
for epoch in range(30):
    running_loss = 0.0
    epoch_start = time.time()
    measure_start = time.time()
    for i in range(X.shape[0]):
        #For Dropout
        model.train()
        
        #Put input into model to get output
        output = model(inputs[i])
        
        #calculate loss
        loss = CEloss(output, ytargets[i])
        
        #backward step
        loss.backward()
        
        #optimizer step
        optimizer.step()
        #Reset optimizer grad
        optimizer.zero_grad()
        
        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            measure_end = time.time()
            print('[%d, %5d] loss: %.3f, time: %.3f seconds' %
                  (epoch + 1, i + 1, running_loss / 2000, measure_end-measure_start))
            running_loss = 0.0
            measure_start = time.time()
    epoch_end = time.time()
    print('Epoch time: %.3f seconds'%(epoch_end-epoch_start))

[1,  2000] loss: 2.429, time: 1.949 seconds
[1,  4000] loss: 2.159, time: 1.951 seconds
[1,  6000] loss: 2.111, time: 1.931 seconds
[1,  8000] loss: 2.032, time: 1.938 seconds
[1, 10000] loss: 2.019, time: 1.916 seconds
[1, 12000] loss: 1.962, time: 1.900 seconds
[1, 14000] loss: 2.063, time: 1.938 seconds
[1, 16000] loss: 1.974, time: 1.900 seconds
[1, 18000] loss: 1.995, time: 1.899 seconds
[1, 20000] loss: 1.948, time: 1.908 seconds
[1, 22000] loss: 1.957, time: 1.939 seconds
[1, 24000] loss: 1.921, time: 1.938 seconds
[1, 26000] loss: 1.948, time: 1.955 seconds
[1, 28000] loss: 1.949, time: 1.959 seconds
[1, 30000] loss: 1.970, time: 1.942 seconds
[1, 32000] loss: 1.896, time: 1.932 seconds
[1, 34000] loss: 1.941, time: 1.913 seconds
[1, 36000] loss: 1.940, time: 1.956 seconds
[1, 38000] loss: 1.895, time: 1.908 seconds
Epoch time: 38.372 seconds
[2,  2000] loss: 1.902, time: 1.898 seconds
[2,  4000] loss: 1.911, time: 1.900 seconds
[2,  6000] loss: 1.950, time: 1.931 seconds
[2,  

In [8]:
path = "../models/pytorch_kaggle_cooking/pytorch_kaggle_cooking.pt"
torch.save(model, path)
model2 = torch.load(path)
print(model2.eval())


FileNotFoundError: ignored