In [1]:
from bsddb3 import btopen
import bcolz, pickle, os, shelve
import numpy as np
from math import ceil
from itertools import count
from collections import defaultdict
from difflib import SequenceMatcher
import tensorflow as tf
import tensorflow_hub as hub
from scipy import spatial

prefix = "/data/Vivek/Final/SIREN-Research/OntoEnricher/junk/Files/security_threshold_10_10/security"
train_file = "../files/dataset/train.tsv"
test_file = "../files/dataset/test.tsv"
output_folder = "../junk/Output/"
embeddings_folder = "../junk/Glove.dat"
USE_folder = "/home/vlead/USE"
embeddings_file = "/data/Vivek/glove.6B.300d.txt"
use_embeddings = "../files/embeddings.pt"

POS_DIM = 4
DEP_DIM = 5
DIR_DIM = 1
EMBEDDING_DIM = 300
NULL_PATH = ((0, 0, 0, 0),)
relations = ["hypernym", "hyponym", "synonym", "none"]
# relations = ["True", "False"]
NUM_RELATIONS = len(relations)

In [7]:
failed = []
success = []



def id_to_entity(db, entity_id):
    entity = db[str(entity_id)]
    if db == id2path_db:
        entity = "/".join(["*##*".join(e.split("_", 1)) for e in entity.split("/")])
    return entity


def entity_to_id(db, entity):
    if entity in db:
        success.append(entity)
        return int(db[entity])
    failed.append(entity)
    return -1

def extract_paths(db, x, y):
    key = (str(x) + '###' + str(y))
    try:
        relation = db[key]
        return {int(path_count.split(":")[0]): int(path_count.split(":")[1]) for path_count in relation.split(",")}
    except Exception as e:
        return {}

def load_embeddings_from_disk():
    try:
        vectors = bcolz.open(embeddings_folder)[:]
        words = pickle.load(open(embeddings_folder + 'words.pkl', 'rb'))
        word2idx = pickle.load(open(embeddings_folder + 'words_index.pkl', 'rb'))

        embeddings = vectors
    except:
        embeddings, word2idx = create_embeddings()
    return embeddings, word2idx
        

def create_embeddings():
    words = ['_unk_']
    idx = 1
    word2idx = {"_unk_": 0}
    vectors = bcolz.carray(np.random.random(300), rootdir=embeddings_folder, mode='w')
    with open(embeddings_file, 'r') as f:
        for l in f:
            line = l.split()
            word, vector = line[0], line[1:]
            words.append(word)
            vectors.append(np.array(vector).astype(np.float))
            word2idx[word] = idx
            idx += 1
    vectors = vectors.reshape((-1, EMBEDDING_DIM))
    row_norm = np.sum(np.abs(vectors)**2, axis=-1)**(1./2)
    vectors /= row_norm[:, np.newaxis]
    vectors = bcolz.carray(vectors, rootdir=embeddings_folder, mode='w')
    vectors.flush()

    pickle.dump(words, open(embeddings_folder + 'words.pkl', 'wb'))
    pickle.dump(word2idx, open(embeddings_folder + 'words_index.pkl', 'wb'))
    
    return vectors, word2idx

word2id_db = shelve.open(prefix + "_word_to_id_dict.db", 'r')
id2word_db = shelve.open(prefix + "_id_to_word_dict.db", "r")
path2id_db = shelve.open(prefix + "_path_to_id_dict.db", "r")
id2path_db = shelve.open(prefix + "_id_to_path_dict.db", "r")
relations_db = shelve.open(prefix + "_relations_map.db", "r")

embeddings, emb_indexer = load_embeddings_from_disk()

train_dataset = {tuple(l.split("\t")[:2]): l.split("\t")[2] for l in open(train_file).read().split("\n")}
test_dataset = {tuple(l.split("\t")[:2]): l.split("\t")[2] for l in open(test_file).read().split("\n")}



In [None]:
arrow_heads = {">": "up", "<":"down"}

def extract_direction(edge):
    
    if edge[0] == ">" or edge[0] == "<":
        direction = "start_" + arrow_heads[edge[0]]
        edge = edge[1:]
    elif edge[-1] == ">" or edge[-1] == "<":
        direction = "end_" + arrow_heads[edge[-1]]
        edge = edge[:-1]
    else:
        direction = ' '
    return direction, edge
    
def parse_path(path):
    parsed_path = []
    for edge in path.split("*##*"):
        direction, edge = extract_direction(edge)
        if edge.split("/"):
            try:
                embedding, pos, dependency = edge.split("/")
            except:
                print (edge, path)
                raise
            emb_idx, pos_idx, dep_idx, dir_idx = emb_indexer.get(embedding, 0), pos_indexer[pos], dep_indexer[dependency], dir_indexer[direction]
            parsed_path.append(tuple([emb_idx, pos_idx, dep_idx, dir_idx]))
        else:
            return None
    return tuple(parsed_path)

def extract_all_paths(x,y):
    
    paths = list(extract_paths(relations_db,x,y).items()) + list(extract_paths(relations_db,y,x).items())
    x_word = id_to_entity(id2word_db, x) if x!=-1 else "X"
    y_word = id_to_entity(id2word_db, y) if y!=-1 else "Y"
    path_count_dict = { id_to_entity(id2path_db, path).replace("X/", x_word+"/").replace("Y/", y_word+"/") : freq for (path, freq) in paths }
    path_count_dict = { parse_path(path) : path_count_dict[path] for path in path_count_dict }

    return { path : path_count_dict[path] for path in path_count_dict if path}
    
def parse_dataset(dataset):
    keys = [(entity_to_id(word2id_db, x), entity_to_id(word2id_db, y)) for (x, y) in dataset]
    paths = [extract_all_paths(x,y) for (x,y) in keys]
    empty = [list(dataset)[i] for i, path_list in enumerate(paths) if len(list(path_list.keys())) == 0]
    print('Pairs without paths:', len(empty), ', all dataset:', len(dataset))
    embed_indices = [(emb_indexer.get(x,0), emb_indexer.get(y,0)) for (x,y) in keys]
    return embed_indices, paths
    
pos_indexer, dep_indexer, dir_indexer = defaultdict(count(0).__next__), defaultdict(count(0).__next__), defaultdict(count(0).__next__)
unk_pos, unk_dep, unk_dir = pos_indexer["#UNKNOWN#"], dep_indexer["#UNKNOWN#"], dir_indexer["#UNKNOWN#"]

dataset_keys = list(train_dataset.keys()) + list(test_dataset.keys())
dataset_vals = list(train_dataset.values()) + list(test_dataset.values())

embed_indices, x = parse_dataset(dataset_keys)
y = [i for (i,relation) in enumerate(dataset_vals)]

embed_indices_train, embed_indices_test = np.array(embed_indices[:len(train_dataset)]), np.array(embed_indices[len(train_dataset):len(train_dataset)+len(test_dataset)])
x_train, x_test = np.array(x[:len(train_dataset)]), np.array(x[len(train_dataset):len(train_dataset)+len(test_dataset)])
y_train, y_test = np.array(y[:len(train_dataset)]), np.array(y[len(train_dataset):len(train_dataset)+len(test_dataset)])

In [None]:
class LSTM(nn.Module):

    def __init__(self):
        
        super(LSTM, self).__init__()
        self.cache = {}
        
        self.hidden_dim = HIDDEN_DIM + 2 * EMBEDDING_DIM
        self.input_dim = POS_DIM + DEP_DIM + EMBEDDING_DIM + DIR_DIM
        self.W = nn.Linear(NUM_RELATIONS, self.input_dim)
        self.dropout_layer = nn.Dropout(p=dropout)
        self.softmax = nn.LogSoftmax()
        
        self.word_embeddings = nn.Embedding(len(embeddings), EMBEDDING_DIM)
        self.word_embeddings.load_state_dict({'weight': torch.from_numpy(np.array(embeddings))})
        self.word_embeddings.require_grad = False
        
        self.pos_embeddings = nn.Embedding(len(pos_indexer), POS_DIM)
        self.dep_embeddings = nn.Embedding(len(dep_indexer), DEP_DIM)
        self.dir_embeddings = nn.Embedding(len(dir_indexer), DIR_DIM)
        
        self.lstm = nn.LSTM(self.input_dim, self.hidden_dim, NUM_LAYERS)
    
    def embed_path(self, elem):
        path, count = elem
        if path in self.cache:
            return cache[path] * count
        lstm_inp = []
        for edge in path:
            inputs = [torch.Tensor([[el]]) for el in edge]
            word_embed = self.dropout_layer(self.word_embeddings(inputs[0]))
            pos_embed = self.dropout_layer(self.pos_embeddings(inputs[1]))
            dep_embed = self.dropout_layer(self.dep_embeddings(inputs[2]))
            dir_embed = self.dropout_layer(self.dir_embeddings(inputs[3]))
            embeds = np.concatenate((word_embed, pos_embed, dep_embed, dir_embed))
            lstm_inp.append(embeds)
        output, _ = self.lstm(lstm_inp)
        cache[path] = output

        return output * count
    
    def forward(self, data, emb_indexer):
        if not data:
            data[NULL_PATH] = 1
        print ("Data: ", data)
        num_paths = [sum(list(paths.values())) for paths in data]
        print ("Number of paths: ", num_paths)
        path_embeddings = [np.sum([self.embed_path(path) for path in paths.items()]) for paths in data]
        print ("Path Embeddings: ", path_embeddings)
        
        h = np.divide(path_embeddings, num_paths)
        h = [np.concatenate((self.word_embeddings(elem[0]), h[i], self.word_embeddings(elem[1]))) for i,emb in enumerate(emb_indexer)]
        return self.softmax(self.W(h))

HIDDEN_DIM = 60
NUM_LAYERS = 2
num_epochs = 3
batch_size = 10

dataset_size = len(y_train)
batch_size = min(batch_size, dataset_size)
num_batches = int(ceil(dataset_size/batch_size))

lr = 0.001
dropout = 0.3
lstm = LSTM()
criterion = nn.NLLLoss()
optimizer = optim.Adam(lstm.parameters(), lr=lr)

for epoch in range(num_epochs):
    
    total_loss, epoch_idx = 0, np.random.permutation(dataset_size)
    
    for batch_idx in range(num_batches):
        batch_end = (batch_idx+1) * batch_size
        batch_start = batch_idx * batch_size
        batch = epoch_idx[batch_start:batch_end]
        
        data, labels, embeddings_idx = x_train[batch], y_train[batch], embed_indices_train[batch]
        
        # Run the forward pass
        outputs = lstm(data, embeddings_idx)
        loss = criterion(outputs, labels)

        # Backprop and perform Adam optimisation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    total_loss /= dataset_size
    print('Epoch [{}/{}] Loss: {:.4f}'.format(epoch + 1, num_epochs, total_loss))
    loss_list.append(loss.item())

lstm.eval()
with torch.no_grad():
    predictedLabels = []
    for batch_idx in range(num_batches):
        outputs = lstm(data)
        print (outputs)
        _, predicted = torch.max(outputs.data, 1)
        predictedLabels.extend(predicted)

In [110]:
x

[{((38901, 1, 1, 1), (31, 2, 2, 2), (4045, 1, 3, 3), (5904, 2, 4, 4)): 1,
  ((38901, 1, 1, 1), (31, 2, 2, 2), (4045, 1, 3, 3)): 3},
 {},
 {},
 {},
 {},
 {}]

In [191]:
from torch import nn
import torch
e = nn.Embedding(3, 3)
ls = [[0, 1, 2], [3,4,5], [6,7,8]]
ls = np.array([np.array(el) for el in ls])
e.load_state_dict({'weight': torch.from_numpy(ls)})

<All keys matched successfully>

In [206]:
num_paths = [sum(list(paths.values())) for paths in data]
        print ("Number of paths: ", num_paths)
        path_embeddings = np.array([np.sum([self.embed_path(path) for path in paths.items()]) for paths in data])
        #print ("Path Embeddings: ", path_embeddings)
        
        h = np.divide(path_embeddings, num_paths)
        print (h.shape)
        h = [np.concatenate((self.word_embeddings(emb[0]), h[i], self.word_embeddings(emb[1]))) for i,emb in enumerate(emb_indexer)]

tensor([[1., 3., 4.]])

In [228]:
# t = torch.randn(1,4)
torch.Tensor([[1]]).shape
# torch.cat((h, t.view(1,-1)), 0)

torch.Size([1, 1])

In [233]:
loss = nn.CrossEntropyLoss()
inputt = torch.randn(3, 5, requires_grad=True)
target = torch.empty(3, dtype=torch.long).random_(5)
output = loss(inputt, target)
output.backward()
print (output, inputt, target)

tensor(1.5777, grad_fn=<NllLossBackward>) tensor([[ 1.1562, -0.1242,  0.1812,  0.3042,  0.9188],
        [ 1.0961,  1.5061,  0.4816, -0.1618, -1.0943],
        [ 0.8229,  0.5431, -0.4848,  1.8109, -0.8246]], requires_grad=True) tensor([4, 2, 0])


In [9]:
import time 
word = "margherita pizza" 

def extractUSEEmbeddings(words):
    try:
        embed = hub.KerasLayer(USE_folder)
    except Exception as e:
        !mkdir $USE_folder
        !curl -L "https://tfhub.dev/google/universal-sentence-encoder-large/5?tf-hub-format=compressed" | tar -zxvC $USE_folder
        embed = hub.KerasLayer(USE_folder)
        pass
#     tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
    word_embeddings = embed(words)
    return word_embeddings

def compare_sim(words, word_to_compare, max_sim=-1000):
    word_embeddings = extractUSEEmbeddings(words)
    closest_word = ""
    with shelve.open(use_embeddings, 'c') as db:
        for i, w in enumerate(word_embeddings):
            db[words[i]] = " ".join([str(s) for s in w])
        closest_word_idx = np.argmax(awesome_cossim_topn(word_embeddings, word_to_compare.transpose(), 10, 0.85))
        closest_word = words[closest_word_idx]
        del word_embeddings
    del db
    return closest_word

def closestWord(word, method="USE"):
    if method!="USE":
        return max([(w, SequenceMatcher(lambda x: x == " ", word, w).ratio()) for w in list(word2id_db.keys())], key=lambda l:float(l[1]))
    a = time.time()
    word_to_compare = extractUSEEmbeddings([word]).eval(session=tf.compat.v1.Session())
    print ("Took me {} seconds to extract USE embeddings...".format(time.time()-a))
    if os.path.isfile(use_embeddings):
        with shelve.open(use_embeddings, 'r') as db:
            embeds = [np.array([float(l) for l in e.split()]) for e in db.values()]
            words = list(db.keys())
            closest_word_idx = np.argmax(awesome_cossim_topn(embeds, word_to_compare.T, 10, 0.85))
            closest_word = words[closest_word_idx]
    else:
        words = list(word2id_db.keys())
        print ("Obtained list of words")
#         len_part = 100000
        max_sim = -1000
#         n_parts = ceil(len(words)/len_part)
#         for i in range(n_parts):
#             words_part = words[i*len_part:(i+1)*len_part]
        closest_word, max_sim = compare_sim(words, word_to_compare, max_sim)

    
    return closest_word, max_sim
    
closest_word = closestWord("wansecure firewall")
closest_word

NotImplementedError: eval is not supported when eager execution is enabled, is .numpy() what you're looking for?

In [19]:
import time
t = time.time()
import difflib
# closest_word, sim = closestWord("neopolitan pizza", "sm")
res = difflib.get_close_matches("neopolitan pizza", list(word2id_db.keys()))
print ("closest word:", res, "Calculated in: ", time.time() - t, "seconds")

closest word: ['neapolitan pizza', 'pan pizza', 'authentic neapolitan pizza'] Calculated in:  5.890500783920288 seconds


In [8]:
a = time.time()
print ("Took me {} seconds to extract USE embeddings...".format(time.time()-a))

Took me 0.00010180473327636719 seconds to extract USE embeddings...


In [21]:

def awesome_cossim_top(A, B, ntop, lower_bound=0):
    # force A and B as a CSR matrix.
    # If they have already been CSR, there is no overhead
    A = A.tocsr()
    B = B.tocsr()
    M, _ = A.shape
    _, N = B.shape
 
    idx_dtype = np.int32
 
    nnz_max = M*ntop
 
    indptr = np.zeros(M+1, dtype=idx_dtype)
    indices = np.zeros(nnz_max, dtype=idx_dtype)
    data = np.zeros(nnz_max, dtype=A.dtype)

    ct.sparse_dot_topn(
        M, N, np.asarray(A.indptr, dtype=idx_dtype),
        np.asarray(A.indices, dtype=idx_dtype),
        A.data,
        np.asarray(B.indptr, dtype=idx_dtype),
        np.asarray(B.indices, dtype=idx_dtype),
        B.data,
        ntop,
        lower_bound,
        indptr, indices, data)

    return csr_matrix((data,indices,indptr),shape=(M,N))



org_names = names['buyer'].unique()
vectorizer = TfidfVectorizer(min_df=1, analyzer=extract_ngrams)
tf_idf_matrix = vectorizer.fit_transform(org_names)

t1 = time.time()
matches = awesome_cossim_top(tf_idf_matrix, tf_idf_matrix.transpose(), 10, 0.85)
t = time.time()-t1


print('All 3-grams in "Department":')
print(extract_ngrams('Department'))

All 3-grams in "Department":
[' De', 'Dep', 'epa', 'par', 'art', 'rtm', 'tme', 'men', 'ent', 'nt ']


In [4]:
import re
from ftfy import fix_text
from sklearn.feature_extraction.text import TfidfVectorizer
from scipy.sparse import csr_matrix
import sparse_dot_topn.sparse_dot_topn as ct
from sparse_dot_topn import awesome_cossim_topn

chars_to_remove = [")","(",".","|","[","]","{","}","'"]

def extract_ngrams(string, n=3):
    string = fix_text(string).encode("ascii", errors="ignore").decode().lower() # fix text
    string = string.replace('&', 'and').replace(',', ' ').replace('-', ' ').title()
    string = re.sub('[' + re.escape(''.join(chars_to_remove)) + ']', '', string)
    string = ' ' + re.sub(' +',' ',string).strip() + ' '
    string = re.sub(r'[,-./]|\sBD',r'', string)
    ngrams = zip(*[string[i:] for i in range(n)])
    ngrams = [''.join(ngram) for ngram in ngrams]
    return ngrams

word_to_match = "margherita pizza"
words = list(word2id_db.keys())
vectorizer = TfidfVectorizer(min_df=1, analyzer=extract_ngrams)
tf_idf_matrix = vectorizer.fit_transform(words + [word_to_match])

# d = awesome_cossim_topn(tf_idf_matrix, tf_idf_matrix.transpose(), 10, 0.85, use_threads=True, n_jobs=256)



In [8]:
d = awesome_cossim_topn(tf_idf_matrix[:-1], tf_idf_matrix[-1].transpose(), 10, 0.85, use_threads=True, n_jobs=256)

In [6]:
def get_matches_df(sparse_matrix, name_vector, top=100):
    non_zeros = sparse_matrix.nonzero()
    
    sparserows = non_zeros[0]
    sparsecols = non_zeros[1]
    
    if top:
        nr_matches = top
    else:
        nr_matches = sparsecols.size
    
    left_side = np.empty([nr_matches], dtype=object)
    right_side = np.empty([nr_matches], dtype=object)
    similairity = np.zeros(nr_matches)
    print (sparserows)
    for index in range(0, nr_matches):
        left_side[index] = name_vector[sparserows[index]]
        right_side[index] = name_vector[sparsecols[index]]
        similairity[index] = sparse_matrix.data[index]
    
    return pd.DataFrame({'left_side': left_side,
                          'right_side': right_side,
                           'similairity': similairity})

In [13]:
import time
start = time.time()
d = awesome_cossim_topn(tf_idf_matrix[:-1], tf_idf_matrix[-1].transpose(), 10, 0.85, use_threads=True, n_jobs=256)
words[np.argmax(d)]
print ("time: ", start - time.time())

time:  -0.3905460834503174


In [9]:
relations_db_new = shelve.open(prefix + "_relations_map.db", "c")
for k, v in relations_db.items():
    relations_db_new["###".join(k.split("_"))] = v
relations_db_new.close()

In [None]:
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms


train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])),
    batch_size=batch_size, shuffle=True)

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(28 * 28, 200)
        self.fc2 = nn.Linear(200, 200)
        self.fc3 = nn.Linear(200, 10)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return F.log_softmax(x)

net = Net()
print(net)

# create a stochastic gradient descent optimizer
optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9)
# create a loss function
criterion = nn.NLLLoss()

# run the main training loop
for epoch in range(epochs):
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = Variable(data), Variable(target)
        # resize data from (batch_size, 1, 28, 28) to (batch_size, 28*28)
        data = data.view(-1, 28*28)
        optimizer.zero_grad()
        net_out = net(data)
        loss = criterion(net_out, target)
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                       100. * batch_idx / len(train_loader), loss.data[0]))

# run a test loop
test_loss = 0
correct = 0
for data, target in test_loader:
    data, target = Variable(data, volatile=True), Variable(target)
    data = data.view(-1, 28 * 28)
    net_out = net(data)
    # sum up batch loss
    test_loss += criterion(net_out, target).data[0]
    pred = net_out.data.max(1)[1]  # get the index of the max log-probability
    correct += pred.eq(target.data).sum()

test_loss /= len(test_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
    test_loss, correct, len(test_loader.dataset),
    100. * correct / len(test_loader.dataset)))


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../data/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ../data/MNIST/raw/train-images-idx3-ubyte.gz to ../data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../data/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ../data/MNIST/raw/train-labels-idx1-ubyte.gz to ../data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ../data/MNIST/raw/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

In [11]:
!pip3 install torchvision

Defaulting to user installation because normal site-packages is not writeable
Collecting torchvision
  Downloading torchvision-0.5.0-cp36-cp36m-manylinux1_x86_64.whl (4.0 MB)
[K     |████████████████████████████████| 4.0 MB 253 kB/s eta 0:00:01
Collecting pillow>=4.1.1
  Downloading Pillow-7.1.1-cp36-cp36m-manylinux1_x86_64.whl (2.1 MB)
[K     |████████████████████████████████| 2.1 MB 33.6 MB/s eta 0:00:01
[?25hInstalling collected packages: pillow, torchvision
Successfully installed pillow-7.1.1 torchvision-0.5.0
