In [2]:
import torch
import numpy as np
import pandas as pd
from src.preprocessing import remove_stop_words
from src.model_architectures.characterbert_classifier import SiameseNetwork
from characterbert_modeling.character_bert import CharacterBertModel

In [87]:
# Load a CharacterBertModel from the local pretrained-model directory.
character_bert_model = CharacterBertModel.from_pretrained('./pretrained-models/general_character_bert/')

In [92]:
# Instantiate the project's SiameseNetwork classifier with its default constructor arguments.
net = SiameseNetwork()

In [88]:
# Replace the network's `bert` attribute with the CharacterBertModel loaded from disk.
# NOTE(review): this cell's execution count (88) is lower than the cell creating `net` (92);
# on a fresh kernel the cells must be run top to bottom for this assignment to take effect.
net.bert = character_bert_model

In [103]:
# Read the training pairs; later cells use the `title_one`, `title_two` and `label` columns.
df = pd.read_csv('data/train/total_data.csv')

In [105]:
# Count, per label, how often `token` occurs in the titles of each training pair.
# Slot 0: token in both titles; slot 1: only in title_one; slot 2: only in title_two.
# Pairs where neither title contains the token are not counted.
label_0 = [0, 0, 0]  # pairs whose label is not 1
label_1 = [0, 0, 0]  # pairs whose label is 1
token = '256 gb'

# itertuples() yields one lightweight namedtuple per row. Iterating `df.iloc`
# only works through the legacy __getitem__ protocol and builds a Series per row.
for row in df.itertuples(index=False):
    in_title_one = token in row.title_one
    in_title_two = token in row.title_two

    if in_title_one and in_title_two:
        slot = 0
    elif in_title_one:
        slot = 1
    elif in_title_two:
        slot = 2
    else:
        continue

    counts = label_1 if row.label == 1 else label_0
    counts[slot] += 1

print("In both titles: ", label_0[0], label_1[0])
print("In title one, not title 2: ", label_0[1], label_1[1])
print("In title two not title 1: ", label_0[2], label_1[2])

In both titles:  216 281
In title one, not title 2:  72 5
In title two not title 1:  85 11


In [114]:
# Restore the saved weights into `net` and switch the module to evaluation mode.
# map_location='cpu' lets a checkpoint that was saved from a GPU load on a CPU-only
# machine; inference() converts the model output with .numpy(), which needs CPU tensors.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a trusted source.
net.load_state_dict(
    torch.load('./models/CharacterBERT_0.2.3.1/CharacterBERT_epoch4.pt', map_location='cpu')
)
#net = torch.load('./models/BERT/0.2.1.1_BERT_epoch_3.pt')
net.eval()
print('Loaded Model')

Loaded Model


In [84]:
def cosine_sim(title1=None, title2=None):
    """Return the cosine similarity of the `net.bert` outputs for two titles.

    Args:
        title1: First title. When None, it is read interactively with input().
        title2: Second title. When None, it is read interactively with input().

    Returns:
        The tensor produced by torch.nn.CosineSimilarity(dim=1) applied to
        element [1] of `net.bert(...)` for each title.
        NOTE(review): element [1] is presumably the pooled output - confirm
        against CharacterBertModel.
    """
    if title1 is None:
        title1 = input('First title: ')
    if title2 is None:
        title2 = input('Second title: ')

    title1 = "[CLS] " + remove_stop_words(title1) + " [SEP]"
    title2 = "[CLS] " + remove_stop_words(title2) + " [SEP]"

    # We need to split up each token in the title by the space
    # So, "intel core i7 7700k" becomes ["intel", "core", "i7", "7700k"]
    input1 = title1.split(" ")
    input2 = title2.split(" ")

    # Now, we feed the token lists into the CharacterBERT indexer, which turns each
    # batch (here a single title) into a padded tensor that `net.bert` accepts.
    input1 = net.character_indexer.as_padded_tensor([input1])
    input2 = net.character_indexer.as_padded_tensor([input2])

    # This is inference only, so skip building the autograd graph.
    with torch.no_grad():
        out1 = net.bert(input1)[1]
        out2 = net.bert(input2)[1]

    return torch.nn.CosineSimilarity(dim=1, eps=1e-6)(out1, out2)

In [89]:
# Interactive check; the recorded run compares "16 gb" with "16gb".
cosine_sim()

First title: 16 gb
Second title: 16gb


tensor([0.9890], grad_fn=<DivBackward0>)

In [5]:
def inference(title1=None, title2=None):
    """Classify a pair of titles with `net` and print the result.

    Args:
        title1: First title. When None, it is read interactively with input().
        title2: Second title. When None, it is read interactively with input().

    Prints the argmax of the model output and the two output values, labelled
    Negative (index 0) and Positive (index 1), formatted as percentages.
    Returns None.
    """
    if title1 is None:
        title1 = input('First title: ')
    if title2 is None:
        title2 = input('Second title: ')

    title1 = remove_stop_words(title1)
    title2 = remove_stop_words(title2)

    # `net` is called with a (1, 2) string array: one row holding the title pair.
    data = np.array([title1, title2]).reshape(1, 2)

    # This is inference only, so skip building the autograd graph.
    with torch.no_grad():
        forward = net(data)
    np_forward = forward.detach().numpy()[0]

    print('Output: {}'.format(torch.argmax(forward)))
    # The outputs are fractions in [0, 1]; the '%' format spec scales them by 100,
    # so 0.9643 is shown as 96.43% rather than the misleading "0.9643%".
    print('Softmax: Negative {:.2%}, Positive {:.2%}'.format(np_forward[0], np_forward[1]))

In [94]:
# Recorded run: ASUS ZenBook pair, both titles with spaced capacities ("8 GB", "512 GB"); first title says "Windows Home".
inference()

First title: ASUS ZenBook 14 Ultra-Slim Laptop 14" Full HD NanoEdge Bezel, Intel Core i7-1065G7, 8 GB RAM, 512 GB PCIe SSD, NumberPad, Thunderbolt, Windows Home, Pine Grey
Second title: ASUS - ZenBook 14"" Ultra-Slim FHD Notebook - i7-1065G7 - 8 GB 512 GB in Pine Grey - Pine Grey
Output: 0
Softmax: Negative 1.0000%, Positive 0.0000%


In [78]:
# Recorded run: ASUS ZenBook pair, both titles with spaced capacities ("8 GB", "512 GB"); first title says "Windows Home".
inference()

First title: ASUS ZenBook 14 Ultra-Slim Laptop 14" Full HD NanoEdge Bezel, Intel Core i7-1065G7, 8 GB RAM, 512 GB PCIe SSD, NumberPad, Thunderbolt, Windows Home, Pine Grey
Second title: ASUS - ZenBook 14"" Ultra-Slim FHD Notebook - i7-1065G7 - 8 GB 512 GB in Pine Grey - Pine Grey
Output: 1
Softmax: Negative 0.0357%, Positive 0.9643%


In [11]:
# Recorded run: Lenovo Yoga C740 listing vs. a Lenovo Yoga 2-in-1 listing, both i5-10210U / 8GB / 256GB.
inference()

First title: Lenovo Yoga C740 14" Full HD 1080p Touchscreen Laptop PC, Intel Core i5-10210U Quad Core Processor, 8GB DDR4 RAM, 256GB SSD, Backlit Keyboard, Webcam, WiFi, Bluetooth, Windows 10, Mica
Second title: Lenovo Yoga 14.0" Full HD 2-in-1 Touchscreen Notebook, 10th Gen Intel Core i5-10210U, 8GB DDR4, 256GB SSD, Intel UHD Graphics, Wifi-AC, Bluetooth, USB, Fingerprint Reader, Windows 10
Output: 1
Softmax: Negative 0.0444%, Positive 0.9556%


In [10]:
# Recorded run: the Lenovo Yoga pair with "2-in-1" present in the first title as well as the second.
inference()

First title: Lenovo Yoga C740 2-in-1 14" Full HD 1080p Touchscreen Laptop PC, Intel Core i5-10210U Quad Core Processor, 8GB DDR4 RAM, 256GB SSD, Backlit Keyboard, Webcam, WiFi, Bluetooth, Windows 10, Mica
Second title: Lenovo Yoga 14.0" Full HD 2-in-1 Touchscreen Notebook, 10th Gen Intel Core i5-10210U, 8GB DDR4, 256GB SSD, Intel UHD Graphics, Wifi-AC, Bluetooth, USB, Fingerprint Reader, Windows 10
Output: 1
Softmax: Negative 0.0433%, Positive 0.9567%


hp chromebook 15 4"" intel core i7 4770k cpu 480 gb hdd 104 gb ram

In [23]:
# Recorded run: ASUS ZenBook pair, both titles with spaced capacities ("8 GB", "512 GB"); first title says "Windows Home".
inference()

First title: ASUS ZenBook 14 Ultra-Slim Laptop 14" Full HD NanoEdge Bezel, Intel Core i7-1065G7, 8 GB RAM, 512 GB PCIe SSD, NumberPad, Thunderbolt, Windows Home, Pine Grey
Second title: ASUS - ZenBook 14"" Ultra-Slim FHD Notebook - i7-1065G7 - 8 GB 512 GB in Pine Grey - Pine Grey
Output: 1
Softmax: Negative 0.3740%, Positive 0.6260%


In [25]:
# Recorded run: ASUS ZenBook pair; first title uses unspaced capacities ("8GB", "512GB"), second uses spaced ones.
inference()

First title: ASUS ZenBook 14 Ultra-Slim Laptop 14" Full HD NanoEdge Bezel, Intel Core i7-1065G7, 8GB RAM, 512GB PCIe SSD, NumberPad, Thunderbolt, Windows Home, Pine Grey
Second title: ASUS - ZenBook 14"" Ultra-Slim FHD Notebook - i7-1065G7 - 8 GB 512 GB in Pine Grey - Pine Grey
Output: 1
Softmax: Negative 0.4696%, Positive 0.5304%


In [36]:
# Recorded run: ASUS ZenBook pair; first title uses unspaced capacities ("8GB", "512GB") and "Windows 10 Home".
inference()

First title: ASUS ZenBook 14 Ultra-Slim Laptop 14" Full HD NanoEdge Bezel, Intel Core i7-1065G7, 8GB RAM, 512GB PCIe SSD, NumberPad, Thunderbolt, Windows 10 Home, Pine Grey
Second title: ASUS - ZenBook 14"" Ultra-Slim FHD Notebook - i7-1065G7 - 8 GB 512 GB in Pine Grey - Pine Grey
Output: 1
Softmax: Negative 0.0491%, Positive 0.9509%


In [37]:
# Recorded run: ASUS ZenBook pair; first title uses spaced capacities ("8 GB", "512 GB") and "Windows 10 Home".
inference()

First title: ASUS ZenBook 14 Ultra-Slim Laptop 14" Full HD NanoEdge Bezel, Intel Core i7-1065G7, 8 GB RAM, 512 GB PCIe SSD, NumberPad, Thunderbolt, Windows 10 Home, Pine Grey
Second title: ASUS - ZenBook 14"" Ultra-Slim FHD Notebook - i7-1065G7 - 8 GB 512 GB in Pine Grey - Pine Grey
Output: 1
Softmax: Negative 0.4937%, Positive 0.5063%


In [38]:
# Recorded run: ASUS ZenBook pair; first title uses spaced capacities ("8 GB", "512 GB") and "Windows Home".
inference()

First title: ASUS ZenBook 14 Ultra-Slim Laptop 14" Full HD NanoEdge Bezel, Intel Core i7-1065G7, 8 GB RAM, 512 GB PCIe SSD, NumberPad, Thunderbolt, Windows Home, Pine Grey
Second title: ASUS - ZenBook 14"" Ultra-Slim FHD Notebook - i7-1065G7 - 8 GB 512 GB in Pine Grey - Pine Grey
Output: 1
Softmax: Negative 0.4747%, Positive 0.5253%


In [58]:
# Recorded run: ASUS ZenBook pair; first title uses unspaced capacities ("8GB", "512GB") and "Windows Home".
inference()

First title: ASUS ZenBook 14 Ultra-Slim Laptop 14" Full HD NanoEdge Bezel, Intel Core i7-1065G7, 8GB RAM, 512GB PCIe SSD, NumberPad, Thunderbolt, Windows Home, Pine Grey
Second title: ASUS - ZenBook 14"" Ultra-Slim FHD Notebook - i7-1065G7 - 8 GB 512 GB in Pine Grey - Pine Grey
Output: 1
Softmax: Negative 0.4696%, Positive 0.5304%


In [64]:
# Recorded run: Lenovo Yoga C740 listing vs. a Lenovo Yoga 2-in-1 listing, both i5-10210U / 8GB / 256GB.
inference()

First title: Lenovo Yoga C740 14" Full HD 1080p Touchscreen Laptop PC, Intel Core i5-10210U Quad Core Processor, 8GB DDR4 RAM, 256GB SSD, Backlit Keyboard, Webcam, WiFi, Bluetooth, Windows 10, Mica
Second title: Lenovo Yoga 14.0" Full HD 2-in-1 Touchscreen Notebook, 10th Gen Intel Core i5-10210U, 8GB DDR4, 256GB SSD, Intel UHD Graphics, Wifi-AC, Bluetooth, USB, Fingerprint Reader, Windows 10
Output: 1
Softmax: Negative 0.3194%, Positive 0.6806%


In [65]:
# Recorded run: the same Lenovo Yoga 2-in-1 title entered for both inputs.
inference()

First title: Lenovo Yoga 14.0" Full HD 2-in-1 Touchscreen Notebook, 10th Gen Intel Core i5-10210U, 8 GB DDR4, 256 GB SSD, Intel UHD Graphics, Wifi-AC, Bluetooth, USB, Fingerprint Reader, Windows 10
Second title: Lenovo Yoga 14.0" Full HD 2-in-1 Touchscreen Notebook, 10th Gen Intel Core i5-10210U, 8 GB DDR4, 256 GB SSD, Intel UHD Graphics, Wifi-AC, Bluetooth, USB, Fingerprint Reader, Windows 10
Output: 1
Softmax: Negative 0.0352%, Positive 0.9648%


In [67]:
# Recorded run: the same Lenovo Yoga 2-in-1 title twice, with the trailing "Windows 10" dropped from the second.
inference()

First title: Lenovo Yoga 14.0" Full HD 2-in-1 Touchscreen Notebook, 10th Gen Intel Core i5-10210U, 8 GB DDR4, 256 GB SSD, Intel UHD Graphics, Wifi-AC, Bluetooth, USB, Fingerprint Reader, Windows 10
Second title: Lenovo Yoga 14.0" Full HD 2-in-1 Touchscreen Notebook, 10th Gen Intel Core i5-10210U, 8 GB DDR4, 256 GB SSD, Intel UHD Graphics, Wifi-AC, Bluetooth, USB, Fingerprint Reader,
Output: 1
Softmax: Negative 0.0353%, Positive 0.9647%


In [69]:
# Recorded run: long ASUS title (with "Thunderbolt 3", "Windows 10 Home", "UX425JA-EB71") vs. the quoted short ASUS title with unspaced "8GB 512GB".
inference()

First title: ASUS ZenBook 14 Ultra-Slim Laptop 14" Full HD NanoEdge Bezel, Intel Core i7-1065G7, 8 GB RAM, 512 GB PCIe SSD, NumberPad, Thunderbolt 3, Windows 10 Home, Pine Grey, UX425JA-EB71
Second title: "ASUS - ZenBook 14"" Ultra-Slim FHD Notebook - i7-1065G7 - 8GB 512GB in Pine Grey - Pine Grey "
Output: 1
Softmax: Negative 0.0353%, Positive 0.9647%


In [71]:
# Recorded run: quoted short ASUS title (spaced "8 GB 512 GB") first, long ASUS title second.
inference()

First title: "ASUS - ZenBook 14"" Ultra-Slim FHD Notebook - i7-1065G7 - 8 GB 512 GB in Pine Grey - Pine Grey "
Second title: ASUS ZenBook 14 Ultra-Slim Laptop 14" Full HD NanoEdge Bezel, Intel Core i7-1065G7, 8 GB RAM, 512 GB PCIe SSD, NumberPad, Thunderbolt 3, Windows 10 Home, Pine Grey, UX425JA-EB71
Output: 1
Softmax: Negative 0.0353%, Positive 0.9647%


In [72]:
# Recorded run: "intel core i7 7700k" vs. "intel core i7 8700k" (titles differ in one token).
inference()

First title: intel core i7 7700k
Second title: intel core i7 8700k
Output: 0
Softmax: Negative 0.9625%, Positive 0.0375%


In [73]:
# Recorded run: two HP EliteBook x360 1030 G4 listings, each wrapped in quotes, with unspaced capacities ("512GB", "16GB").
inference()

First title: "HP EliteBook x360 1030 G4 13.3-Inch Touchscreen Laptop with Verizon/AT&T Compatible 4G LTE Wireless Feature (i7-8665U Processor, WiFi+BT5, 512GB SSD, 16GB RAM, HD-IR Camera) Windows 10 Pro "
Second title: "HP 13.3"" EliteBook x360 1030 G4 Multi-Touch 2-in-1 Notebook - 13.3"" FHD UWVA Touchscreen - 1.9 GHz Intel Core i7-8665U Quad-Core - 512GB SSD - 16GB - Windows 10 pro (8TJ80UC) "
Output: 1
Softmax: Negative 0.0356%, Positive 0.9644%


In [74]:
# Recorded run: the two HP EliteBook x360 1030 G4 listings with spaced capacities ("512 GB", "16 GB"); only the first is wrapped in quotes.
inference()

First title: "HP 13.3"" EliteBook x360 1030 G4 Multi-Touch 2-in-1 Notebook - 13.3"" FHD UWVA Touchscreen - 1.9 GHz Intel Core i7-8665U Quad-Core - 512 GB SSD - 16 GB - Windows 10 pro (8TJ80UC) "
Second title: HP EliteBook x360 1030 G4 13.3-Inch Touchscreen Laptop with Verizon/AT&T Compatible 4G LTE Wireless Feature (i7-8665U Processor, WiFi+BT5, 512 GB SSD, 16 GB RAM, HD-IR Camera) Windows 10 Pro
Output: 1
Softmax: Negative 0.0371%, Positive 0.9629%


In [77]:
# Recorded run: "corsair 16gb rgb memory" vs. "corsair rgb 16 gb ram".
inference()

First title: corsair 16gb rgb memory
Second title: corsair rgb 16 gb ram
Output: 1
Softmax: Negative 0.4994%, Positive 0.5006%


In [113]:
# Recorded run: the two HP EliteBook x360 1030 G4 listings with spaced capacities ("512 GB", "16 GB"), neither wrapped in quotes.
inference()

First title: HP 13.3"" EliteBook x360 1030 G4 Multi-Touch 2-in-1 Notebook - 13.3"" FHD UWVA Touchscreen - 1.9 GHz Intel Core i7-8665U Quad-Core - 512 GB SSD - 16 GB - Windows 10 pro (8TJ80UC)
Second title: HP EliteBook x360 1030 G4 13.3-Inch Touchscreen Laptop with Verizon/AT&T Compatible 4G LTE Wireless Feature (i7-8665U Processor, WiFi+BT5, 512 GB SSD, 16 GB RAM, HD-IR Camera) Windows 10 Pro
Output: 1
Softmax: Negative 0.0338%, Positive 0.9662%
