In [6]:
#pip install sentence_transformers

In [7]:
from torch.utils.data import DataLoader
from sentence_transformers import models, losses, util
from sentence_transformers import SentencesDataset, LoggingHandler, SentenceTransformer
from sentence_transformers.cross_encoder import CrossEncoder
from sentence_transformers.cross_encoder.evaluation import CECorrelationEvaluator
from sentence_transformers.evaluation import EmbeddingSimilarityEvaluator
from sentence_transformers.readers import InputExample
from datetime import datetime
import logging
import csv
import torch
import tqdm
import sys
import math
import gzip
import os
import pandas as pd

In [8]:
#### Just some code to print debug information to stdout
logging.basicConfig(format='%(asctime)s - %(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S',
                    level=logging.INFO,
                    handlers=[LoggingHandler()])
#### /print debug information to stdout

In [9]:
#You can specify any huggingface/transformers pre-trained model here, for example, bert-base-uncased, roberta-base, xlm-roberta-basae
model_name = 'roberta-base'
top_k = len(sys.argv) if len(sys.argv) > 2 else 3 #top_k는 전체 문장 후보군의 개수를 넘지 않아야 하므로

batch_size = 16
num_epochs = 1
max_seq_length = 512

In [10]:
###### Read Datasets ######

#Check if dataset exsist. If not, download and extract  it
sts_dataset_path = 'datasets/stsbenchmark.tsv.gz'

if not os.path.exists(sts_dataset_path):
    util.http_get('https://sbert.net/datasets/stsbenchmark.tsv.gz', sts_dataset_path)

cross_encoder_path = 'output/cross-encoder/stsb_indomain_'+model_name.replace("/", "-")+'-'+datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
bi_encoder_path = 'output/bi-encoder/stsb_augsbert_BM25_'+model_name.replace("/", "-")+'-'+datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

In [11]:
###### Cross-encoder (simpletransformers) ######
logging.info("Loading sentence-transformers model: {}".format(model_name))
# Use Huggingface/transformers model (like BERT, RoBERTa, XLNet, XLM-R) for cross-encoder model
cross_encoder = CrossEncoder(model_name, num_labels=1)


###### Bi-encoder (sentence-transformers) ######
logging.info("Loading bi-encoder model: {}".format(model_name))
# Use Huggingface/transformers model (like BERT, RoBERTa, XLNet, XLM-R) for mapping tokens to embeddings
word_embedding_model = models.Transformer(model_name, max_seq_length=max_seq_length)

# Apply mean pooling to get one fixed sized sentence vector
pooling_model = models.Pooling(word_embedding_model.get_word_embedding_dimension())

bi_encoder = SentenceTransformer(modules=[word_embedding_model, pooling_model])


2021-10-22 08:50:03 - Loading sentence-transformers model: roberta-base


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifi

2021-10-22 08:50:17 - Use pytorch device: cpu
2021-10-22 08:50:17 - Loading bi-encoder model: roberta-base


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.dense.weight', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


2021-10-22 08:50:30 - Use pytorch device: cpu


In [12]:
#####################################################################
#
# Step 1: Train cross-encoder model with (gold) STS benchmark dataset
#
#####################################################################

logging.info("Step 1: Train cross-encoder: ({}) with STSbenchmark".format(model_name))

gold_samples = []
dev_samples = []
test_samples = []

2021-10-22 08:50:30 - Step 1: Train cross-encoder: (roberta-base) with STSbenchmark


In [13]:
with gzip.open(sts_dataset_path, 'rt', encoding='utf8') as fIn:
    reader = csv.DictReader(fIn, delimiter='\t', quoting=csv.QUOTE_NONE)
    for row in reader:
        score = float(row['score']) / 5.0  # Normalize score to range 0 ... 1

        if row['split'] == 'dev':
            dev_samples.append(InputExample(texts=[row['sentence1'], row['sentence2']], label=score))
        elif row['split'] == 'test':
            test_samples.append(InputExample(texts=[row['sentence1'], row['sentence2']], label=score))
        else:
            #As we want to get symmetric scores, i.e. CrossEncoder(A,B) = CrossEncoder(B,A), we pass both combinations to the train set
            gold_samples.append(InputExample(texts=[row['sentence1'], row['sentence2']], label=score))
            gold_samples.append(InputExample(texts=[row['sentence2'], row['sentence1']], label=score))

In [14]:
# We wrap gold_samples (which is a List[InputExample]) into a pytorch DataLoader
train_dataloader = DataLoader(gold_samples, shuffle=True, batch_size=batch_size)


# We add an evaluator, which evaluates the performance during training
evaluator = CECorrelationEvaluator.from_input_examples(dev_samples, name='sts-dev')

# Configure the training
warmup_steps = math.ceil(len(train_dataloader) * num_epochs * 0.1) #10% of train data for warm-up
logging.info("Warmup-steps: {}".format(warmup_steps))

# Train the cross-encoder model
cross_encoder.fit(train_dataloader=train_dataloader,
          evaluator=evaluator,
          epochs=num_epochs,
          warmup_steps=warmup_steps,
          output_path=cross_encoder_path)

2021-10-22 08:50:30 - Warmup-steps: 72


Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Iteration:   0%|          | 0/719 [00:00<?, ?it/s]

2021-10-22 11:31:57 - CECorrelationEvaluator: Evaluating the model on sts-dev dataset after epoch 0:
2021-10-22 11:36:33 - Correlation:	Pearson: 0.9047	Spearman: 0.9015
2021-10-22 11:36:33 - Save model to output/cross-encoder/stsb_indomain_roberta-base-2021-10-22_08-50-03


In [15]:
index_name = "stsb" # index-name should be in lowercase
logging.info("Step 2.1: Generate STSbenchmark (silver dataset) using top-{} bm25 combinations".format(top_k))

unique_sentences = set()

for sample in gold_samples:
    unique_sentences.update(sample.texts)

unique_sentences = list(unique_sentences) # unique sentences
sent2idx = {sentence: idx for idx, sentence in enumerate(unique_sentences)} # storing id and sentence in dictionary
duplicates = set((sent2idx[data.texts[0]], sent2idx[data.texts[1]]) for data in gold_samples) # not to include gold pairs of sentences again


2021-10-22 11:36:35 - Step 2.1: Generate STSbenchmark (silver dataset) using top-3 bm25 combinations


In [16]:
############################################################################
#
# Step 2: Find silver pairs to label
#
############################################################################

#### Top k similar sentences to be retrieved ####
#### Larger the k, bigger the silver dataset ####

logging.info("Step 2.1: Generate STSbenchmark (silver dataset) using pretrained SBERT \
    model and top-{} semantic search combinations".format(top_k))



2021-10-22 11:36:37 - Step 2.1: Generate STSbenchmark (silver dataset) using pretrained SBERT     model and top-3 semantic search combinations


In [17]:
silver_data = []
sentences = set()
# df_claim_oneclaim = pd.read_csv ('/home/ubuntu/deeppatentsimilarity/patentdata/df_claims_oneclaim_500.csv', encoding="ISO-8859-1")
#df_claim_oneclaim = pd.read_csv ('/home/ubuntu/deeppatentsimilarity/patentdata/df_claims_oneclaim_1000.csv', encoding="ISO-8859-1")
#df_claim_oneclaim['text'] = df_claim_oneclaim['text'].str[3:]
#sentences = df_claim_oneclaim.text.to_list()
#sent2idx = dict(zip(df_claim_oneclaim.text, df_claim_oneclaim.patent_id))
# duplicates  = pd.read_csv ('/home/ubuntu/deeppatentsimilarity/patentdata/duplicates_4.csv', encoding="ISO-8859-1")

for sample in gold_samples:
    sentences.update(sample.texts)

sentences = list(sentences) # unique sentences
sent2idx = {sentence: idx for idx, sentence in enumerate(sentences)} # storing id and sentence in dictionary
duplicates = set((sent2idx[data.texts[0]], sent2idx[data.texts[1]]) for data in gold_samples) # not to include gold pairs of sentences again


In [18]:
# For simplicity we use a pretrained model
semantic_model_name = 'roberta-base-nli-stsb-mean-tokens'
semantic_search_model = SentenceTransformer(semantic_model_name)
logging.info("Encoding unique sentences with semantic search model: {}".format(semantic_model_name))


2021-10-22 11:36:37 - Load pretrained SentenceTransformer: roberta-base-nli-stsb-mean-tokens
2021-10-22 11:36:55 - Use pytorch device: cpu
2021-10-22 11:36:55 - Encoding unique sentences with semantic search model: roberta-base-nli-stsb-mean-tokens


In [19]:
# encoding all unique sentences present in the training dataset
embeddings = semantic_search_model.encode(sentences, batch_size=batch_size, convert_to_tensor=True)

Batches:   0%|          | 0/661 [00:00<?, ?it/s]

In [20]:
logging.info("Retrieve top-{} with semantic search model: {}".format(top_k, semantic_model_name))

2021-10-22 11:49:37 - Retrieve top-3 with semantic search model: roberta-base-nli-stsb-mean-tokens


In [21]:
from tqdm.notebook import tqdm

In [22]:
silver_data = []
sentences = set()

for sample in gold_samples:
    sentences.update(sample.texts)

sentences = list(sentences) # unique sentences
sent2idx = {sentence: idx for idx, sentence in enumerate(sentences)} # storing id and sentence in dictionary
duplicates = set((sent2idx[data.texts[0]], sent2idx[data.texts[1]]) for data in gold_samples) # not to include gold pairs of sentences again

# retrieving top-k sentences given a sentence from the dataset
for idx in tqdm(range(len(sentences)),unit="docs"):
    sentence_embedding = embeddings[idx]
    cos_scores = util.pytorch_cos_sim(sentence_embedding, embeddings)[0]
    cos_scores = cos_scores.cpu()
    top_results = torch.topk(cos_scores, k=top_k+1)
    for score, iid in zip(top_results[0], top_results[1]):
        if iid != idx and (iid, idx) not in duplicates:
            silver_data.append((sentences[idx], sentences[iid]))
            duplicates.add((idx,iid))


  0%|          | 0/10566 [00:00<?, ?docs/s]

In [23]:
logging.info("Length of silver_dataset generated: {}".format(len(silver_data)))
logging.info("Step 2.2: Label STSbenchmark (silver dataset) with cross-encoder: {}".format(model_name))
cross_encoder = CrossEncoder(cross_encoder_path)
silver_scores = cross_encoder.predict(silver_data)

# All model predictions should be between [0,1]
assert all(0.0 <= score <= 1.0 for score in silver_scores)

2021-10-22 11:57:52 - Length of silver_dataset generated: 31698
2021-10-22 11:57:52 - Step 2.2: Label STSbenchmark (silver dataset) with cross-encoder: roberta-base
2021-10-22 11:57:54 - Use pytorch device: cpu


Batches:   0%|          | 0/991 [00:00<?, ?it/s]

In [24]:
############################################################################################
#
# Step 3: Train bi-encoder model with both STSbenchmark and labeled AllNlI - Augmented SBERT
#
############################################################################################

In [25]:
logging.info("Step 3: Train bi-encoder: {} with STSbenchmark (gold + silver dataset)".format(model_name))

# Convert the dataset to a DataLoader ready for training
logging.info("Read STSbenchmark gold and silver train dataset")
silver_samples = list(InputExample(texts=[data[0], data[1]], label=score) for \
    data, score in zip(silver_data, silver_scores))

train_dataset = SentencesDataset(gold_samples + silver_samples, bi_encoder)
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
train_loss = losses.CosineSimilarityLoss(model=bi_encoder)

logging.info("Read STSbenchmark dev dataset")
evaluator = EmbeddingSimilarityEvaluator.from_input_examples(dev_samples, name='sts-dev')

2021-10-22 13:55:39 - Step 3: Train bi-encoder: roberta-base with STSbenchmark (gold + silver dataset)
2021-10-22 13:55:39 - Read STSbenchmark gold and silver train dataset
2021-10-22 13:55:39 - Read STSbenchmark dev dataset


In [26]:
# Configure the training.
warmup_steps = math.ceil(len(train_dataset) * num_epochs / batch_size * 0.1) #10% of train data for warm-up
logging.info("Warmup-steps: {}".format(warmup_steps))

# Train the bi-encoder model
bi_encoder.fit(train_objectives=[(train_dataloader, train_loss)],
          evaluator=evaluator,
          epochs=num_epochs,
          evaluation_steps=1000,
          warmup_steps=warmup_steps,
          output_path=bi_encoder_path
          )

2021-10-22 13:55:39 - Warmup-steps: 270


Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Iteration:   0%|          | 0/2700 [00:00<?, ?it/s]

2021-10-22 17:10:25 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 0 after 1000 steps:
2021-10-22 17:13:53 - Cosine-Similarity :	Pearson: 0.8759	Spearman: 0.8751
2021-10-22 17:13:53 - Manhattan-Distance:	Pearson: 0.8513	Spearman: 0.8526
2021-10-22 17:13:53 - Euclidean-Distance:	Pearson: 0.8517	Spearman: 0.8533
2021-10-22 17:13:53 - Dot-Product-Similarity:	Pearson: 0.8015	Spearman: 0.8089
2021-10-22 17:13:53 - Save model to output/bi-encoder/stsb_augsbert_BM25_roberta-base-2021-10-22_08-50-03
2021-10-22 20:10:34 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 0 after 2000 steps:
2021-10-22 20:14:40 - Cosine-Similarity :	Pearson: 0.8829	Spearman: 0.8809
2021-10-22 20:14:40 - Manhattan-Distance:	Pearson: 0.8614	Spearman: 0.8626
2021-10-22 20:14:40 - Euclidean-Distance:	Pearson: 0.8620	Spearman: 0.8634
2021-10-22 20:14:40 - Dot-Product-Similarity:	Pearson: 0.8175	Spearman: 0.8217
2021-10-22 20:14:40 - Save model to output/b

In [27]:
#################################################################################
#
# Evaluate cross-encoder and Augmented SBERT performance on STS benchmark dataset
#
#################################################################################

# load the stored augmented-sbert model
bi_encoder = SentenceTransformer(bi_encoder_path)
test_evaluator = EmbeddingSimilarityEvaluator.from_input_examples(test_samples, name='sts-test')
test_evaluator(bi_encoder, output_path=bi_encoder_path)

2021-10-22 22:44:50 - Load pretrained SentenceTransformer: output/bi-encoder/stsb_augsbert_BM25_roberta-base-2021-10-22_08-50-03
2021-10-22 22:44:52 - Use pytorch device: cpu
2021-10-22 22:44:52 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-test dataset:
2021-10-22 22:46:47 - Cosine-Similarity :	Pearson: 0.8554	Spearman: 0.8471
2021-10-22 22:46:47 - Manhattan-Distance:	Pearson: 0.8351	Spearman: 0.8319
2021-10-22 22:46:47 - Euclidean-Distance:	Pearson: 0.8362	Spearman: 0.8326
2021-10-22 22:46:47 - Dot-Product-Similarity:	Pearson: 0.7639	Spearman: 0.7569


0.8471109552894693

In [28]:
####AugSBERT+KNN

In [29]:
import scipy.spatial
import numpy as np
import os, json
import glob
import re
import torch
import pandas as pd
from sentence_transformers import SentenceTransformer
from tokenizers import Tokenizer
from datetime import datetime
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
torch.manual_seed(1)
import pandas as pd
import torch
import random
import itertools

In [30]:
F1Measure_list = []
Recall_list = []
Accuracy_list = []
Precision_list = []
Hamming_Loss_list = []

In [31]:
ls c:/

스위치가 틀립니다 - "".


In [32]:
def get_top_n_similar_patents_df(new_claim, claims):

    embedder = SentenceTransformer('output/bi-encoder/stsb_augsbert_BM25_roberta-base-2021-10-22_08-50-03')
    query_embeddings = embedder.encode([new_claim])
    claim_embeddings = embedder.encode(claims)
    top_n = 40
    distances = scipy.spatial.distance.cdist(query_embeddings, claim_embeddings, "cosine")[0]
    results = zip(range(len(distances)), distances)
    results = sorted(results, key=lambda x: x[1])
    top_claim_ids = []
    top_claims = []
    top_similarity_scores = []

    # Find the closest 100 patent claims for each new_claim based on cosine similarity
    for idx, distance in results[0:top_n]:
        top_claim_ids.append(patent_id[idx])
        top_claims.append(claims[idx])
        top_similarity_scores.append(round((1-distance), 4))
        print('Patent ID: ' + str(patent_id[idx]))
        print('PubMed Claim: ' + claims[idx])
        print('Similarity Score: ' + "%.4f" % (1-distance))
        print('\n')
        
    top_100_similar_patents_df = pd.DataFrame({
        'top_claim_ids': top_claim_ids,
        'cosine_similarity': top_similarity_scores,
        'claims': top_claims,
    })
    
    return top_100_similar_patents_df

def F1Measure(y_true, y_pred):
    temp = 0
    for i in range(y_true.shape[0]):
        if (sum(y_true[i]) == 0) and (sum(y_pred[i]) == 0):
            continue
        temp+= (2*sum(np.logical_and(y_true[i], y_pred[i])))/ (sum(y_true[i])+sum(y_pred[i]))
    return temp/ y_true.shape[0]

def Recall(y_true, y_pred):
    temp = 0
    for i in range(y_true.shape[0]):
        if sum(y_pred[i]) == 0:
            continue
        temp+= sum(np.logical_and(y_true[i], y_pred[i]))/ sum(y_pred[i])
    return temp/ y_true.shape[0]

def Precision(y_true, y_pred):
    temp = 0
    for i in range(y_true.shape[0]):
        if sum(y_true[i]) == 0:
            continue
        temp+= sum(np.logical_and(y_true[i], y_pred[i]))/ sum(y_true[i])
    return temp/ y_true.shape[0]

def Hamming_Loss(y_true, y_pred):
    temp=0
    for i in range(y_true.shape[0]):
        temp += np.size(y_true[i] == y_pred[i]) - np.count_nonzero(y_true[i] == y_pred[i])
    return temp/(y_true.shape[0] * y_true.shape[1])

def Accuracy(y_true, y_pred):
    temp = 0
    for i in range(y_true.shape[0]):
        temp += sum(np.logical_and(y_true[i], y_pred[i])) / sum(np.logical_or(y_true[i], y_pred[i]))
    return temp / y_true.shape[0]

df_claim_cpc_test = pd.read_csv('C:/Users/강지연/cpc_test_60.csv', encoding='ISO-8859-1')
df_claim_cpc_train = pd.read_csv('C:/Users/강지연/cpc_training_234.csv', encoding='ISO-8859-1')
# df_claim_cpc_train_1000 = pd.read_csv('/home/ubuntu/deeppatentsimilarity/patentdata/df_claim_cpc_all_len_150_200_1000.csv', encoding='ISO-8859-1')
# df_claim_cpc_test = pd.read_csv('/home/ubuntu/deeppatentsimilarity/patentdata/prelabel/NewTest/df_1_L_43259_test_100.csv', encoding='ISO-8859-1')
# df_claim_cpc_train = pd.read_csv('/home/ubuntu/deeppatentsimilarity/patentdata/prelabel/NewTest/df_1_L_43259.csv', encoding='ISO-8859-1')

claims = list(df_claim_cpc_train.text)
patent_id = list(df_claim_cpc_train.patent_id)

listofpredictdfs = []

In [41]:
def get_top_n_similar_patents_df(new_claim, claims):

    embedder = SentenceTransformer('output/bi-encoder/stsb_augsbert_BM25_roberta-base-2021-10-22_08-50-03')
    query_embeddings = embedder.encode([new_claim])
    claim_embeddings = embedder.encode(claims)
    top_n = 40
    distances = scipy.spatial.distance.cdist(query_embeddings, claim_embeddings, "cosine")[0]
    results = zip(range(len(distances)), distances)
    results = sorted(results, key=lambda x: x[1])
    top_claim_ids = []
    top_claims = []
    top_similarity_scores = []

    print("New_claim: " + new_claim + '\n')

    # Find the closest 100 patent claims for each new_claim based on cosine similarity
    for idx, distance in results[0:top_n]:
        top_claim_ids.append(patent_id[idx])
        top_claims.append(claims[idx])
        top_similarity_scores.append(round((1-distance), 4))
        print('Patent ID: ' + str(patent_id[idx]))
        print('PubMed Claim: ' + claims[idx])
        print('Similarity Score: ' + "%.4f" % (1-distance))
        print('\n')
        
    top_100_similar_patents_df = pd.DataFrame({
        'top_claim_ids': top_claim_ids,
        'cosine_similarity': top_similarity_scores,
        'claims': top_claims,
    })
    
    return top_100_similar_patents_df

In [61]:
len(claims)

240

In [42]:
df = df_claim_cpc_train

In [43]:
df = df.rename(columns={"text_x": "text", "section_id_y": "section_id", "subsection_id_y": "subsection_id","group_id_y": "group_id", "subgroup_id_y": "subgroup_id"})

In [44]:
claims = list(df.text)
patent_id = list(df.patent_id)

In [45]:
new_claim = ['The computer-implemented method of claim 4 wherein the filtering criteria comprises a number of page groups in the page group list is greater than 1, and the representative rank is less than or equal to the exit-entry max paths. The computer-implemented method of claim 2 wherein the selecting of the top n unconverted session paths further comprises only selecting unconverted sessions that meet a filtering criteria. The computer-implemented method of claim 1 further comprising creating a master table that lists the set of load test scenarios and the think times that represents a distribution of paths taken by real users during the peak hour.']

In [46]:
new_claim

['The computer-implemented method of claim 4 wherein the filtering criteria comprises a number of page groups in the page group list is greater than 1, and the representative rank is less than or equal to the exit-entry max paths. The computer-implemented method of claim 2 wherein the selecting of the top n unconverted session paths further comprises only selecting unconverted sessions that meet a filtering criteria. The computer-implemented method of claim 1 further comprising creating a master table that lists the set of load test scenarios and the think times that represents a distribution of paths taken by real users during the peak hour.']

In [47]:
if os.path.exists('top_100_similar_patents_df.csv'):
    get_top_n_similar_patents_df = pd.read_csv('top_100_similar_patents_df.csv')

else:
    get_top_n_similar_patents_df = get_top_n_similar_patents_df(new_claim[0], claims)

2021-10-23 12:34:06 - Load pretrained SentenceTransformer: output/bi-encoder/stsb_augsbert_BM25_roberta-base-2021-10-22_08-50-03
2021-10-23 12:34:10 - Use pytorch device: cpu


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

New_claim: The computer-implemented method of claim 4 wherein the filtering criteria comprises a number of page groups in the page group list is greater than 1, and the representative rank is less than or equal to the exit-entry max paths. The computer-implemented method of claim 2 wherein the selecting of the top n unconverted session paths further comprises only selecting unconverted sessions that meet a filtering criteria. The computer-implemented method of claim 1 further comprising creating a master table that lists the set of load test scenarios and the think times that represents a distribution of paths taken by real users during the peak hour.

Patent ID: 6888925
PubMed Claim: A method for testing a conference server for use in a telephone network, said conference server having a plurality full duplex channels, each channel including an input to the conference server and an output from the server, said method comprising the steps of: in a first generating step, generating a plu

In [48]:
get_top_n_similar_patents_df.head()

Unnamed: 0,top_claim_ids,cosine_similarity,claims
0,6888925,0.5478,A method for testing a conference server for u...
1,6888567,0.5451,An electronic photographing apparatus comprisi...
2,6882735,0.5416,A computer processor method of creating an aud...
3,6862742,0.5239,An information distribution apparatus for a CA...
4,6889386,0.5228,A program guide data processing apparatus comp...


In [49]:
result = pd.concat([get_top_n_similar_patents_df, df], axis=1, join='inner')

In [50]:
result.to_csv(r'output/df_claim_cpc_new_claim_result.csv', index = False)

In [51]:
df_claim_cpc_result = pd.read_csv('output/df_claim_cpc_new_claim_result.csv', encoding='ISO-8859-1')

In [52]:
df_claim_cpc_result = result
df_claim_cpc_result.head()

Unnamed: 0,top_claim_ids,cosine_similarity,claims,ï»¿,patent_id,text,section_id,subsection_id,group_id,subgroup_id,category,sequence,uuid
0,6888925,0.5478,A method for testing a conference server for u...,0,6845797,"A tire comprising: a tread, where said tread i...",['C'],['C08'],['C08L'],['C08L21/00'],['inventional'],[0],3183e92b-ebb6-11ea-a344-121df0c29c1e
1,6888567,0.5451,An electronic photographing apparatus comprisi...,1,6851219,A process for increasing the wetting rate of w...,['C'],['C09'],['C09K'],['C09K17/16'],['inventional'],[0],382c734f-ebbb-11ea-a344-121df0c29c1e
2,6882735,0.5416,A computer processor method of creating an aud...,2,6851417,An ignition device for internal combustion eng...,['H'],['H01'],['H01L'],['H01L2224/48472'],['additional'],[1],267eefaa-ebcb-11ea-a344-121df0c29c1e
3,6862742,0.5239,An information distribution apparatus for a CA...,3,6858711,A compound having the formula: wherein R 1 is ...,['C'],['C07'],['C07H'],['C07H21/00'],['inventional'],[2],380bc86b-ebb7-11ea-a344-121df0c29c1e
4,6889386,0.5228,A program guide data processing apparatus comp...,4,6862379,"An integrated circuit, the integrated circuit ...",['H'],['H01'],['H01S'],['H01S5/0264'],['inventional'],[1],2d002990-ebc5-11ea-a344-121df0c29c1e


In [53]:
df_claim_cpc_result['section_id'] = df_claim_cpc_result['section_id'].str.replace('nan|[{}\s]','').str.split(',').apply(set).str.join(',').str.strip(',').str.replace(",{2,}",",")
df_claim_cpc_result['subsection_id'] = df_claim_cpc_result['subsection_id'].str.replace('nan|[{}\s]','').str.split(',').apply(set).str.join(',').str.strip(',').str.replace(",{2,}",",")
df_claim_cpc_result['group_id'] = df_claim_cpc_result['group_id'].str.replace('nan|[{}\s]','').str.split(',').apply(set).str.join(',').str.strip(',').str.replace(",{2,}",",")
df_claim_cpc_result['subgroup_id'] = df_claim_cpc_result['subgroup_id'].str.replace('nan|[{}\s]','').str.split(',').apply(set).str.join(',').str.strip(',').str.replace(",{2,}",",")

  df_claim_cpc_result['section_id'] = df_claim_cpc_result['section_id'].str.replace('nan|[{}\s]','').str.split(',').apply(set).str.join(',').str.strip(',').str.replace(",{2,}",",")
  df_claim_cpc_result['subsection_id'] = df_claim_cpc_result['subsection_id'].str.replace('nan|[{}\s]','').str.split(',').apply(set).str.join(',').str.strip(',').str.replace(",{2,}",",")
  df_claim_cpc_result['group_id'] = df_claim_cpc_result['group_id'].str.replace('nan|[{}\s]','').str.split(',').apply(set).str.join(',').str.strip(',').str.replace(",{2,}",",")
  df_claim_cpc_result['subgroup_id'] = df_claim_cpc_result['subgroup_id'].str.replace('nan|[{}\s]','').str.split(',').apply(set).str.join(',').str.strip(',').str.replace(",{2,}",",")


In [54]:
k_similar_patents = df_claim_cpc_result.nlargest(10, ['cosine_similarity'])

In [55]:
k_similar_patents

Unnamed: 0,top_claim_ids,cosine_similarity,claims,ï»¿,patent_id,text,section_id,subsection_id,group_id,subgroup_id,category,sequence,uuid
0,6888925,0.5478,A method for testing a conference server for u...,0,6845797,"A tire comprising: a tread, where said tread i...",['C'],['C08'],['C08L'],['C08L21/00'],['inventional'],[0],3183e92b-ebb6-11ea-a344-121df0c29c1e
1,6888567,0.5451,An electronic photographing apparatus comprisi...,1,6851219,A process for increasing the wetting rate of w...,['C'],['C09'],['C09K'],['C09K17/16'],['inventional'],[0],382c734f-ebbb-11ea-a344-121df0c29c1e
2,6882735,0.5416,A computer processor method of creating an aud...,2,6851417,An ignition device for internal combustion eng...,['H'],['H01'],['H01L'],['H01L2224/48472'],['additional'],[1],267eefaa-ebcb-11ea-a344-121df0c29c1e
3,6862742,0.5239,An information distribution apparatus for a CA...,3,6858711,A compound having the formula: wherein R 1 is ...,['C'],['C07'],['C07H'],['C07H21/00'],['inventional'],[2],380bc86b-ebb7-11ea-a344-121df0c29c1e
4,6889386,0.5228,A program guide data processing apparatus comp...,4,6862379,"An integrated circuit, the integrated circuit ...",['H'],['H01'],['H01S'],['H01S5/0264'],['inventional'],[1],2d002990-ebc5-11ea-a344-121df0c29c1e
5,6969619,0.4999,A method of endpoint detection during plasma p...,5,6862742,An information distribution apparatus for a CA...,['H'],['H04'],['H04N'],['H04N21/42684'],['inventional'],[0],11dfe8f5-ebbf-11ea-a344-121df0c29c1e
6,6969621,0.4972,"A method for uniformly contaminating samples, ...",6,6868683,A cooling system for cooling racks in a data c...,['H'],['H05'],['H05K'],['H05K7/20836'],['inventional'],[0],2d1b7a64-ebb7-11ea-a344-121df0c29c1e
7,6962884,0.495,A method for processing integrated circuit dev...,7,6882689,A data coding method comprising: accepting dig...,['H'],['H04'],['H04L'],['H04L27/001'],['inventional'],[0],029dbf84-ebb7-11ea-a344-121df0c29c1e
8,6888482,0.4882,An analog to digital converter (ADC) comprisin...,8,6882704,A radiation source for generating extreme ultr...,['H'],['H05'],['H05G'],['H05G2/003'],['inventional'],[0],053e423e-ebb7-11ea-a344-121df0c29c1e
9,6882723,0.4867,A method of quantifying an automation benefit ...,9,6882707,A method for training an operator for relay re...,['H'],['H04'],['H04M'],['H04M1/64'],['inventional'],[0],0c70cb18-ebb6-11ea-a344-121df0c29c1e


In [56]:
final_result_mod = k_similar_patents.mode()
# final_result_max = k_similar_patents.max()
print('Prediction for section_id of New Patent is: ', final_result_mod.iloc[0]['section_id'])
print('Prediction for section_id of New Patent is: ', k_similar_patents.iloc[0:10]['section_id'])
print('Prediction for subsection_id of New Patent is: ', k_similar_patents.iloc[0:10]['subsection_id'])
print('Prediction for group_id of New Patent is: ', k_similar_patents.iloc[0:10]['group_id'])
print('Prediction for subgroup_id of New Patent is: ', k_similar_patents.iloc[0:10]['subgroup_id'])

Prediction for section_id of New Patent is:  ['H']
Prediction for section_id of New Patent is:  0    ['C']
1    ['C']
2    ['H']
3    ['C']
4    ['H']
5    ['H']
6    ['H']
7    ['H']
8    ['H']
9    ['H']
Name: section_id, dtype: object
Prediction for subsection_id of New Patent is:  0    ['C08']
1    ['C09']
2    ['H01']
3    ['C07']
4    ['H01']
5    ['H04']
6    ['H05']
7    ['H04']
8    ['H05']
9    ['H04']
Name: subsection_id, dtype: object
Prediction for group_id of New Patent is:  0    ['C08L']
1    ['C09K']
2    ['H01L']
3    ['C07H']
4    ['H01S']
5    ['H04N']
6    ['H05K']
7    ['H04L']
8    ['H05G']
9    ['H04M']
Name: group_id, dtype: object
Prediction for subgroup_id of New Patent is:  0         ['C08L21/00']
1         ['C09K17/16']
2    ['H01L2224/48472']
3         ['C07H21/00']
4        ['H01S5/0264']
5      ['H04N21/42684']
6       ['H05K7/20836']
7        ['H04L27/001']
8         ['H05G2/003']
9          ['H04M1/64']
Name: subgroup_id, dtype: object


In [64]:
def get_top_n_similar_patents_df(new_claim, claims):

    embedder = SentenceTransformer('output/bi-encoder/stsb_augsbert_BM25_roberta-base-2021-10-22_08-50-03')
    query_embeddings = embedder.encode([new_claim])
    claim_embeddings = embedder.encode(claims)
    top_n = 40
    distances = scipy.spatial.distance.cdist(query_embeddings, claim_embeddings, "cosine")[0]
    results = zip(range(len(distances)), distances)
    results = sorted(results, key=lambda x: x[1])
    top_claim_ids = []
    top_claims = []
    top_similarity_scores = []

    # Find the closest 100 patent claims for each new_claim based on cosine similarity
    for idx, distance in results[0:top_n]:
        top_claim_ids.append(patent_id[idx])
        top_claims.append(claims[idx])
        top_similarity_scores.append(round((1-distance), 4))
        print('Patent ID: ' + str(patent_id[idx]))
        print('PubMed Claim: ' + claims[idx])
        print('Similarity Score: ' + "%.4f" % (1-distance))
        print('\n')
        
    top_100_similar_patents_df = pd.DataFrame({
        'top_claim_ids': top_claim_ids,
        'cosine_similarity': top_similarity_scores,
        'claims': top_claims,
    })
    
    return top_100_similar_patents_df

def F1Measure(y_true, y_pred):
    temp = 0
    for i in range(y_true.shape[0]):
        if (sum(y_true[i]) == 0) and (sum(y_pred[i]) == 0):
            continue
        temp+= (2*sum(np.logical_and(y_true[i], y_pred[i])))/ (sum(y_true[i])+sum(y_pred[i]))
    return temp/ y_true.shape[0]

def Recall(y_true, y_pred):
    temp = 0
    for i in range(y_true.shape[0]):
        if sum(y_pred[i]) == 0:
            continue
        temp+= sum(np.logical_and(y_true[i], y_pred[i]))/ sum(y_pred[i])
    return temp/ y_true.shape[0]

def Precision(y_true, y_pred):
    temp = 0
    for i in range(y_true.shape[0]):
        if sum(y_true[i]) == 0:
            continue
        temp+= sum(np.logical_and(y_true[i], y_pred[i]))/ sum(y_true[i])
    return temp/ y_true.shape[0]

def Hamming_Loss(y_true, y_pred):
    temp=0
    for i in range(y_true.shape[0]):
        temp += np.size(y_true[i] == y_pred[i]) - np.count_nonzero(y_true[i] == y_pred[i])
    return temp/(y_true.shape[0] * y_true.shape[1])

def Accuracy(y_true, y_pred):
    temp = 0
    for i in range(y_true.shape[0]):
        temp += sum(np.logical_and(y_true[i], y_pred[i])) / sum(np.logical_or(y_true[i], y_pred[i]))
    return temp / y_true.shape[0]

df_claim_cpc_test = pd.read_csv('C:/Users/강지연/cpc_test_60.csv', encoding='ISO-8859-1')
df_claim_cpc_train = pd.read_csv('C:/Users/강지연/cpc_training_234.csv', encoding='ISO-8859-1')
# df_claim_cpc_train_1000 = pd.read_csv('/home/ubuntu/deeppatentsimilarity/patentdata/df_claim_cpc_all_len_150_200_1000.csv', encoding='ISO-8859-1')
# df_claim_cpc_test = pd.read_csv('/home/ubuntu/deeppatentsimilarity/patentdata/prelabel/NewTest/df_1_L_43259_test_100.csv', encoding='ISO-8859-1')
# df_claim_cpc_train = pd.read_csv('/home/ubuntu/deeppatentsimilarity/patentdata/prelabel/NewTest/df_1_L_43259.csv', encoding='ISO-8859-1')

claims = list(df_claim_cpc_train.text)
patent_id = list(df_claim_cpc_train.patent_id)

listofpredictdfs = []

In [65]:
for i in range(len(df_claim_cpc_test)):
    get_top_n_similar_patents_df_predict = get_top_n_similar_patents_df(df_claim_cpc_test.text.iloc[i], claims)
    result = pd.merge(get_top_n_similar_patents_df_predict, df_claim_cpc_train, left_on='top_claim_ids',right_on='patent_id',how='left',suffixes=('_left','_right'))
    locals()["predict_n"+str(i)] = result.copy()
    listofpredictdfs.append("predict_n"+str(i))

df = pd.concat(map(lambda x: eval(x), listofpredictdfs),keys= listofpredictdfs ,axis=0)

2021-10-23 13:39:59 - Load pretrained SentenceTransformer: output/bi-encoder/stsb_augsbert_BM25_roberta-base-2021-10-22_08-50-03
2021-10-23 13:40:02 - Use pytorch device: cpu


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6888173
PubMed Claim: A light emitting diode device comprising: a substrate; an LED mounted on the substrate; electrodes provided on the substrate and connected to the LED for applying a current to the LED; an encapsulating resin encapsulating the LED; and a color filter layer provided on the encapsulating resin, the color filter layer comprising three color layers, wherein the three color layers comprise a cyan layer, a magenta layer and a yellow layer.
Similarity Score: 0.7624


Patent ID: 6888520
PubMed Claim: An active matrix type display, comprising: a substrate; and a plurality of pixels arranged in the form of a matrix on the substrate; wherein each of the pixels comprises a light emission device prepared by forming a transparent pixel electrode and a metal pixel electrode on respective surfaces of a light emitting layer, and a driving circuit for controlling a driving current of the light emission device; the driving circuit is formed on the substrate; the light emis

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6962860
PubMed Claim: A method of manufacturing a semiconductor device, wherein an insular semiconductor film having crystalline characteristics is formed by irradiating a first laser beam in a first direction to a semiconductor film at a region defined by pattern information and then irradiating a second laser beam in a second direction at a region defined by said pattern information and thereby enhancing the crystalline characteristics of said regions defined by said pattern information, followed by patterning of said regions where the crystalline characteristics have been enhanced through the use of said pattern information, wherein: said first laser beam or said second laser beam is irradiated to said semiconductor film at a partial region containing said regions defined by said pattern information; said first direction and said second direction cross with each other; and in a thin film transistor fabricated using said insular semiconductor film, a direction in which car

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6887744
PubMed Claim: A method of manufacturing a thin film transistor substrate comprising the steps of: forming a pattern of a semiconductor layer on or above an insulating substrate; forming a gate insulating film on the semiconductor layer; forming a gate electrode on the gate insulating film; introducing impurities into portions of the semiconductor layer, which are to be a source region and a drain region; forming an interlayer insulating film including a plurality of insulating films with mutually different dielectric constants on the semiconductor layer and the gate electrode; forming contact holes in portions of the interlayer insulating film, the portions being at least on the source region and the drain region; forming a transparent conductive film on the interlayer insulating film and inner surfaces of the contact holes; forming a metal film on the transparent conductive film; forming a interconnection electrode in a portion including the contact hole of the drai

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6962837
PubMed Claim: A method for manufacturing a semiconductor device comprising the steps of: forming a first semiconductor film over a substrate; providing a crystallization promoting material with the first semiconductor film; crystallizing the first semiconductor film; forming a barrier film on the first semiconductor film; forming a second semiconductor film comprising silicon and an element which facilitates absorption of the crystallization promoting material on the barrier film; heating the crystallized first semiconductor film so that the crystallization promoting material contained in the crystallized first semiconductor film is transferred to the second semiconductor film through the barrier film; and removing the second semiconductor film after the heating step, wherein a thickness of the second semiconductor film is larger than that of the first semiconductor film, and wherein the element which facilitates absorption of the crystallization promoting material i

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6887512
PubMed Claim: A method for fabricating a component board including a printed-wiring substrate having a substantially plate-like shape, a main face and a back face, and comprising main-face-side connection terminals solder-bonded to connection terminals of an electronic component which is to be mounted on the main face; and back-face-side connection terminals connected, through mechanical contact, to connection terminals of another substrate which is to be connected to the printed-wiring substrate on the back-face side thereof, the method comprising: forming a first main-face-side Au plating layer on the surfaces of the main-face-side connection terminals and a first back-face-side Au plating layer on the surfaces of the back-face-side connection terminals; covering the first main-face-side Au plating layer with a protection layer; forming a second back-face-side Au plating layer on the first back-face-side Au plating layer; and removing the protection layer after com

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6933493
PubMed Claim: An image sensor, comprising: a substrate having an upper surface, a lower surface, and a slot penetrating through the substrate from the upper surface to the lower surface, the upper surface being formed with a plurality of first connection points, and the lower surface being formed with a plurality of second connection points; a metal sheet attached to the lower surface of the substrate and located under the slot of the substrate to form a cavity together with the slot of the substrate; a photosensitive chip formed with a plurality of bonding pads, the photosensitive chip being arranged within the cavity and mounted to the metal sheet; a plurality of wires for electrically connecting the bonding pads of the photosensitive chip to the first connection points of the upper surface of the substrate; and a transparent layer arranged on the upper surface of the substrate to cover the photosensitive chip so that the photosensitive chip may receive optical sig

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6888248
PubMed Claim: A multi-level metal interconnect structure for an NMOS transistor containing integrated circuit comprising: at least one NMOS transistor disposed along at least one circuit pathway in a multi-level semiconductor device between reference voltage potential and at least one bonding pad comprising one of an input/output signal source and a voltage source; and, at least one metal interconnect line forming at least a portion of the at least one circuit pathway, the at least one metal interconnect line extended in length along at least a portion of the circuit pathway by forming a compacted serpentine shaped pathway within a predetermined volume of the semiconductor device, the predetermined volume extending through a plurality of metallization levels.
Similarity Score: 0.7219


Patent ID: 6887759
PubMed Claim: A method for forming, in a substrate of a first conductivity type, a MOS transistor, comprising the steps of: a) forming an insulated gate on the subst

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6887754
PubMed Claim: A method of fabricating a semiconductor memory device, comprising the steps of: forming a gate electrode on a substrate; forming first and second diffusion regions in said substrate respectively adjacent to first and second side walls of said gate electrode; forming first and second side wall insulation films respectively on said first and second side walls of said gate electrode; forming a first interlayer insulation film on said gate electrode such that said first interlayer insulation film covers said first and second side wall insulation films; forming a first contact hole in said first interlayer insulation film such that said first contact hole exposes said first diffusion region; forming a bit line pattern on said first interlayer insulation film so as to fill said first contact hole in electrical contact with said first diffusion region; forming a second interlayer insulation film on said first interlayer insulation film so as to cover said bit 

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6887744
PubMed Claim: A method of manufacturing a thin film transistor substrate comprising the steps of: forming a pattern of a semiconductor layer on or above an insulating substrate; forming a gate insulating film on the semiconductor layer; forming a gate electrode on the gate insulating film; introducing impurities into portions of the semiconductor layer, which are to be a source region and a drain region; forming an interlayer insulating film including a plurality of insulating films with mutually different dielectric constants on the semiconductor layer and the gate electrode; forming contact holes in portions of the interlayer insulating film, the portions being at least on the source region and the drain region; forming a transparent conductive film on the interlayer insulating film and inner surfaces of the contact holes; forming a metal film on the transparent conductive film; forming a interconnection electrode in a portion including the contact hole of the drai

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6962874
PubMed Claim: A method for fabricating a semiconductor device, the method comprising the steps of: sequentially forming a first anti-reflection layer including no photoactive material and a first photoresist film including silicon/photoactive material on a silicon substrate; exposing and wet-developing the first photoresist film, thereby forming a first image layer having a predetermined shape; sequentially forming a second anti-reflection layer including no photoactive material and a second photoresist film including silicon/photoactive material on an entire surface of the substrate including the first image layer; exposing and wet-developing the second photoresist film, thereby forming a second image layer which opens wider than the first image layer; supplying oxygen plasma to a resultant structure in order to transfer a pattern of the second image layer on the second anti-reflection layer and to transfer a pattern of the first image layer exposed through the seco

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6887744
PubMed Claim: A method of manufacturing a thin film transistor substrate comprising the steps of: forming a pattern of a semiconductor layer on or above an insulating substrate; forming a gate insulating film on the semiconductor layer; forming a gate electrode on the gate insulating film; introducing impurities into portions of the semiconductor layer, which are to be a source region and a drain region; forming an interlayer insulating film including a plurality of insulating films with mutually different dielectric constants on the semiconductor layer and the gate electrode; forming contact holes in portions of the interlayer insulating film, the portions being at least on the source region and the drain region; forming a transparent conductive film on the interlayer insulating film and inner surfaces of the contact holes; forming a metal film on the transparent conductive film; forming a interconnection electrode in a portion including the contact hole of the drai

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6887744
PubMed Claim: A method of manufacturing a thin film transistor substrate comprising the steps of: forming a pattern of a semiconductor layer on or above an insulating substrate; forming a gate insulating film on the semiconductor layer; forming a gate electrode on the gate insulating film; introducing impurities into portions of the semiconductor layer, which are to be a source region and a drain region; forming an interlayer insulating film including a plurality of insulating films with mutually different dielectric constants on the semiconductor layer and the gate electrode; forming contact holes in portions of the interlayer insulating film, the portions being at least on the source region and the drain region; forming a transparent conductive film on the interlayer insulating film and inner surfaces of the contact holes; forming a metal film on the transparent conductive film; forming a interconnection electrode in a portion including the contact hole of the drai

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6887748
PubMed Claim: A mixed-mode process for integration circuits comprising: providing a semiconductor substrate, a surface of the semiconductor substrate comprising at least a first conductor formed in a first conductive region, at least a second conductor formed in a second conductive region, at least a metal-oxide-semiconductor (MOS) transistor formed in a MOS transistor region, and at least a capacitor formed in a capacitor region; forming a mask on the semiconductor substrate to cover the MOS transistor, the first conductor, and the capacitor and to expose the second conductor; performing a first etching process to remove a specific thickness of the second conductor; and performing a first ion implantation process to dope the second conductor with first type dopants; wherein the order of the first etching process and the first ion implantation process can be switched.
Similarity Score: 0.7740


Patent ID: 6962845
PubMed Claim: A method for manufacturing a semiconduct

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6887904
PubMed Claim: A product corresponding to formula (I) below: in which: A represents a saturated or partially unsaturated, optionally substituted cyclic hydrocarbon containing from 3 to 14 carbon atoms; X is selected from O and Nå ììå ììå ììOå ììå ììå ìì(R2), each radical R or (R2) is independently selected from H, C1-C7 alkyl, cyclo(C3-C9)alkyl(C1-C7)alkylaryl(C1-C7)alkyl-, substituted C1-C7 alkyl, substituted cyclo(C3-C9)alkyl(C1-C7)alkyl-, and substituted aryl(C1-C7)alkyl-, or R is a halogen and (R2) is as defined above, Ar 1 represents a first phenyl nucleus, substituted with 1 to 4 radicals O(R3), in which R3 is independently selected from H and C1-C3 alkyl, and in which an R3 may be simultaneously linked to two adjacent oxygen atoms borne by the, first phenyl nucleus, and Ar 2 is selected from 3-hydroxy-4-methoxyphenyl; 4-hydroxy-3-methoxyphenyl; 3-hydroxy-4-aminophenyl; 4-hydroxy-3-aminophenyl; 3-hydroxy-4-(N,N-dimethylamino)phenyl;

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6888038
PubMed Claim: In a method for forming a light olefin product by the catalytic cracking of a hydrocarbon feed that contains at least in part a mixture of heavy olefins using both a catalyst system and reaction conditions effective for the formation of light olefins, the improvement comprising, co-feeding along with said hydrocarbon feed to said catalytic cracking a minor effective amount of at least one oxygen containing hydrocarbon selected from the group consisting of alcohols, ketones, ethers, carboxylic acids, polyols, aldehydes, cyclic ethers, epoxides, and mixtures of two or more thereof, whereby aromatics and/or C8 and heavier hydrocarbons in said light olefin product are reduced in amount below what they would have been had the oxygen containing compound not been employed.
Similarity Score: 0.5338


Patent ID: 6888023
PubMed Claim: A process for producing pyromellitic acid which comprises oxidizing 2,4,5-trimethylbenzaldehyde and/or its oxidized derivative in 

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6887661
PubMed Claim: An isolated insect polynucleotide that encodes a bHLH-PAS polypeptide that is involved in binding juvenile hormone III, wherein said polynucleotide hybridizes under stringent conditions with a polynucleotide having a nucleotide sequence selected from the group consisting of SEQ ID NO:1, SEQ ID NO:2, SEQ ID NO:3 and SEQ ID NO:6, wherein the stringent conditions comprise hybridization in 1å ììå ììSSC and 0.1% SDS at about 55å ììå ìì C. for about 60 minutes, wherein said insect is selected from the group consisting of Coleoptera, Siphonoptera, Orthoptera, Thysanoptera, Lepidoptera, Hemiptera , and Diptera , and wherein said polynucleotide has a nucleotide sequence that encodes a polypeptide having an amino acid sequence selected from the group consisting of SEQ ID NO:4 
Similarity Score: 0.6516


Patent ID: 6887471
PubMed Claim: A method for inhibiting functional CTLA4 positive T cell interactions with B7 positive cells comprising conta

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6888020
PubMed Claim: A water soluble dithioester compound represented by the following Chemical Formula 1: in the Chemical Formula 1, Z is a hydrogen, a chlorine; or an alkyl, an aryl, an alkylthio, an alkoxycarbonyl, an aryoxycarbonyl (å ììå ììå ììCOORå ììå ììå ìì), an carboxy (å ììå ììå ììCOOH), an acyloxy (å ììå ììå ììO 2 CRå ììå ììå ìì), a cabamoyl (å ììå ììå ììCONRå ììå ììå ìì), a cyano (å ììå ììå ììCN), a dialkyl-phosphonato, a diaryl-phosphonato (å ììå ììå ììP(å ììå ììå ììO)ORå ììå ììå ìì 2 ), a dialkyl-phosphinato, or a diaryl-phosphinato (å ììå ììå ììP(å ììå ììå ììO)Rå ììå ììå ìì 2 ) unsubstituted or substituted which substitutents, Rå ììå ììå ìì is C 1-18 alkyl, C 2-18 alkenyl, aryl, heterocyclyl, aralkyl, or alkylaryl unsubstituted or substit

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6851219
PubMed Claim: A process for increasing the wetting rate of water repellent soil which comprises the steps of: i) preparing an aqueous wetting agent composition consisting essentially of: a) alkyl polyglycoside; b) ethylene oxide-propylene oxide block copolymer; and c) water wherein the weight ratio of the alkyl polyglycoside to the block copolymer is from 6:1 to 0.5:1; and ii) intimately contacting water repellent soil with an effective amount of said wetting agent composition.
Similarity Score: 0.8114


Patent ID: 6887320
PubMed Claim: A process for applying a chromate-free, corrosion resistant coating to a product formed from a magnesium based material, comprising the steps of: degreasing the product formed from the magnesium based material in a degreasing solution; cleaning the product formed from the magnesium based material in a highly alkaline cleaning solution; deoxidizing the product formed from the magnesium based material in a deoxidizing solution; and imme

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6888037
PubMed Claim: A process for producing cumene which comprises the step of contacting benzene and propylene with a particulate molecular sieve alkylation catalyst having a catalytic activity under at least partial liquid phase alkylating conditions to provide a product comprising cumene and a polyisopropylbenzene fraction, said particulate molecular sieve alkylation catalyst comprising MCM-49 having a surface to volume ratio of about 120 inch å ììå ììå ìì1 ; and wherein said catalytic activity of said particulate molecular sieve alkylation catalyst is greater than that of a molecular sieve alkylation catalyst having a surface to volume ratio of less than about 80 in
Similarity Score: 0.7152


Patent ID: 6888023
PubMed Claim: A process for producing pyromellitic acid which comprises oxidizing 2,4,5-trimethylbenzaldehyde and/or its oxidized derivative in the presence of a catalyst containing iron, manganese and bromine, and which may additionally contain z

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6888038
PubMed Claim: In a method for forming a light olefin product by the catalytic cracking of a hydrocarbon feed that contains at least in part a mixture of heavy olefins using both a catalyst system and reaction conditions effective for the formation of light olefins, the improvement comprising, co-feeding along with said hydrocarbon feed to said catalytic cracking a minor effective amount of at least one oxygen containing hydrocarbon selected from the group consisting of alcohols, ketones, ethers, carboxylic acids, polyols, aldehydes, cyclic ethers, epoxides, and mixtures of two or more thereof, whereby aromatics and/or C8 and heavier hydrocarbons in said light olefin product are reduced in amount below what they would have been had the oxygen containing compound not been employed.
Similarity Score: 0.7465


Patent ID: 6887958
PubMed Claim: A process for copolymerizing ethylene or propylene with one another or with other olefinically unsaturated compounds, which compri

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6938792
PubMed Claim: A foldable underlay for collecting liquids underneath domestic equipment, comprising: a rectangular, planar sheet of waterproof material, having a front edge, a rear edge, and two opposite side edges; a pair of first scoring lines running parallel with said side edges from the front edge to the rear edge; a second scoring line running parallel to said rear edge from one side edge to the other, the second scoring line intersecting said pair of first scoring lines at right angles; a bottom surface defined by said first and second scoring lines and said front edge, said bottom surface connecting to a back piece at said second scoring line, and to a pair of side pieces at said first scoring lines, respectively; a third scoring line running parallel to said first scoring lines from said front edge to said rear edge, midways between said first scoring lines; a fourth scoring line running parallel to said second scoring line from one of said side edges to the 

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6887104
PubMed Claim: A cable end connector assembly adapted for mating with a complementary electrical connector, comprising: a dielectric housing defining a plurality of passageways extending in a front-to-back direction and comprising an upper wall, a lower wall opposite to the upper wall, and a pair of sidewalls connecting the upper and lower walls, a receiving space being defined by the upper wall, the lower wall and the pair of sidewalls and communicating with the passageways and adapted for receiving a mating portion of the complementary connector; a plurality of contact units respectively received in the passageways, each contact unit comprising three mating portions at one end thereof partially exposed into tho receiving specs and adapted for electrically connecting the complementary connector, and a tail portion connected to the three mating portions and extending beyond a rear face of the housing, wherein the tail portion has a U-shaped configuration; a plurality 

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6887104
PubMed Claim: A cable end connector assembly adapted for mating with a complementary electrical connector, comprising: a dielectric housing defining a plurality of passageways extending in a front-to-back direction and comprising an upper wall, a lower wall opposite to the upper wall, and a pair of sidewalls connecting the upper and lower walls, a receiving space being defined by the upper wall, the lower wall and the pair of sidewalls and communicating with the passageways and adapted for receiving a mating portion of the complementary connector; a plurality of contact units respectively received in the passageways, each contact unit comprising three mating portions at one end thereof partially exposed into tho receiving specs and adapted for electrically connecting the complementary connector, and a tail portion connected to the three mating portions and extending beyond a rear face of the housing, wherein the tail portion has a U-shaped configuration; a plurality 

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6888244
PubMed Claim: Interconnect arrangement, comprising: a first layer and a first layer surface, the first layer having a first insulation material; at least two interconnects situated on the first layer surface, having a second layer surface essentially parallel to the first layer surface, the interconnects having a first material which is electrically conductive; a second layer made of a second insulation material, which second layer is produced on the second layer surface of each interconnect and projects beyond the interconnect, the second layers of adjacent interconnects covering regions between the adjacent interconnects; a third layer covering the second layers, the third layer having a third insulation material and completely closing off the regions between the adjacent interconnects by means of coverage; and a cavity between the first layer, the adjacent interconnects, the second layers and the third layer, the cavity having an electrically insulating effect bet

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6887104
PubMed Claim: A cable end connector assembly adapted for mating with a complementary electrical connector, comprising: a dielectric housing defining a plurality of passageways extending in a front-to-back direction and comprising an upper wall, a lower wall opposite to the upper wall, and a pair of sidewalls connecting the upper and lower walls, a receiving space being defined by the upper wall, the lower wall and the pair of sidewalls and communicating with the passageways and adapted for receiving a mating portion of the complementary connector; a plurality of contact units respectively received in the passageways, each contact unit comprising three mating portions at one end thereof partially exposed into tho receiving specs and adapted for electrically connecting the complementary connector, and a tail portion connected to the three mating portions and extending beyond a rear face of the housing, wherein the tail portion has a U-shaped configuration; a plurality 

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6888201
PubMed Claim: An integrated multiple vertical npn transistor ESD protection structure on a semiconductor substrate, functionally connected between an integrated circuit input or output pin and ground which will prevent electrostatic discharge damage to said integrated circuit comprising: a first semiconductor layer having a first conductivity dopant dopent type; a second semiconductor layer overlying said first semiconductor layer, having a similar conductivity type as said first layer, but a different dopant concentration; a third semiconductor layer having a second conductivity dopant type opposite that of said first semiconductor layer, disposed in overlying relation to said second semiconductor layer; a plurality of first regions of said first conductivity type electrically connecting with said first semiconductor layer, having a top element making electrical contact to said first regions and said first semiconductor layer; a plurality of second regions of said s

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6933493
PubMed Claim: An image sensor, comprising: a substrate having an upper surface, a lower surface, and a slot penetrating through the substrate from the upper surface to the lower surface, the upper surface being formed with a plurality of first connection points, and the lower surface being formed with a plurality of second connection points; a metal sheet attached to the lower surface of the substrate and located under the slot of the substrate to form a cavity together with the slot of the substrate; a photosensitive chip formed with a plurality of bonding pads, the photosensitive chip being arranged within the cavity and mounted to the metal sheet; a plurality of wires for electrically connecting the bonding pads of the photosensitive chip to the first connection points of the upper surface of the substrate; and a transparent layer arranged on the upper surface of the substrate to cover the photosensitive chip so that the photosensitive chip may receive optical sig

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6969130
PubMed Claim: An enclosure adapted for housing an electronic device, comprising: a housing; at least one door pivotally connected to the housing, wherein the at least one door is pivotally moveable between a closed position and an open position about a first pivot axis; and at least one shield connected to the housing, wherein the at least one shield is pivotally moveable between a closed position and an open position about a second pivot axis, and wherein the at least one shield is adapted to protect the enclosure from adverse environmental conditions, wherein the second pivot axis is defined by a hinge and a cross member, and wherein the enclosure further comprises: at least one bracket, wherein the at least one bracket is included in at least one lateral portion of the housing and configured to support the hinge and the cross member.
Similarity Score: 0.7509


Patent ID: 6938846
PubMed Claim: Apparatus for screening a fibrous suspension comprising a housing, a rot

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6887089
PubMed Claim: A lever-type connector ( 30 ) having a housing ( 31 ) for accommodating terminal fittings ( 33 ) that can be brought into contact with mating terminal fittings ( 22 ), the housing ( 31 ) being connectable with a mating housing ( 21 ), and at least one lever ( 38 ) rotatably supported thereon, comprising: at least one cam means ( 39 ) on the lever ( 38 ) for engaging at least one mating cam means ( 24 ) on the mating housing ( 21 ) and a supportable portion ( 40 ) on the lever ( 38 ) for slidable support on a supporting portion ( 47 ) of a supporting member ( 46 ) to permit rotation of the lever ( 38 ); the housing ( 31 ) is displaced to approach the supporting member ( 46 ) as the housings ( 31 , 21 ) are connected, and the supportable portion ( 40 ) is moved relative to the supporting portion ( 47 ) to rotate the lever ( 38 ), thereby engaging the cam means ( 24 ) with the mating cam means ( 39 ) to exhibit a cam action; a moving plate ( 35 ) formed wi

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6966357
PubMed Claim: A venturi-fan and heat sink comprising a venturi-fan, a heat sinking means, and a motor means for rotating the venturi fan relative to the heat sinking means, the heat sinking means having an active heat dissipating surface proximate to the venturi-fan, and the venturi-fan comprising a plurality of venturis that rotate around the heat sinking means proximate to the active heat dissipating surface of the heat sinking means, wherein the plurality of venturis of the venturi-fan are enclosed.
Similarity Score: 0.7407


Patent ID: 6882704
PubMed Claim: A radiation source for generating extreme ultraviolet (EUV) radiation, wherein a hot plasma emitting the desired radiation is generated in a vacuum chamber, comprising: a plasma generation unit which is directly connected with the vacuum chamber for introducing high energy input which is supplied in a pulsed manner in order to generate hot plasma in a small spatial extension and with high density in a vacuum c

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6962843
PubMed Claim: A method, comprising: forming a dielectric layer on a top surface of a semiconductor substrate; forming a silicon layer on a top surface of said dielectric layer; forming a patterned hardmask on a top surface of said silicon layer; removing said silicon layer where said silicon layer is not protected by said patterned hardmask thereby forming a silicon fin on a top surface of said dielectric layer; removing said patterned hardmask and a less than whole portion of said dielectric layer from under said fin; forming a conformal protective layer on at least one sidewall of said fin, said protective layer extending under said fin; and removing said protective layer from said at least one sidewall and from under said fin in a channel region of said fin.
Similarity Score: 0.7643


Patent ID: 6933493
PubMed Claim: An image sensor, comprising: a substrate having an upper surface, a lower surface, and a slot penetrating through the substrate from the upper surfac

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6888171
PubMed Claim: A semi-conductor light emitting diode structure comprising: a sapphire substrate; a GaN based light emitting structure; first and second electrodes laterally spaced apart on a same side of said substrate; and means formed in said diode structure for isolating said electrodes from one another; wherein said first electrode comprises a U shaped semi transparent conductive p contact layer; and a metal p contact centered on a closed end of the U shaped layer; and wherein said second electrode comprises an n contact layer and a metal n contact centered in an open end of the U shaped layer.
Similarity Score: 0.7103


Patent ID: 6969130
PubMed Claim: An enclosure adapted for housing an electronic device, comprising: a housing; at least one door pivotally connected to the housing, wherein the at least one door is pivotally moveable between a closed position and an open position about a first pivot axis; and at least one shield connected to the housing, wherein t

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6888202
PubMed Claim: An integrated circuit comprising: a latch circuit including, a first inverter including a first PMOS transistor and a first NMOS transistor with a first data node comprising interconnected source/drains (S/D) of the first PMOS and NMOS transistors; a second inverter including a second PMOS transistor and a second NMOS transistor with a second data node comprising interconnected source/drains (S/D) of the second PMOS and NMOS transistors; wherein the gates of the first PMOS and first NMOS transistors are coupled to the second data node; wherein the gates of the second PMOS and second NMOS transistors are coupled to the first data node; an input switch including, an access transistor including a first S/D coupled to the first data node and to the gate of the second PMOS transistor and to the gate of the second NMOS transistor and including a second S/D coupled to a first data access node and including a gate coupled to a first access control node; and a f

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6862379
PubMed Claim: An integrated circuit, the integrated circuit couplable to a semiconductor laser and to a photodetector, the photodetector optically couplable to the semiconductor laser, the semiconductor laser capable of transmitting an optical signal in response to a modulation current, and the photodetector capable of converting the optical signal into a photodetector current, the integrated circuit comprising: a modulator couplable to the semiconductor laser, the modulator capable of providing the modulation current to the semiconductor laser, the modulation current corresponding to an input data signal; and an extinction ratio controller couplable to the photodetector and coupled to the modulator, the extinction ratio controller, in response to the photodetector current, capable of adjusting the modulation current provided by the modulator to the semiconductor laser to generate the optical signal having substantially a predetermined extinction ratio.
Similarity Sc

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6888482
PubMed Claim: An analog to digital converter (ADC) comprising: at least one folder receiving an input voltage, generating a first output voltage and a second output voltage, and including: a plurality of amplifiers, wherein each of the amplifier receives one of a plurality of reference voltages and comprises: a bias circuit for providing a bias current to the amplifier based on at least one of a plurality of bias control signals; and calibration logic for generating the bias control signals according to the first output voltage and the second output voltage; wherein during calibration, the calibration logic provides the bias control signals to control the bias circuit of each amplifier in the folder such that the first output voltage is substantially the same as the second output voltage.
Similarity Score: 0.7328


Patent ID: 6888484
PubMed Claim: Apparatus comprising: a) a first signal mixer having a first input for receiving an input signal, a second input, and an 

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6888482
PubMed Claim: An analog to digital converter (ADC) comprising: at least one folder receiving an input voltage, generating a first output voltage and a second output voltage, and including: a plurality of amplifiers, wherein each of the amplifier receives one of a plurality of reference voltages and comprises: a bias circuit for providing a bias current to the amplifier based on at least one of a plurality of bias control signals; and calibration logic for generating the bias control signals according to the first output voltage and the second output voltage; wherein during calibration, the calibration logic provides the bias control signals to control the bias circuit of each amplifier in the folder such that the first output voltage is substantially the same as the second output voltage.
Similarity Score: 0.7778


Patent ID: 6888202
PubMed Claim: An integrated circuit comprising: a latch circuit including, a first inverter including a first PMOS transistor and a fir

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6888482
PubMed Claim: An analog to digital converter (ADC) comprising: at least one folder receiving an input voltage, generating a first output voltage and a second output voltage, and including: a plurality of amplifiers, wherein each of the amplifier receives one of a plurality of reference voltages and comprises: a bias circuit for providing a bias current to the amplifier based on at least one of a plurality of bias control signals; and calibration logic for generating the bias control signals according to the first output voltage and the second output voltage; wherein during calibration, the calibration logic provides the bias control signals to control the bias circuit of each amplifier in the folder such that the first output voltage is substantially the same as the second output voltage.
Similarity Score: 0.8146


Patent ID: 6888571
PubMed Claim: A photosensor system comprising: a photosensor array including a plurality of photosensors arranged two-dimensionally; in

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6887101
PubMed Claim: A connector for differential transmission, comprising: a connector housing; a connector main body attached to the connector housing, the connector main body including a differential transmission electric connector part connectable to a connector of an apparatus, the differential transmission electric connector part having a plurality of signal contact pairs and a plurality of ground contact members arranged alternately, the signal contact pairs each including first and second signal contact members; and a photoelectric conversion module provided to the connector housing to be electrically connected to the connector main body, the photoelectric conversion module including a photoelectric conversion part and an optical fiber cable connector part to which an optical fiber cable is connectable, wherein the differential transmission electric connector part of the connector main body is provided to a first end of the connector housing, and the optical fiber c

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6969622
PubMed Claim: An anisotropically conductive connector comprising: a frame plate in which a plurality of anisotropically conductive film-arranging holes each extending in a thickness-wise direction of the frame plate are formed corresponding to electrode regions, in which electrodes to be inspected of the integrated circuits in the wafer as an object for inspection have been formed, and a plurality of elastic anisotropically conductive films arranged in the respective anisotropically conductive film-arranging holes in this frame plate and each supported by the inner peripheral edge about the anisotropically conductive film-arranging hole, wherein each of the elastic anisotropically conductive films comprises a functional part comprising a plurality of conductive parts for connection each containing conductive particles exhibiting magnetism at high density and extending in the thickness-wise direction of the film and arranged correspondingly to the electrodes to be ins

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6969130
PubMed Claim: An enclosure adapted for housing an electronic device, comprising: a housing; at least one door pivotally connected to the housing, wherein the at least one door is pivotally moveable between a closed position and an open position about a first pivot axis; and at least one shield connected to the housing, wherein the at least one shield is pivotally moveable between a closed position and an open position about a second pivot axis, and wherein the at least one shield is adapted to protect the enclosure from adverse environmental conditions, wherein the second pivot axis is defined by a hinge and a cross member, and wherein the enclosure further comprises: at least one bracket, wherein the at least one bracket is included in at least one lateral portion of the housing and configured to support the hinge and the cross member.
Similarity Score: 0.7492


Patent ID: 6966311
PubMed Claim: An ignition apparatus for an engine having a cylinder head on a combusti

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6918217
PubMed Claim: In a raised floor system having a plurality of pedestals supported on a building floor in sidewardly spaced relationship and projecting upwardly therefrom, and a plurality of floor panels supported on the pedestals adjacent upper ends thereof, whereby the floor panels define a raised floor which is disposed upwardly from the building floor so as to define a space therebetween, the improvement comprising: said pedestal having a fastening part fixedly associated therewith; an elongate one-piece cable management member positioned in said space below said floor panels and extending generally sidewardly between only one pair of adjacent said pedestals, said cable management member including a pair of elongate cantilevered legs defining thereon sidewardly spaced mounting parts which are relatively moveable so as to be respectively engaged with said adjacent pedestals to couple said cable management member therebetween; said cable management member having an e

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6888086
PubMed Claim: A current interrupter assembly comprising: a unitary molded insulating structure; a current interrupter embedded in the structure; a conductor element embedded in the structure; a current interchange embedded in the structure and connected to create a current path between the current interrupter and the conductor element; and a semiconductive layer at least partially embedded in the molded structure and covering at least a portion of the conductor element so as to reduce voltage discharges between the conductor element and the structure.
Similarity Score: 0.8102


Patent ID: 6888195
PubMed Claim: A power semiconductor device comprising: a first semiconductor layer of a first conductivity type; a first main electrode electrically connected to the first semiconductor layer; second semiconductor layers of a second conductivity type formed within the first semiconductor layer and periodically arranged in a lateral direction, a profile of an amount of an imp

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6882729
PubMed Claim: A data system, comprising: a secured appliance for receiving broadcast programming; and a remote control for transmitting data frames to the secured appliance; wherein the secured appliance is provided with a decryption key that is complimentary to an encryption key provided to the remote control, the encryption key being used to encrypt at least a synchronization count included as a part of a data frame transmitted by the remote control to the secured appliance which data frame comprises data for commanding an operation of the secured appliance which facilitates access to the broadcast programming.
Similarity Score: 0.6584


Patent ID: 6933491
PubMed Claim: A system for monitoring at least one parameter, comprising: an optical source coupled to at least one forward optical waveguide for transmitting incident light onto the forward optical waveguide; an optical detector coupled to at least one return optical waveguide for receiving reflected light from 

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6933491
PubMed Claim: A system for monitoring at least one parameter, comprising: an optical source coupled to at least one forward optical waveguide for transmitting incident light onto the forward optical waveguide; an optical detector coupled to at least one return optical waveguide for receiving reflected light from the return optical waveguide; at least one optical circulator remotely deployed from at least the optical source, wherein the forward optical waveguide and the return optical waveguide are coupled to the circulator; and at least one fiber-optic based sensor coupled to the circulator and responsive to the parameter, wherein the sensor is deployed down a borehole within the earth, is interrogated by the incident light, and produces the reflected light.
Similarity Score: 0.7503


Patent ID: 6888950
PubMed Claim: A warming device for the ears of an individual comprising: a resilient band, the band having two distal portions placed over the individual's ears and c

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6882713
PubMed Claim: A telephone apparatus comprising: a bell signal detector for detecting a bell signal transmitted from a telephone line, an information detector for detecting caller information inserted in a pause period of the bell signal transmitted from telephone line, a display unit for displaying the caller information received during the telephone call, a memory for storing the transmitted caller information displayed in said display unit, a light source for emitting a plurality of color lights, and a controller for controlling said light source, wherein said light source emits a first color light of said plurality of color lights when said bell signal detector detects the bell signal, and said light source emits a second color light of said plurality of color lights when the caller information detected by said information detector coincides with caller information stored in said memory, and said light source emits a third color light of said plurality of color li

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6882709
PubMed Claim: An enhanced telephony services management system for controlling communications between a subscriber location and a head end in a broadband cable television communication system, the subscriber location including a local database, a display device and an interface device for interacting with the display device, and the head end including connections to a network mail server, a conventional broadcast television source and at least one remote network database, the enhanced telephony services management system comprising a plurality of user modules for interacting with the subscriber local database, display device and interface device to initiate and control the presentation and delivery of enhanced telephone services to the subscriber through the display device while simultaneously viewing programming from the conventional broadcast television source; and a plurality of network modulates for interacting with the network mail server and the head end in the

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6886749
PubMed Claim: An image reading apparatus for reading an image on a read medium as attached to an information processing device including at least a display device and an intensity adjustment device for adjusting display intensity of the display device, as freely removable from the device, comprising: a read control device controlling a read of an image; a first notification device notifying the intensity adjustment device of the information processing device of a change in display intensity of the display device depending on a state of the read of an image controlled by said read control device; and a second notification device notifying the information processing device of a change in display direction of a part or all of the image displayed on the display device depending on a state of the read of the image controlled by said read control device.
Similarity Score: 0.6381


Patent ID: 6888248
PubMed Claim: A multi-level metal interconnect structure for an NMOS trans

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6888484
PubMed Claim: Apparatus comprising: a) a first signal mixer having a first input for receiving an input signal, a second input, and an output; b) a truncation element having an input coupled with the output of the first signal mixer and an output for providing a number of higher order bits defining an output signal; c) means for providing bits of the output of the first signal mixer, other than those output by the truncation element, as lower order bits, the means for providing including an input coupled with the truncation element, and an output for providing the lower order bits; and d) a noise transfer function element having an input coupled with the output of the means for providing, and an output coupled with the second input of the first signal mixer, wherein the noise transfer function is an L-order transfer function, wherein L is at least 4, and wherein the number of higher order bits is L+1.
Similarity Score: 0.6821


Patent ID: 6888571
PubMed Claim: A phot

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6888203
PubMed Claim: A power chip set for a switching mode power supply, comprising a high voltage chip, having a junction field effect transistor, a switching power MOS transistor, and a current sense transistor, and further comprising: a N + drift layer epitaxially grown on a N + substrate forming a plurality of P å ììå ììå ìì wells; a first P å ììå ììå ìì well having a first P å ììå ììå ìì base, a first N + region and a first P + region formed on said first well; a second P å ììå ììå ìì well having a second P å ììå ììå ìì base, a second N + region and a second P + region formed on said second well; a third P å ììå ììå ìì well having a third P å ììå ììå ìì base, a third N + region and a third P + region formed on said third well; a first planar gate is formed on the first and second N + regions or said first and second P å ììå ììå ìì wells, wherein said 

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6888206
PubMed Claim: A power semiconductor device comprising: an impurity substrate surface of low concentration having a first conductivity type formed in a predetermined region of a substrate surface; an impurity region of high concentration having a second conductivity type formed in said substrate surface adjacent to said impurity substrate surface; a conductor formed over said substrate surface to be insulated from said impurity substrate surface; a conductive shield formed to include at least part of a region over said impurity substrate surface other than a region where said conductor is formed and to include a region overlapping a boundary region between said impurity substrate surface and said impurity region, said conductive shield being formed over, but not directly contacting, said impurity substrate surface and said impurity region with a predetermined insulation film interposed therebetween, at least part of said conductive shield being connected to said condu

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6933478
PubMed Claim: An electric heating device comprising: a. a center rectangular panel with a width of approximately 3å ìì inches to 4å ìì inches and a length of approximately 14 inches to 16 inches, formed of front and back panels of a suitable fabric material, with a heating element sandwiched therein; b. two triangular side panels, formed of front and back panels of the same fabric material as the rectangular panel, each with two straight-cut edges of equal lengths of approximately 11 inches to 13 inches and one curved edge that is cut in an arch of a normal curvature of a normal, at rest, adult having the same length as the length of the rectangular center panel, with heating elements sandwiched within each triangular side panel, with the curved edges of the triangular side panels attached to each of the 14 inches to 16 inch edges of the rectangular panel, to create a curved, scoop-like, three dimensional structure; c. said heating elements that transverse 

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6966359
PubMed Claim: A radiator plate rapid cooling apparatus comprising a base deck and radiation fins located above the base deck, the base deck having passages formed therein that contain a capillary means formed by extrusion to form a closed loop of double-layer passages, the closed loop being vacuumized and filled with a heat dissipation medium, the base deck having a heat absorption end to transfer heat to the radiation fins located thereabove to allow the heat to be dispelled by a fan to achieve heat dissipation effect, the passages are reciprocal and have two ends which communicate with one another and house the capillary means which is placed therein or formed by integrated extrusion.
Similarity Score: 0.6775


Patent ID: 6888062
PubMed Claim: A motor assembly having improved electromagnetic noise filtering and dissipation for a vehicle, said assembly comprising: a motor having at least two terminals extending therefrom for connecting to a power source; a carrier h

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6887096
PubMed Claim: A connector for making electrical connection, comprising: a first member having a plurality of terminals made of a conductive material; a second member having a plurality of electrodes that are provided at positions corresponding to the terminals and come into contact with the terminals, wherein the electrodes include at least one separation detecting electrode that is formed smaller than other electrodes, and wherein, upon separation of the first member and the second member, the terminal corresponding to the separation detecting electrode is separated from the separation detecting electrode prior to separation of other terminals from corresponding electrodes; and a signal generation circuit that generates a modulated signal having a predetermined format, wherein the modulated signal flows through the separation detecting electrode and the terminal corresponding to the separation detecting electrode only when the separation detecting electrode and the 

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6888571
PubMed Claim: A photosensor system comprising: a photosensor array including a plurality of photosensors arranged two-dimensionally; initializing means for applying a reset pulse signal to each of the photosensors of the photosensor array, thereby initializing the photosensors; signal readout means for applying a pre-charge pulse signal to each of the photosensors of the photosensor array, applying a readout pulse signal to each of the photosensors, and receiving a voltage output from each of the photosensors; and effective voltage adjusting means for applying, to each of the photosensors, correction signals for correcting, to optimal values, effective voltages of the signals applied to each of the photosensors by the initializing means and the signal readout means, wherein the correction signals applied by the effective voltage adjusting means set, at 0 V, average effective voltages of the signals applied to the photosensors by the initializing means and the signal 

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6888149
PubMed Claim: A method for correcting a beam irradiation position during processing in a focused ion beam apparatus, the method comprising: a first step of forming a linear line pattern on a sample by linearly irradiating a focused ion beam on a sample at a location removed from a processing region to be processed by irradiation of the focused ion beam; a second step of measuring a position of the linear line pattern in a direction perpendicular to the linear line pattern before processing; a third step of measuring at least once a position of the linear line pattern in a direction perpendicular to the linear line pattern; a fourth step of detecting a drift of the position of the linear line pattern measured by the third step in the direction perpendicular to the linear line pattern with respect to the position measured by the second step or the previously measured position; and a fifth step of correcting the beam irradiation position of the focused ion beam based on

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6888567
PubMed Claim: An electronic photographing apparatus comprising: a photographing optical system which photographs an object; a data processing section which converts an optical image of the object photographed by the photographing optical system into digital image data, and records the converted image data therein; a display section which displays the image data obtained by the data processing section; a specification section which specifies a plurality of selected, non-contiguous regions in an image displayed at the display section for joined processing of autofocusing; an external operating member capable of externally operating a state of the electronic photographing apparatus; and a control section which controls an operation concerning photographing by th electronic photographing apparatus based on information specified by the specification section including the plurality of selected, non-contiguous regions jointly processed in an operation made by the external o

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6882709
PubMed Claim: An enhanced telephony services management system for controlling communications between a subscriber location and a head end in a broadband cable television communication system, the subscriber location including a local database, a display device and an interface device for interacting with the display device, and the head end including connections to a network mail server, a conventional broadcast television source and at least one remote network database, the enhanced telephony services management system comprising a plurality of user modules for interacting with the subscriber local database, display device and interface device to initiate and control the presentation and delivery of enhanced telephone services to the subscriber through the display device while simultaneously viewing programming from the conventional broadcast television source; and a plurality of network modulates for interacting with the network mail server and the head end in the

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6887631
PubMed Claim: A color filter, comprising: a plurality of pixels which are separated from each other by partitions formed on a formation area of a substrate, said pixels including a first coloring layer having an amount of coloring material; and a plurality of dummy pixels separated from each other by the partitions, said dummy pixels including a second coloring layer having an amount of coloring material substantially the same as the amount of coloring material of the first coloring layer, the pixels being active display elements that contribute to an image display and having light transmittance characteristics that are substantially uniform among each of said pixels, and the dummy pixels being inactive elements in the contributions to the image display.
Similarity Score: 0.6286


Patent ID: 6888149
PubMed Claim: A method for correcting a beam irradiation position during processing in a focused ion beam apparatus, the method comprising: a first step of forming a line

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6888202
PubMed Claim: An integrated circuit comprising: a latch circuit including, a first inverter including a first PMOS transistor and a first NMOS transistor with a first data node comprising interconnected source/drains (S/D) of the first PMOS and NMOS transistors; a second inverter including a second PMOS transistor and a second NMOS transistor with a second data node comprising interconnected source/drains (S/D) of the second PMOS and NMOS transistors; wherein the gates of the first PMOS and first NMOS transistors are coupled to the second data node; wherein the gates of the second PMOS and second NMOS transistors are coupled to the first data node; an input switch including, an access transistor including a first S/D coupled to the first data node and to the gate of the second PMOS transistor and to the gate of the second NMOS transistor and including a second S/D coupled to a first data access node and including a gate coupled to a first access control node; and a f

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Patent ID: 6886749
PubMed Claim: An image reading apparatus for reading an image on a read medium as attached to an information processing device including at least a display device and an intensity adjustment device for adjusting display intensity of the display device, as freely removable from the device, comprising: a read control device controlling a read of an image; a first notification device notifying the intensity adjustment device of the information processing device of a change in display intensity of the display device depending on a state of the read of an image controlled by said read control device; and a second notification device notifying the information processing device of a change in display direction of a part or all of the image displayed on the display device depending on a state of the read of the image controlled by said read control device.
Similarity Score: 0.6829


Patent ID: 6887096
PubMed Claim: A connector for making electrical connection, comprising: a 

In [172]:
df

Unnamed: 0,Unnamed: 1,top_claim_ids,cosine_similarity,claims,ï»¿,patent_id,text,section_id,subsection_id,group_id,subgroup_id,category,sequence,uuid
predict_n0,0,6888173,0.7624,A light emitting diode device comprising: a su...,115,6888173,A light emitting diode device comprising: a su...,['H'],['H01'],['H01L'],['H01L33/507'],['additional'],[1],0c97c6c1-ebc1-11ea-a344-121df0c29c1e
predict_n0,1,6888520,0.7164,"An active matrix type display, comprising: a s...",140,6888520,"An active matrix type display, comprising: a s...",['H'],['H01'],['H01L'],['H01L51/5271'],['additional'],[6],275aea04-ebb8-11ea-a344-121df0c29c1e
predict_n0,2,6887126,0.7158,An apparatus for thinning a semiconductor wafe...,37,6887126,An apparatus for thinning a semiconductor wafe...,['H'],['H01'],['H01L'],['H01L22/26'],['inventional'],[0],245bb0ee-ebc8-11ea-a344-121df0c29c1e
predict_n0,3,6964086,0.7140,A method of manufacturing thin film piezoelect...,210,6964086,A method of manufacturing thin film piezoelect...,['H'],['H01'],['H01L'],['H01L41/316'],['inventional'],[1],03c12e3b-ebc6-11ea-a344-121df0c29c1e
predict_n0,4,6887364,0.7095,A method for plating a metal strip to be used ...,45,6887364,A method for plating a metal strip to be used ...,['C'],['C25'],['C25D'],['C25D5/028'],['inventional'],[0],01317a10-ebce-11ea-a344-121df0c29c1e
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
predict_n59,35,6888173,0.5843,A light emitting diode device comprising: a su...,115,6888173,A light emitting diode device comprising: a su...,['H'],['H01'],['H01L'],['H01L33/507'],['additional'],[1],0c97c6c1-ebc1-11ea-a344-121df0c29c1e
predict_n59,36,6888071,0.5838,A layout method for supporting a plurality of ...,99,6888071,A layout method for supporting a plurality of ...,['H'],['H05'],['H05K'],['H05K2201/10522'],['additional'],[5],1d998ecd-ebbf-11ea-a344-121df0c29c1e
predict_n59,37,6888137,0.5827,A scanning charged-particle beam instrument ha...,111,6888137,A scanning charged-particle beam instrument ha...,['H'],['H01'],['H01J'],['H01J37/22'],['inventional'],[1],118b5915-ebce-11ea-a344-121df0c29c1e
predict_n59,38,6968958,0.5796,"An apparatus, comprising: a first electronic d...",221,6968958,"An apparatus, comprising: a first electronic d...",['H'],['H05'],['H05K'],['H05K5/0021'],['inventional'],[0],2a37abfe-ebba-11ea-a344-121df0c29c1e


In [1002]:
df_claim_cpc_test.patent_id.iloc[1]

6969627

In [1057]:
df_claim_cpc_test['true']=1.000
type(df_claim_cpc_test.true)


pandas.core.series.Series

In [946]:
k_similar_patents = df.xs(listofpredictdfs[0]).nlargest(2, ['cosine_similarity'])
k_similar_patents

Unnamed: 0,top_claim_ids,cosine_similarity,claims,ï»¿,patent_id,text,section_id,subsection_id,group_id,subgroup_id,category,sequence,uuid
0,6888173,0.7624,A light emitting diode device comprising: a su...,115,6888173,A light emitting diode device comprising: a su...,['H'],['H01'],['H01L'],['H01L33/507'],['additional'],[1],0c97c6c1-ebc1-11ea-a344-121df0c29c1e
1,6888520,0.7164,"An active matrix type display, comprising: a s...",140,6888520,"An active matrix type display, comprising: a s...",['H'],['H01'],['H01L'],['H01L51/5271'],['additional'],[6],275aea04-ebb8-11ea-a344-121df0c29c1e


In [967]:
top_k = 2

for k in range(0, top_k):
    top_n = k
    predict = pd.DataFrame(0, index=np.arange(1),columns= ['cosine_similarity'])
    #print(predict)
    for n in range(len(listofpredictdfs)):
        n=0 
        k_similar_patents = df.xs(listofpredictdfs[n]).nlargest(top_n, ['cosine_similarity'])
        #print('k_similar_patents')
        #result_k_similar_patents = pd.DataFrame(df_claim_cpc_test.group_id, columns=['group_id'])
#        result_k_similar_patents = pd.DataFrame(df.cosine_similarity, columns=['cosine_similarity'])
#        print(result_k_similar_patents)
        #print(k_similar_patents)
      #  print(result_k_similar_patents)
        
    result_k_similar_patents = pd.DataFrame(df.cosine_similarity, columns=['cosine_similarity'])
    print(result_k_similar_patents)
#    for i in range(0, top_n):
#        i= 0
#        result_k_similar_patents  = k_similar_patents.iloc[i, 1:]
#print('--', result_k_similar_patents)
        
#    result_k_similar_patents_df = pd.DataFrame(result_k_similar_patents, columns= ['cosine_similarity'])
 #   result_k_similar_patents_df.insert(0, "input_patent_id", df_claim_cpc_test.patent_id, True)
 #   locals()["predict"+str(n)] = result_k_similar_patents_df.copy()
#    predict = pd.concat([predict, locals()["predict"+str(n)]], ignore_index=True)
    #print(predict)
#result_k_similar_patents_df = result_k_similar_patents_df[0:0]
#print(result_k_similar_patents_df)

                cosine_similarity
predict_n0  0              0.7624
            1              0.7164
            2              0.7158
            3              0.7140
            4              0.7095
...                           ...
predict_n59 35             0.5843
            36             0.5838
            37             0.5827
            38             0.5796
            39             0.5788

[2400 rows x 1 columns]
                cosine_similarity
predict_n0  0              0.7624
            1              0.7164
            2              0.7158
            3              0.7140
            4              0.7095
...                           ...
predict_n59 35             0.5843
            36             0.5838
            37             0.5827
            38             0.5796
            39             0.5788

[2400 rows x 1 columns]


In [968]:
df.head()

Unnamed: 0,Unnamed: 1,top_claim_ids,cosine_similarity,claims,ï»¿,patent_id,text,section_id,subsection_id,group_id,subgroup_id,category,sequence,uuid
predict_n0,0,6888173,0.7624,A light emitting diode device comprising: a su...,115,6888173,A light emitting diode device comprising: a su...,['H'],['H01'],['H01L'],['H01L33/507'],['additional'],[1],0c97c6c1-ebc1-11ea-a344-121df0c29c1e
predict_n0,1,6888520,0.7164,"An active matrix type display, comprising: a s...",140,6888520,"An active matrix type display, comprising: a s...",['H'],['H01'],['H01L'],['H01L51/5271'],['additional'],[6],275aea04-ebb8-11ea-a344-121df0c29c1e
predict_n0,2,6887126,0.7158,An apparatus for thinning a semiconductor wafe...,37,6887126,An apparatus for thinning a semiconductor wafe...,['H'],['H01'],['H01L'],['H01L22/26'],['inventional'],[0],245bb0ee-ebc8-11ea-a344-121df0c29c1e
predict_n0,3,6964086,0.714,A method of manufacturing thin film piezoelect...,210,6964086,A method of manufacturing thin film piezoelect...,['H'],['H01'],['H01L'],['H01L41/316'],['inventional'],[1],03c12e3b-ebc6-11ea-a344-121df0c29c1e
predict_n0,4,6887364,0.7095,A method for plating a metal strip to be used ...,45,6887364,A method for plating a metal strip to be used ...,['C'],['C25'],['C25D'],['C25D5/028'],['inventional'],[0],01317a10-ebce-11ea-a344-121df0c29c1e


In [989]:
n1=df.xs(listofpredictdfs[0]).nlargest(3, ['cosine_similarity'])
n1.cosine_similarity.iloc[1]

0.7164

In [1080]:
top_k = 20
i=0
predict = pd.DataFrame(0, index=np.arange(1),columns= ['cosine_similarity'])
for i in range(0,len(listofpredictdfs)):
    n1=df.xs(listofpredictdfs[i]).nlargest(top_k, ['cosine_similarity'])
    n1_result=pd.DataFrame(n1.cosine_similarity.iloc[0:top_k],index=np.arange(top_k),columns=['cosine_similarity'])

    #print(n1_result)
    result_k_similar_patents_df = pd.DataFrame(n1_result, columns= ['cosine_similarity'])
    #print(result_k_similar_patents_df)
    #result_table=result_k_similar_patents_df.insert(0, "test_patent_id", df_claim_cpc_test.patent_id.iloc[i], True)
    #locals()["predict"+str(i)] = result_k_similar_patents_df.copy()
    #print(result_k_similar_patents_df)
    
    data = torch.tensor((result_k_similar_patents_df.to_numpy()).astype(float), dtype=torch.float32)
#    print(data)
    m = nn.Sigmoid()
    output = m(data)
    output = (output>0.65).float()
    #print(output)
    output_df = pd.DataFrame(output, columns=['cosine_similarity']).astype(float)
    #print(output_df)
    y_pred = output_df.to_numpy()
#    print(y_pred)
    y_true = df_claim_cpc_test.iloc[:top_k, 10:].to_numpy()
    #print(y_true)
    
    
  
    F1Measure_list.append(F1Measure(y_true,y_pred))
    Recall_list.append(Recall(y_true,y_pred))
    Accuracy_list.append(Accuracy(y_true, y_pred))
    Precision_list.append(Precision(y_true,y_pred))
    Hamming_Loss_list.append(Hamming_Loss(y_true, y_pred))
    print(i,"번째 F1Measure: ", F1Measure(y_true,y_pred))
    print("Recall: ", Recall(y_true,y_pred))
    print("Accuracy: ", Accuracy(y_true, y_pred))
    print("Precision: ", Precision(y_true,y_pred))
    print("Hamming_Loss: ", Hamming_Loss(y_true, y_pred))


    #predict = pd.concat([predict, locals()["predict"+str(i)]], ignore_index=True)
    #print(predict)
    #result_k_similar_patents_df = result_k_similar_patents_df[0:0]
    
    #predict_t = predict.iloc[:,:1]
    #print(predict_t)
#dict = {'F1Measure': F1Measure_list, 'Recall': Recall_list, 'Accuracy': Accuracy_list, 'Precision': Precision_list, 'Hamming_Loss': Hamming_Loss_list}
#df_ModelMetrics = pd.DataFrame(dict)
#df_ModelMetrics.to_csv('/output/modelpredict/Model_Metrics.csv'+'-'+datetime.now().strftime("%Y-%m-%d_%H-%M-%S"), index = False)
#predict.to_csv(r'/output/modelpredict/predict_result_filteraugsbert.csv'+'-'+datetime.now().strftime("%Y-%m-%d_%H-%M-%S"), index = False)

0 번째 F1Measure:  1.0
Recall:  1.0
Accuracy:  1.0
Precision:  1.0
Hamming_Loss:  0.0
1 번째 F1Measure:  1.0
Recall:  1.0
Accuracy:  1.0
Precision:  1.0
Hamming_Loss:  0.0
2 번째 F1Measure:  1.0
Recall:  1.0
Accuracy:  1.0
Precision:  1.0
Hamming_Loss:  0.0
3 번째 F1Measure:  1.0
Recall:  1.0
Accuracy:  1.0
Precision:  1.0
Hamming_Loss:  0.0
4 번째 F1Measure:  1.0
Recall:  1.0
Accuracy:  1.0
Precision:  1.0
Hamming_Loss:  0.0
5 번째 F1Measure:  1.0
Recall:  1.0
Accuracy:  1.0
Precision:  1.0
Hamming_Loss:  0.0
6 번째 F1Measure:  1.0
Recall:  1.0
Accuracy:  1.0
Precision:  1.0
Hamming_Loss:  0.0
7 번째 F1Measure:  1.0
Recall:  1.0
Accuracy:  1.0
Precision:  1.0
Hamming_Loss:  0.0
8 번째 F1Measure:  1.0
Recall:  1.0
Accuracy:  1.0
Precision:  1.0
Hamming_Loss:  0.0
9 번째 F1Measure:  1.0
Recall:  1.0
Accuracy:  1.0
Precision:  1.0
Hamming_Loss:  0.0
10 번째 F1Measure:  1.0
Recall:  1.0
Accuracy:  1.0
Precision:  1.0
Hamming_Loss:  0.0
11 번째 F1Measure:  1.0
Recall:  1.0
Accuracy:  1.0
Precision:  1.0
Hamming_L

In [1089]:
dict = {'F1Measure': F1Measure_list, 'Recall': Recall_list, 'Accuracy': Accuracy_list, 'Precision': Precision_list, 'Hamming_Loss': Hamming_Loss_list}
#print(len(F1Measure_list))
#print(len(Recall_list))
#print(len(Accuracy_list))
#print(len(Precision_list))
#print(len(Hamming_Loss_list))

df_ModelMetrics= pd.DataFrame.from_dict(dict, orient='index')
df_ModelMetrics
#df_ModelMetrics.to_csv(r'output/df_claim_cpc_new_claim_result.csv', index = False)
df_ModelMetrics.to_csv(r'output/Model_result_Metrics.csv'+'-'+datetime.now().strftime("%Y-%m-%d_%H-%M-%S"), index = False)


#predict.to_csv(r'/output/predict_result_filteraugsbert.csv'+'-'+datetime.now().strftime("%Y-%m-%d_%H-%M-%S"), index = False)

1104
945
942
942
942


In [1101]:
n1

Unnamed: 0,top_claim_ids,cosine_similarity,claims,ï»¿,patent_id,text,section_id,subsection_id,group_id,subgroup_id,category,sequence,uuid
0,6888173,0.7624,A light emitting diode device comprising: a su...,115,6888173,A light emitting diode device comprising: a su...,['H'],['H01'],['H01L'],['H01L33/507'],['additional'],[1],0c97c6c1-ebc1-11ea-a344-121df0c29c1e
1,6888520,0.7164,"An active matrix type display, comprising: a s...",140,6888520,"An active matrix type display, comprising: a s...",['H'],['H01'],['H01L'],['H01L51/5271'],['additional'],[6],275aea04-ebb8-11ea-a344-121df0c29c1e
2,6887126,0.7158,An apparatus for thinning a semiconductor wafe...,37,6887126,An apparatus for thinning a semiconductor wafe...,['H'],['H01'],['H01L'],['H01L22/26'],['inventional'],[0],245bb0ee-ebc8-11ea-a344-121df0c29c1e
3,6964086,0.714,A method of manufacturing thin film piezoelect...,210,6964086,A method of manufacturing thin film piezoelect...,['H'],['H01'],['H01L'],['H01L41/316'],['inventional'],[1],03c12e3b-ebc6-11ea-a344-121df0c29c1e
4,6887364,0.7095,A method for plating a metal strip to be used ...,45,6887364,A method for plating a metal strip to be used ...,['C'],['C25'],['C25D'],['C25D5/028'],['inventional'],[0],01317a10-ebce-11ea-a344-121df0c29c1e


In [1126]:
top_k = 5
i=0
predict = pd.DataFrame(0, index=np.arange(1),columns= ['cosine_similarity'])
for i in range(0,len(listofpredictdfs)):
    n1=df.xs(listofpredictdfs[i]).nlargest(top_k, ['cosine_similarity'])
    n1_result=pd.DataFrame(n1.cosine_similarity.iloc[0:top_k],index=np.arange(top_k),columns=['cosine_similarity'])
    n1_result.insert(0, 'group_id', n1.group_id.iloc[0:top_k], allow_duplicates=False)
    n1_result.insert(0, 'top_claim_ids', n1.top_claim_ids.iloc[0:top_k], allow_duplicates=False)


    result_k_similar_patents_df = pd.DataFrame(n1_result, columns= ['cosine_similarity', 'group_id','top_claim_ids'])
    result_k_similar_patents_df.insert(0, "test_patent_id", df_claim_cpc_test.patent_id.iloc[i], True)
    result_k_similar_patents_df.insert(0, "test_group_id", df_claim_cpc_test.group_id.iloc[i], True)
    locals()["predict"+str(i)] = result_k_similar_patents_df.copy()
    print(result_k_similar_patents_df)
    
result_k_similar_patents_df.to_csv(r'output/Predict_result_filteraugsbert.csv'+'-'+datetime.now().strftime("%Y-%m-%d_%H-%M-%S"), index = False)

  test_group_id  test_patent_id  cosine_similarity  group_id  top_claim_ids
0      ['H01L']         6969626             0.7624  ['H01L']        6888173
1      ['H01L']         6969626             0.7164  ['H01L']        6888520
2      ['H01L']         6969626             0.7158  ['H01L']        6887126
3      ['H01L']         6969626             0.7140  ['H01L']        6964086
4      ['H01L']         6969626             0.7095  ['C25D']        6887364
  test_group_id  test_patent_id  cosine_similarity  group_id  top_claim_ids
0      ['H01L']         6969627             0.7851  ['H01L']        6962860
1      ['H01L']         6969627             0.7713  ['H01L']        6962874
2      ['H01L']         6969627             0.7708  ['H01L']        6962845
3      ['H01L']         6969627             0.7562  ['H01L']        6888195
4      ['H01L']         6969627             0.7542  ['H04N']        6961077
  test_group_id  test_patent_id  cosine_similarity  group_id  top_claim_ids
0      ['H01

In [1132]:
df_claim_cpc_test.iloc[:top_k, 5]

0    ['H01L']
1    ['H01L']
2    ['H01L']
3    ['H01L']
4    ['H01L']
Name: group_id, dtype: object

In [1157]:
top_k = 5
i=0
predict1 = pd.DataFrame(0, index=np.arange(1),columns= ['group_id'])
for i in range(0,len(listofpredictdfs)):
    n2=df.xs(listofpredictdfs[i]).nlargest(top_k, ['cosine_similarity'])
    n2_result=pd.DataFrame(n1.group_id.iloc[0:top_k],index=np.arange(top_k),columns=['group_id'])

    #print(n1_result)
    result_group_id_df = pd.DataFrame(n2_result, columns= ['group_id'])
    #print(result_k_similar_patents_df)
    #result_table=result_k_similar_patents_df.insert(0, "test_patent_id", df_claim_cpc_test.patent_id.iloc[i], True)
    #locals()["predict"+str(i)] = result_k_similar_patents_df.copy()
    print(result_group_id_df)
    
    data = torch.tensor((result_group_id_df.to_numpy()))
    data
#    m = nn.Sigmoid()
    
#    output_1 = (output>0.65).float()
    #print(output)
    output_1_df = pd.DataFrame(data, columns=['group_id']).astype(float)
    #print(output_df)
#    y_pred = data.to_numpy()
#    print(y_pred)
#    y_true = df_claim_cpc_test.iloc[:top_k, 5].to_numpy()
    #print(y_true)
    
    

   group_id
0  ['H04N']
1  ['H05K']
2  ['H04M']
3  ['H04M']
4  ['H03K']


TypeError: can't convert np.ndarray of type numpy.object_. The only supported types are: float64, float32, float16, complex64, complex128, int64, int32, int16, int8, uint8, and bool.