In [1]:
import pandas as pd

import wget
import zipfile
import os

import plotly.io as pio
pio.templates.default = "plotly_white" # Make "plotly_white" the default plotly template for this notebook

# Fetch the raw inventor table once; skipped when the extracted TSV is
# already present in the working directory.
if not os.path.isfile("rawinventor.tsv"):
    # Use the path returned by wget.download instead of assuming the file name:
    # when "rawinventor.tsv.zip" already exists (e.g. left over from an
    # interrupted run) wget saves under a different, non-clashing name, and
    # the stale archive would otherwise be the one extracted.
    zip_path = wget.download("https://s3.amazonaws.com/data.patentsview.org/download/rawinventor.tsv.zip")
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(".")
    os.remove(zip_path)

# Build the "mention-id" -> "inventor_id" table once and cache it on disk as
# disambiguation.tsv; skipped when that file already exists.
if not os.path.isfile("disambiguation.tsv"):
    rawinventor = pd.read_csv(
        "rawinventor.tsv",
        sep="\t",
        # Only these columns are used below; skipping the rest keeps the
        # memory footprint of this large file down.
        usecols=["patent_id", "sequence", "inventor_id"],
        # Read ids as text so they are not subject to per-chunk type inference.
        dtype={"patent_id": str},
    )
    # A mention id has the form "US<patent_id>-<sequence>".
    rawinventor["mention-id"] = (
        "US" + rawinventor["patent_id"].astype(str) + "-" + rawinventor["sequence"].astype(str)
    )
    rawinventor[["mention-id", "inventor_id"]].to_csv("disambiguation.tsv", sep="\t", index=False)
    # Release the raw table; only the cached file is needed from here on.
    del rawinventor

In [2]:
from pv_evaluation.benchmark import (
    inventor_benchmark_table,
    inventor_benchmark_plot,
)

# Load the cached mention-id -> inventor_id table, indexed by mention id.
# set_index returns a new frame, so no in-place mutation is required.
data = pd.read_csv("disambiguation.tsv", sep="\t").set_index("mention-id")
result = data["inventor_id"]

# Predicted clusterings to benchmark, keyed by the name shown for each one.
disambiguations = {
    "current": result,
    # Trivial baseline: every mention receives the same label, i.e. a single
    # cluster. Built explicitly rather than as `0*result`, which only yields a
    # constant through string repetition (0 * "abc" == "") and leaves missing
    # inventor ids unlabeled.
    "one cluster": pd.Series("", index=result.index, name=result.name),
}

In [3]:
# Tabulate the benchmark results for every entry of `disambiguations`; the
# output has one row per (benchmark, metric, algorithm) with its value.
inventor_benchmark_table(disambiguations)

Unnamed: 0,benchmark,metric,algorithm,value
0,patentsview-inventors,cluster precision,current,0.836364
1,patentsview-inventors,cluster precision,one cluster,0.0
2,patentsview-inventors,cluster recall,current,0.985075
3,patentsview-inventors,cluster recall,one cluster,1.0
4,patentsview-inventors,cluster f1,current,0.904648
5,patentsview-inventors,cluster f1,one cluster,0.0
6,israeli-inventors,cluster precision,current,0.986606
7,israeli-inventors,cluster precision,one cluster,0.0
8,israeli-inventors,cluster recall,current,0.943729
9,israeli-inventors,cluster recall,one cluster,1.0


In [4]:
# Presumably the graphical counterpart of the benchmark table for the same
# `disambiguations` — TODO confirm against the pv_evaluation documentation.
inventor_benchmark_plot(disambiguations)

In [5]:
from pv_evaluation.benchmark import inspect_clusters_to_split, inspect_clusters_to_merge, load_israeli_inventors_benchmark

In [6]:
# Compare the current disambiguation with the Israeli inventors benchmark; the
# output lists, per mention, the predicted cluster next to the reference one.
# NOTE(review): presumably these are predicted clusters spanning several
# reference inventors (candidates for splitting) — confirm in pv_evaluation.
inspect_clusters_to_split(result, load_israeli_inventors_benchmark())

Unnamed: 0_level_0,prediction,reference
mention-id,Unnamed: 1_level_1,Unnamed: 2_level_1
US4647259-3,fl:ab_ln:amiel-1,472
US4437263-1,fl:ab_ln:amiel-1,470
US4696347-2,fl:ad_ln:stolov-1,16522
US4580876-1,fl:ad_ln:stolov-1,16522
US5617152-0,fl:ad_ln:stolov-1,16522
...,...,...
US4759382-0,fl:ze_ln:harel-2,7856
US5256972-2,fl:ze_ln:harel-2,7857
US5873891-0,fl:ze_ln:sohn-10,16239
US5549555-0,fl:ze_ln:sohn-10,16237


In [7]:
# Compare the current disambiguation with the Israeli inventors benchmark; the
# output lists, per mention, the reference cluster next to the predicted one.
# NOTE(review): presumably these are reference inventors spread over several
# predicted clusters (candidates for merging) — confirm in pv_evaluation.
inspect_clusters_to_merge(result, load_israeli_inventors_benchmark())

Unnamed: 0_level_0,reference,prediction
mention-id,Unnamed: 1_level_1,Unnamed: 2_level_1
US5855074-0,13,fl:mo_ln:abitbol-1
US5825476-0,13,h6uzumjdp27ulvxe69um3plwa
US5784282-0,13,fl:ma_ln:abitbol-2
US5688262-0,20,fl:ma_ln:abraham-26
US5783798-0,20,fl:ma_ln:abraham-26
...,...,...
US5288705-0,18670,fl:jo_ln:zohar-2
US5344433-0,19132,fl:el_ln:talmore-1
US5344434-0,19132,fl:el_ln:talmore-1
US5851181-0,19132,fl:el_ln:talmor-1
