### Patent Citation

In [None]:
import json
import re
import time
import ast 
import requests
import pandas as pd
from tqdm import tqdm

# PatentsView API key, sent as the "X-Api-Key" header by the request helpers
# in this notebook. The value here is a placeholder; replace it with a real key
# before running any cell that calls the API.
api_key = "Your Api Key"

In [None]:
def get_number_of_citation(matching_method, company_name, year, timeout=60):
    """Fetch the first page of a company's utility patents for one year.

    Sends a GET request to the PatentsView ``patent`` endpoint asking for the
    ``patent_id`` and ``patent_num_times_cited_by_us_patents`` fields of up to
    1000 records, sorted by ascending ``patent_id``.

    Args:
        matching_method: Query operator applied to
            ``assignees.assignee_organization`` (for example ``"_text_phrase"``
            or ``"_begins"``).
        company_name: Assignee organization name to match.
        year: Value compared for equality against ``patent_year``. Callers in
            this file pass it as a string such as ``"2000"``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the whole batch run.

    Returns:
        The decoded JSON body of the response. Callers in this file read its
        ``"patents"`` and ``"total_hits"`` entries.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    url = "https://search.patentsview.org/api/v1/patent/"
    headers = {
        "accept": "application/json",
        "X-Api-Key": api_key,
    }

    query = {
        "_and": [
            # matching_method is an operator such as _text_phrase or _begins
            {matching_method: {"assignees.assignee_organization": company_name}},
            {"_eq": {"patent_year": year}},
            {"_eq": {"patent_type": "utility"}},
        ]
    }
    params = {
        "f": json.dumps(["patent_num_times_cited_by_us_patents", "patent_id"]),
        "o": json.dumps({"size": 1000}),
        "q": json.dumps(query),
        "s": json.dumps([{"patent_id": "asc"}]),
    }

    response = requests.get(url, headers=headers, params=params, timeout=timeout)
    # Surface HTTP errors here instead of handing an error payload to callers
    # that expect "patents" / "total_hits" keys.
    response.raise_for_status()

    return response.json()

In [None]:
def get_number_of_citation_v2(matching_method, company_name, year, p_id, timeout=60):
    """Fetch a follow-up page of a company's utility patents for one year.

    Same request as ``get_number_of_citation`` with one extra condition: only
    patents whose ``patent_id`` is greater than ``p_id`` are returned. Because
    results are sorted by ascending ``patent_id``, passing the last id of the
    previous page yields the next page of up to 1000 records.

    Args:
        matching_method: Query operator applied to
            ``assignees.assignee_organization`` (for example ``"_text_phrase"``
            or ``"_begins"``).
        company_name: Assignee organization name to match.
        year: Value compared for equality against ``patent_year``. Callers in
            this file pass it as a string.
        p_id: ``patent_id`` used as the exclusive lower bound of the page.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the whole batch run.

    Returns:
        The decoded JSON body of the response. Callers in this file read its
        ``"patents"`` entry.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    url = "https://search.patentsview.org/api/v1/patent/"
    headers = {
        "accept": "application/json",
        "X-Api-Key": api_key,
    }

    query = {
        "_and": [
            # matching_method is an operator such as _text_phrase or _begins
            {matching_method: {"assignees.assignee_organization": company_name}},
            {"_eq": {"patent_year": year}},
            {"_eq": {"patent_type": "utility"}},
            # Cursor condition: only ids after the last one already fetched.
            {"_gt": {"patent_id": p_id}},
        ]
    }
    params = {
        "f": json.dumps(["patent_num_times_cited_by_us_patents", "patent_id"]),
        "o": json.dumps({"size": 1000}),
        "q": json.dumps(query),
        "s": json.dumps([{"patent_id": "asc"}]),
    }

    response = requests.get(url, headers=headers, params=params, timeout=timeout)
    # Surface HTTP errors here instead of handing an error payload to callers
    # that expect a "patents" key.
    response.raise_for_status()

    return response.json()

In [127]:
# Spot check for a single company/year: add up the citation counts of the
# patents returned by one request (only the first page of results is summed).
d = get_number_of_citation("_text_phrase", "dsm n.v.", "2000")
total_result = d["patents"]
total_count = sum(
    patent["patent_num_times_cited_by_us_patents"] for patent in total_result
)
total_count

732

In [128]:
# company_df = pd.read_excel("data/top_patent.xlsx",sheet_name="Sheet2")
# Load the citation table; its "lookup_name" column supplies the assignee
# names that are queried against the API.
citation_df = pd.read_csv("rok_data/citation_data.csv")
company_list = citation_df["lookup_name"].tolist()

In [149]:
# company_list =["Advanced Micro Devices, Inc."]
# citation_df=pd.DataFrame(company_list)
# Build one citation-total column per grant year (2018-2023): for every company,
# fetch its utility patents for that year and add up how often they were cited.
for year in range(2018, 2024):
    citation_list = []
    for company_name in tqdm(company_list):
        time.sleep(1)  # pause before each request (presumably for API rate limits)
        first_page = get_number_of_citation("_text_phrase", company_name, str(year))
        remaining = first_page["total_hits"]
        # Copy so that extending the list never mutates the response object.
        patents = list(first_page["patents"])

        # The helpers request at most 1000 records per call, so larger result
        # sets are paged by asking for patent_ids after the last one collected.
        while remaining > 1000 and patents:
            time.sleep(1)
            remaining -= 1000
            try:
                page = get_number_of_citation_v2(
                    "_text_phrase", company_name, str(year), patents[-1]["patent_id"]
                )["patents"]
            except Exception as exc:
                # Best effort: a failed page uses up one unit of the page budget
                # and the next pass retries from the same cursor. Report it so
                # that an incomplete total does not go unnoticed.
                tqdm.write(
                    f"Warning: page request failed for {company_name!r} ({year}): "
                    f"{exc!r}; the citation total may be incomplete."
                )
                continue
            if not page:
                # Nothing sorts after the cursor, so everything is collected.
                break
            patents += page

        total_count = sum(
            patent["patent_num_times_cited_by_us_patents"] for patent in patents
        )
        citation_list.append(total_count)
    citation_df[str(year)] = citation_list


100%|██████████| 434/434 [14:14<00:00,  1.97s/it]
100%|██████████| 434/434 [14:45<00:00,  2.04s/it]
100%|██████████| 434/434 [14:47<00:00,  2.04s/it]
100%|██████████| 434/434 [14:44<00:00,  2.04s/it]
100%|██████████| 434/434 [14:28<00:00,  2.00s/it]
100%|██████████| 434/434 [14:26<00:00,  2.00s/it]


In [150]:
# Save the table, now including the per-year citation columns, without the index.
# NOTE: this is the same path citation_df was read from, so the input file is
# overwritten.
citation_df.to_csv("rok_data/citation_data.csv", index = False)

### Match by Company Name

In [98]:
def get_name(matching_method, company_name, timeout=60):
    """Look up assignee organizations whose name matches ``company_name``.

    Sends a GET request to the PatentsView ``assignee`` endpoint asking for the
    ``assignee_id`` and ``assignee_organization`` fields of up to 1000 records.

    Args:
        matching_method: Query operator applied to ``assignee_organization``
            (callers in this file use ``"_begins"``).
        company_name: Name, or name prefix, to search for.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the whole batch run.

    Returns:
        The decoded JSON body of the response. Callers in this file read its
        ``"count"`` and ``"assignees"`` entries.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    url = "https://search.patentsview.org/api/v1/assignee/"
    headers = {
        "accept": "application/json",
        "X-Api-Key": api_key,
    }

    params = {
        "f": json.dumps(["assignee_id", "assignee_organization"]),
        "o": json.dumps({"size": 1000}),
        "q": json.dumps({matching_method: {"assignee_organization": company_name}}),
        # "s": json.dumps([{"assignee_id": "asc"}])
    }

    response = requests.get(url, headers=headers, params=params, timeout=timeout)
    # Surface HTTP errors here instead of handing an error payload to callers
    # that expect "count" / "assignees" keys.
    response.raise_for_status()

    return response.json()

In [100]:
def clean_company_name(name: str) -> str:
    """Normalize a company name before using it as a search term.

    Commas and periods are dropped, the tokens ``Corp``, ``Co``, ``Inc`` and
    ``Ltd`` are removed wherever they occur as whole words (case-insensitive),
    and the whitespace left behind is collapsed to single spaces.

    Args:
        name: Raw company name, e.g. ``"Advanced Micro Devices, Inc."``.

    Returns:
        The cleaned name with no leading, trailing or doubled whitespace.
    """
    # Remove commas and periods
    name = re.sub(r'[,.]', '', name)

    # Remove company suffixes
    name = re.sub(r'\b(Corp|Co|Inc|Ltd)\b', '', name, flags=re.IGNORECASE)

    # Dropping a token from the middle of a name leaves two adjacent spaces;
    # collapse them so the result is a clean search string.
    return re.sub(r'\s+', ' ', name).strip()

In [101]:
# Start the lookup table: one row per company, with an empty "lookup_name"
# column to be filled in later.
lookup_df = pd.DataFrame({"company_name": company_list})
lookup_df["lookup_name"] = None

In [102]:
# For every company, ask the assignee endpoint for organizations whose name
# begins with the cleaned company name and keep at most the first 10 of them.
suggestion_list = []
for company in company_list:
    result = get_name("_begins", clean_company_name(company))
    # "assignees" is only read when the response reports at least one match.
    matches = result["assignees"][:10] if result["count"] > 0 else []
    names = [match["assignee_organization"] for match in matches]
    time.sleep(1)
    suggestion_list.append(names)

lookup_df["suggested_list"] = suggestion_list

In [104]:
# Save the lookup table (company name, empty lookup name, suggestions) without
# the index.
# NOTE(review): the next cell reads "rok_data/lookup_table.csv", a different
# directory from the one written here - presumably the file is reviewed by hand
# and moved in between; confirm.
lookup_df.to_csv("dataset/lookup_table.csv", index = False)

In [105]:
# Load the lookup table used for the name-resolution steps below.
# NOTE(review): this path differs from the "dataset/lookup_table.csv" written
# by the previous cell - confirm this is the intended (presumably hand-edited)
# file.
company_df = pd.read_csv("rok_data/lookup_table.csv")

In [None]:
# Forward-fill blank company names so each row carries the name of the closest
# preceding non-empty row. Assigning the result back (instead of calling
# fillna(method="ffill", inplace=True) on the column selection) avoids the
# deprecated `method` argument and still updates company_df when pandas
# Copy-on-Write is enabled.
company_df["company_name"] = company_df["company_name"].ffill()

In [107]:
# Replace missing suggestion cells with the string "[]" so that every cell can
# be parsed into a list by ast.literal_eval in the following step.
company_df["suggested_list"]=company_df["suggested_list"].fillna("[]")

In [None]:

# Turn each cell's text (e.g. "['A', 'B']") back into a Python list.
# ast.literal_eval only accepts Python literals, so it does not execute code.
company_df["suggested_list"] = company_df["suggested_list"].apply(ast.literal_eval)


In [109]:
def _resolve_lookup_name(row):
    """Return the row's lookup name, adopting a sole suggestion if it is missing."""
    current = row["lookup_name"]
    if not pd.isna(current):
        # A lookup name is already present: keep it.
        return current
    candidates = row["suggested_list"]
    if len(candidates) != 1:
        # Zero or several suggestions: leave the value missing.
        return current
    return candidates[0]


company_df["lookup_name"] = company_df.apply(_resolve_lookup_name, axis=1)

In [110]:
# Any lookup name that is still missing falls back to the row's company_name.
company_df["lookup_name"] = company_df["lookup_name"].fillna(company_df["company_name"])

In [111]:
# The suggestions column is not used after this point; drop it in place so it
# is not written to the output file.
company_df.drop(columns=["suggested_list"], inplace=True)

In [112]:
# Save the resolved lookup table (company_df with its lookup_name column filled
# in) without the index.
company_df.to_csv("rok_data/lookup_table_v1.csv", index = False)