<a href="https://colab.research.google.com/github/Azmouc1k/azmo-portfolio/blob/main/Patent_analysis_and_measures.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 1. Patent Data from PatentsView

# 2. Patent Indicators and Variables

# 3. Patent Crawling - Google Patents

In [None]:
#install required python package

pip install requests
pip install html
pip install bs4
pip install progressbar2

In [None]:
#import libraries

import requests, csv, time
from tqdm import tqdm
from bs4 import BeautifulSoup
import collections

In [None]:
import pandas as pd
# Load the AMD patent sample from Stata and keep a CSV copy next to it.
# to_csv is called without index=False, so the row index is written as well.
data_amd = pd.read_stata('amd_sample.dta')
data_amd.to_csv('amd_sample.csv')

# Same conversion for the NVIDIA sample.
data_nvidia = pd.read_stata('nvidia_sample.dta')
data_nvidia.to_csv('nvidia_sample.csv')

In [None]:
# Raw ids from the AMD sample, and the same ids with every hyphen removed.
rs = data_amd['id'].tolist()
ls = [raw_id.replace('-', '') for raw_id in rs]

In [None]:
# Number of hyphen-free ids collected from data_amd['id'].
len(ls)

1603

In [None]:
# ---- crawl configuration ------------------------------------------------ #
OUTPUT_FILENAME = 'Practicals2.csv'   # CSV file the crawl results are written to
patent_numbers = ls                   # ids to crawl (hyphens already removed)

# Column names of the output CSV, in the order make_row() fills them.
header = [
    'patent number',
    'title',
    'abstract',
    'current assignee',
    'filing date',
    'classification',
    'backward citation count',
    'backward_details',
    'forward citation count',
    'forward_details',
    'number of claims',
]

def make_row(patent_number):
    """Scrape one Google Patents page and return its fields as a CSV row.

    Args:
        patent_number: Publication number as it appears in the Google Patents
            URL (e.g. 'US9152571B2').

    Returns:
        A list of 11 values in the same order as the `header` list: patent
        number, title, abstract, current assignee(s), filing date,
        classification codes, backward citation count, backward citation
        details, forward citation count, forward citation details and
        number of claims.

    Raises:
        requests.RequestException: on timeout, connection failure or a
            non-2xx HTTP status.
        AttributeError, KeyError, ValueError: when a required element
            (publication number, title, abstract, filing date, claims count)
            is missing from the page or cannot be parsed.
    """
    url = 'https://patents.google.com/patent/{}'.format(patent_number)

    r = requests.get(url, timeout=7)
    # Fail loudly on 404 / 429 / 5xx instead of scraping an error page.
    r.raise_for_status()
    # Name the parser explicitly: the bare 'html' feature lets bs4 choose
    # whichever parser happens to be installed, so results could vary between
    # environments. get_classification_codes() uses 'html.parser' as well.
    b = BeautifulSoup(r.text, 'html.parser')
    row = []

    # patent number
    row.append(b.find('dd', attrs={'itemprop': 'publicationNumber'}).text)

    # title
    row.append(b.find('meta', attrs={'name': 'DC.title'}).attrs['content'].strip())

    # abstract
    row.append(b.find('div', class_='abstract').text.strip())

    # current assignee(s), comma separated
    row.append(', '.join(
        el.text.strip()
        for el in b.find_all('dd', attrs={'itemprop': 'assigneeCurrent'})
    ))

    # filing date
    row.append(b.find('time', attrs={'itemprop': 'filingDate'}).text)

    # classification: the code found in the last <li> of every classification <ul>
    row.append(', '.join(
        ul.find_all('li')[-1].find('span', attrs={'itemprop': 'Code'}).text
        for ul in b.find_all('ul', attrs={'itemprop': 'classifications'})
    ))

    # backward citation count: "Orig" rows plus "Family" rows
    backward_orig = b.find_all('tr', attrs={'itemprop': 'backwardReferencesOrig'})
    backward_family = b.find_all('tr', attrs={'itemprop': 'backwardReferencesFamily'})
    row.append(len(backward_orig) + len(backward_family))

    # backward citation details, formatted "<publication number> (<assignee>)".
    # Only the "Orig" rows are listed, so this can hold fewer entries than the
    # count above; rows lacking a number or an assignee are skipped.
    items = []
    for tr in backward_orig:
        publication_number = tr.find('span', attrs={'itemprop': 'publicationNumber'})
        assignee_original = tr.find('span', attrs={'itemprop': 'assigneeOriginal'})
        if publication_number is None or assignee_original is None:
            continue
        items.append('{} ({})'.format(publication_number.text, assignee_original.text))
    row.append(', '.join(items))

    # forward citation ("cited by") count: "Orig" rows plus "Family" rows
    forward_orig = b.find_all('tr', attrs={'itemprop': 'forwardReferencesOrig'})
    forward_family = b.find_all('tr', attrs={'itemprop': 'forwardReferencesFamily'})
    row.append(len(forward_orig) + len(forward_family))

    # forward citation details, formatted
    # "<publication number> (<first 10 chars of assignee>) (&<year>&)".
    # As above, only "Orig" rows are listed and incomplete rows are skipped.
    items = []
    for tr in forward_orig:
        publication_number = tr.find('span', attrs={'itemprop': 'publicationNumber'})
        assignee_original = tr.find('span', attrs={'itemprop': 'assigneeOriginal'})
        publication_date = tr.find(attrs={'itemprop': 'publicationDate'})
        if publication_number is None or assignee_original is None or publication_date is None:
            continue
        items.append('{} ({}) (&{}&)'.format(
            publication_number.text,
            assignee_original.text[:10],
            publication_date.text[:4],
        ))
    row.append(', '.join(items))

    # number of claims
    claims_section = b.find('section', attrs={'itemprop': 'claims'})
    row.append(int(claims_section.find('span', attrs={'itemprop': 'count'}).text))

    return row


# Crawl every patent and stream the rows to OUTPUT_FILENAME as they arrive.
# newline='' is what the csv module requires of a file handed to csv.writer;
# without it, platforms that translate '\n' end up with blank rows.
with open(OUTPUT_FILENAME, 'w', encoding='utf-8', newline='') as f:
    writer = csv.writer(f)

    writer.writerow(header)

    for pn in tqdm(patent_numbers):
        try:
            writer.writerow(make_row(pn))
        except Exception:
            # Best-effort crawl: a patent that cannot be fetched or parsed is
            # recorded as an 'error' row and the run continues. Catching
            # Exception (not a bare except) keeps Ctrl-C / kernel interrupt
            # able to stop the crawl.
            writer.writerow([pn, 'error'])

        # Pause between requests to keep the crawl polite.
        time.sleep(2)


100%|██████████| 1603/1603 [1:15:03<00:00,  2.81s/it]


# 4. Constructing Patent Measures

In [None]:
# import pandas
import pandas as pd
import numpy as np
import re

In [None]:
df = pd.read_csv("Practicals2.csv")
# Display the top & bottom 5 rows. A cell only renders its last expression,
# so both slices are combined into a single frame.
pd.concat([df.head(5), df.tail(5)])

Unnamed: 0,patent number,title,abstract,current assignee,filing date,classification,backward citation count,backward_details,forward citation count,forward_details,number of claims
1598,US9152571B2,All invalidate approach for memory management ...,An input/output memory management unit (IOMMU)...,"ATI Technologies ULC, Advanced Micro Devices Inc",2012-07-31,"G06F12/1027, G06F12/1081, G06F2212/683",5.0,"US20070214339A1 (Microsoft Corporation), US201...",5.0,"US10042777B2 (Qualcomm I) (&2018&), US10162665...",21.0
1599,US9270969B2,3D video processing,"A method, an apparatus, and a non-transitory c...","ATI Technologies ULC, Advanced Micro Devices Inc",2013-03-05,"H04N13/00, H04N13/139, H04N13/0029, H04N13/003...",28.0,"JPH01171389A (Sharp Corp), JP2000209614A (Sony...",15.0,"US10523947B2 (Ati Techno) (&2019&), US10594901...",9.0
1600,US9304772B2,Ordering thread wavefronts instruction operati...,A system and method is provided for improving ...,"ATI Technologies ULC, Advanced Micro Devices Inc",2012-03-29,"G06F9/38, G06F9/30, G06F9/3836, G06F9/3851, G0...",5.0,"US20050166032A1 (Carsten Noeske), US7533236B1 ...",5.0,,17.0
1601,US9652019B2,System and method for adjusting processor perf...,A system and method for efficient management o...,"ATI Technologies ULC, Advanced Micro Devices Inc",2014-06-02,"G06F1/3206, G06F1/206, G06F1/324, G06F1/3296, ...",66.0,"US5451892A (Advanced Micro Devices), US6052268...",22.0,"WO2020145943A1 (Hewlett-Pa) (&2020&), CN111488...",17.0
1602,US9965392B2,Managing coherent memory between an accelerate...,Existing multiprocessor computing systems ofte...,"ATI Technologies ULC, Advanced Micro Devices Inc",2016-08-24,"G06F12/0815, G06F12/0806, G06F12/0835, G06F12/...",10.0,US5748938A (International Business Machines Co...,34.0,,12.0


In [None]:
# Summary of the crawled table: column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1603 entries, 0 to 1602
Data columns (total 11 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   patent number            1603 non-null   object 
 1   title                    1603 non-null   object 
 2   abstract                 1601 non-null   object 
 3   current assignee         1601 non-null   object 
 4   filing date              1601 non-null   object 
 5   classification           1601 non-null   object 
 6   backward citation count  1601 non-null   float64
 7   backward_details         1594 non-null   object 
 8   forward citation count   1601 non-null   float64
 9   forward_details          1406 non-null   object 
 10  number of claims         1601 non-null   float64
dtypes: float64(3), object(8)
memory usage: 137.9+ KB


## 4.1 Patent Scope

In [None]:
# Function to count the number of classification codes

def count_classification(classification):
    """Count the classification codes in a comma-separated string.

    Args:
        classification: Codes joined by commas, e.g. 'G06F9/38, G06F9/30'.

    Returns:
        The number of non-empty codes. Returns 0 when the value is missing:
        not a string, an empty string, or the literal 'nan' that
        ``astype(str)`` produces from a missing value.
    """
    if not isinstance(classification, str):
        return 0
    codes = (code.strip() for code in classification.split(','))
    # ''.split(',') yields [''], so blanks must be filtered rather than counted.
    return sum(1 for code in codes if code and code.lower() != 'nan')

In [None]:
# Rows whose crawl failed have no classification. Fill them with '' before
# casting, otherwise astype(str) turns the missing value into the text 'nan'.
df['classification'] = df['classification'].fillna('').astype(str)

In [None]:
# Patent scope = number of classification codes listed for each patent.

df['patent_scope'] = df['classification'].map(count_classification)

In [None]:
# Inspect the frame to check the newly added 'patent_scope' column.

df

Unnamed: 0,patent number,title,abstract,current assignee,filing date,classification,backward citation count,backward_details,forward citation count,forward_details,number of claims,patent_scope
0,US10007464B1,Method and apparatus for integration of non-vo...,Described herein is a method and system for di...,Advanced Micro Devices Inc,2016-12-23,"G06F12/10, G06F3/0647, G06F12/08, G06F3/061, G...",3.0,"US7281104B1 (Acronis Inc.), US7603533B1 (Acron...",6.0,"CN109062826A (ç®ä¸°ç§æ) (&2018&), CN11044253...",20.0,14
1,US10013240B2,Fingerprinting of redundant threads using comp...,A first processing element is configured to ex...,Advanced Micro Devices Inc,2016-06-21,"G06F8/45, G06F9/30, G06F8/30, G06F11/1494, G06...",23.0,"US20050193283A1 (Reinhardt Steven K.), US20060...",11.0,,20.0,10
2,US10019283B2,Predicting a context portion to move between a...,A processing device includes a first memory th...,Advanced Micro Devices Inc,2015-06-22,"G06F9/3851, G06F9/461, G06F9/3013, G06F9/30145",11.0,"US6205543B1 (Sun Microsystems, Inc.), US623359...",7.0,,18.0,4
3,US10019365B2,Adaptive value range profiling for enhanced sy...,Enhanced adaptive profiling of ranges of value...,Advanced Micro Devices Inc,2016-04-15,"G06F12/0833, G06F12/0223, G06F12/0246, G06F12/...",14.0,"US20100169602A1 (Jared E Hulbert), US201201243...",61.0,US11169927B2 (EMC IP Hol) (&2021&),18.0,22
4,US10032308B2,Culling objects from a 3-D graphics pipeline u...,A shader in a graphics pipeline accesses an ob...,Advanced Micro Devices Inc,2016-06-22,"G06T15/405, G06T15/005, G06T15/80, G06T17/205,...",8.0,"US20030122850A1 (Satyaki Koneru), US6646639B1 ...",13.0,US11004258B2 (Advanced M) (&2021&),16.0,5
...,...,...,...,...,...,...,...,...,...,...,...,...
1598,US9152571B2,All invalidate approach for memory management ...,An input/output memory management unit (IOMMU)...,"ATI Technologies ULC, Advanced Micro Devices Inc",2012-07-31,"G06F12/1027, G06F12/1081, G06F2212/683",5.0,"US20070214339A1 (Microsoft Corporation), US201...",5.0,"US10042777B2 (Qualcomm I) (&2018&), US10162665...",21.0,3
1599,US9270969B2,3D video processing,"A method, an apparatus, and a non-transitory c...","ATI Technologies ULC, Advanced Micro Devices Inc",2013-03-05,"H04N13/00, H04N13/139, H04N13/0029, H04N13/003...",28.0,"JPH01171389A (Sharp Corp), JP2000209614A (Sony...",15.0,"US10523947B2 (Ati Techno) (&2019&), US10594901...",9.0,12
1600,US9304772B2,Ordering thread wavefronts instruction operati...,A system and method is provided for improving ...,"ATI Technologies ULC, Advanced Micro Devices Inc",2012-03-29,"G06F9/38, G06F9/30, G06F9/3836, G06F9/3851, G0...",5.0,"US20050166032A1 (Carsten Noeske), US7533236B1 ...",5.0,,17.0,7
1601,US9652019B2,System and method for adjusting processor perf...,A system and method for efficient management o...,"ATI Technologies ULC, Advanced Micro Devices Inc",2014-06-02,"G06F1/3206, G06F1/206, G06F1/324, G06F1/3296, ...",66.0,"US5451892A (Advanced Micro Devices), US6052268...",22.0,"WO2020145943A1 (Hewlett-Pa) (&2020&), CN111488...",17.0,6


## 4.2 Originality Score

In [None]:
# Getting US Backward Citation Patents

def extract_us_patent_numbers(backward_details):
    """Return the US publication numbers listed in a backward-citation string.

    Args:
        backward_details: Citations formatted as
            '<publication number> (<assignee>)' and joined by ', '.

    Returns:
        A list of the publication numbers that start with 'US', in order of
        appearance. Returns an empty list when the value is not a string
        (e.g. NaN) or contains no US citation.
    """
    if not isinstance(backward_details, str):
        return []

    us_patents = []
    for entry in backward_details.split(','):
        # Assignee names can contain commas ("Acme, US Holdings"), so a
        # fragment that merely starts with "US" is not necessarily a citation.
        # Require the shape of a publication number: "US", an optional
        # one/two-letter type prefix, digits and an optional kind code.
        match = re.match(r'(US[A-Z]{0,2}\d+[A-Z]?\d?)(?=\s|\(|$)', entry.strip())
        if match:
            us_patents.append(match.group(1))
    return us_patents

In [None]:
# Patents without backward details are missing here. Fill them with '' before
# casting, otherwise astype(str) turns the missing value into the text 'nan'.
df['backward_details'] = df['backward_details'].fillna('').astype(str)

In [None]:
# One list of cited US publication numbers per patent.

df['us_backward'] = df['backward_details'].map(extract_us_patent_numbers)

In [None]:
# Flatten the per-patent lists in df['us_backward'] into a single list
# (duplicates are kept).

patent_numbers = [
    cited_number
    for cited_list in df['us_backward']
    for cited_number in cited_list
]

In [None]:
# Preview only the first entries; echoing the whole list floods the output.
patent_numbers[:10]

['US7281104B1',
 'US7603533B1',
 'US7953948B1',
 'US20050193283A1',
 'US20060095821A1',
 'US20060150186A1',
 'US7243262B2',
 'US7624448B2',
 'US7716668B2',
 'US7774787B2',
 'US7861228B2',
 'US7865770B2',
 'US8190982B2',
 'US8266697B2',
 'US8359578B2',
 'US20130254592A1',
 'US20140368513A1',
 'US9026847B2',
 'US9047192B2',
 'US9081688B2',
 'US9317379B2',
 'US9535696B1',
 'US9594648B2',
 'US6205543B1',
 'US6233599B1',
 'US6408325B1',
 'US20030046521A1',
 'US20050125802A1',
 'US20070022428A1',
 'US20130061239A1',
 'US20140189711A1',
 'US9378161B1',
 'US9582320B2',
 'US9652395B2',
 'US20100169602A1',
 'US20120124323A1',
 'US20120137075A1',
 'US20130185475A1',
 'US20150169227A1',
 'US20030122850A1',
 'US6646639B1',
 'US7382368B1',
 'US20130120380A1',
 'US20140176544A1',
 'US20150042650A1',
 'US20160086299A1',
 'US20170024926A1',
 'US20140344425A1',
 'US5966734A',
 'US20070276631A1',
 'US20100235670A1',
 'US20120331234A1',
 'US20140258637A1',
 'US9436603B1',
 'US20020194432A1',
 'US5977977A'

In [None]:
# How many cited US patents were collected (duplicates included).
total_patent_numbers = len(patent_numbers)
print(f"Total number of patent numbers: {total_patent_numbers}")

Total number of patent numbers: 18579


In [None]:
# Function to crawl patent page and extract classification codes
def get_classification_codes(patent_number, timeout=7):
    """Fetch a patent's Google Patents page and return its classification codes.

    Args:
        patent_number: Publication number used in the Google Patents URL.
        timeout: Seconds to wait for the HTTP response (default 7).

    Returns:
        The codes found in the last <li> of every classification <ul>, joined
        by ', ' (an empty string when the page lists none), or None when the
        request or the parsing fails. Failures are printed, not raised.
    """
    url = 'https://patents.google.com/patent/{}'.format(patent_number)
    try:
        r = requests.get(url, timeout=timeout)
        # Treat 404 / 429 / 5xx as failures instead of parsing the error page,
        # which would silently look like "this patent has no classifications".
        r.raise_for_status()
        b = BeautifulSoup(r.text, 'html.parser')
        classifications = ', '.join(
            ul.find_all('li')[-1].find('span', attrs={'itemprop': 'Code'}).text
            for ul in b.find_all('ul', attrs={'itemprop': 'classifications'})
        )
        return classifications
    except Exception as e:
        print("Error processing patent {}: {}".format(patent_number, e))
        return None

In [None]:
# Crawl the classification codes of every cited US patent, patent by patent.
# A single progress bar over the rows of df replaces one bar per row, which
# would otherwise print a line for every patent in the table.

tqdm.pandas(desc='backward citations')
df['classification_codes'] = df['us_backward'].progress_apply(
    lambda cited: [get_classification_codes(patent_number) for patent_number in cited]
)

100%|██████████| 3/3 [00:02<00:00,  1.16it/s]
100%|██████████| 20/20 [00:19<00:00,  1.05it/s]
100%|██████████| 11/11 [00:10<00:00,  1.04it/s]
100%|██████████| 5/5 [00:06<00:00,  1.29s/it]
100%|██████████| 8/8 [00:09<00:00,  1.22s/it]
100%|██████████| 1/1 [00:05<00:00,  5.69s/it]
100%|██████████| 6/6 [00:06<00:00,  1.06s/it]
100%|██████████| 1/1 [00:00<00:00,  1.48it/s]
100%|██████████| 8/8 [00:10<00:00,  1.30s/it]
100%|██████████| 3/3 [00:02<00:00,  1.04it/s]
100%|██████████| 10/10 [00:09<00:00,  1.02it/s]
100%|██████████| 17/17 [00:16<00:00,  1.06it/s]
100%|██████████| 7/7 [00:06<00:00,  1.00it/s]
100%|██████████| 8/8 [00:08<00:00,  1.09s/it]
100%|██████████| 9/9 [00:10<00:00,  1.19s/it]
100%|██████████| 34/34 [00:43<00:00,  1.29s/it]
100%|██████████| 48/48 [00:54<00:00,  1.14s/it]
100%|██████████| 10/10 [00:08<00:00,  1.18it/s]
100%|██████████| 5/5 [00:04<00:00,  1.01it/s]
100%|██████████| 7/7 [00:07<00:00,  1.13s/it]
100%|██████████| 13/13 [00:12<00:00,  1.05it/s]
100%|██████████| 1

Error processing patent US7389402B2: HTTPSConnectionPool(host='patents.google.com', port=443): Read timed out. (read timeout=7)


100%|██████████| 20/20 [00:25<00:00,  1.25s/it]
100%|██████████| 6/6 [00:05<00:00,  1.04it/s]
100%|██████████| 26/26 [00:24<00:00,  1.05it/s]
100%|██████████| 7/7 [00:06<00:00,  1.10it/s]
100%|██████████| 5/5 [00:04<00:00,  1.16it/s]
100%|██████████| 11/11 [00:08<00:00,  1.23it/s]
100%|██████████| 3/3 [00:02<00:00,  1.08it/s]
100%|██████████| 34/34 [00:33<00:00,  1.01it/s]
100%|██████████| 5/5 [00:04<00:00,  1.22it/s]
100%|██████████| 11/11 [00:09<00:00,  1.17it/s]
100%|██████████| 16/16 [00:13<00:00,  1.16it/s]
100%|██████████| 6/6 [00:04<00:00,  1.37it/s]
100%|██████████| 7/7 [00:07<00:00,  1.10s/it]
100%|██████████| 12/12 [00:12<00:00,  1.02s/it]
100%|██████████| 9/9 [00:07<00:00,  1.21it/s]
100%|██████████| 4/4 [00:06<00:00,  1.59s/it]
100%|██████████| 19/19 [00:20<00:00,  1.09s/it]
100%|██████████| 26/26 [00:27<00:00,  1.06s/it]
100%|██████████| 23/23 [00:20<00:00,  1.14it/s]
100%|██████████| 6/6 [00:06<00:00,  1.02s/it]
100%|██████████| 7/7 [00:07<00:00,  1.14s/it]
100%|█████████

Error processing patent US5644755A: HTTPSConnectionPool(host='patents.google.com', port=443): Read timed out. (read timeout=7)


0it [00:00, ?it/s]
100%|██████████| 2/2 [00:01<00:00,  1.90it/s]
100%|██████████| 3/3 [00:02<00:00,  1.45it/s]
100%|██████████| 11/11 [00:09<00:00,  1.18it/s]
100%|██████████| 7/7 [00:07<00:00,  1.01s/it]
100%|██████████| 10/10 [00:09<00:00,  1.07it/s]
100%|██████████| 16/16 [00:17<00:00,  1.12s/it]
100%|██████████| 11/11 [00:33<00:00,  3.05s/it]
100%|██████████| 8/8 [00:05<00:00,  1.48it/s]
100%|██████████| 8/8 [00:05<00:00,  1.35it/s]
100%|██████████| 10/10 [00:06<00:00,  1.56it/s]
100%|██████████| 5/5 [00:05<00:00,  1.13s/it]
100%|██████████| 9/9 [00:13<00:00,  1.47s/it]
100%|██████████| 4/4 [00:02<00:00,  1.65it/s]
100%|██████████| 7/7 [00:09<00:00,  1.34s/it]
100%|██████████| 9/9 [00:05<00:00,  1.53it/s]
100%|██████████| 6/6 [00:08<00:00,  1.45s/it]
100%|██████████| 9/9 [00:08<00:00,  1.03it/s]
100%|██████████| 7/7 [00:05<00:00,  1.28it/s]
100%|██████████| 35/35 [00:42<00:00,  1.21s/it]
100%|██████████| 2/2 [00:01<00:00,  1.09it/s]
100%|██████████| 4/4 [00:07<00:00,  1.88s/it]
100

Error processing patent US20040083352A1: HTTPSConnectionPool(host='patents.google.com', port=443): Read timed out. (read timeout=7)


100%|██████████| 33/33 [00:41<00:00,  1.27s/it]
100%|██████████| 7/7 [00:05<00:00,  1.23it/s]
100%|██████████| 21/21 [00:17<00:00,  1.23it/s]
100%|██████████| 15/15 [00:14<00:00,  1.07it/s]
100%|██████████| 3/3 [00:03<00:00,  1.05s/it]
100%|██████████| 10/10 [00:09<00:00,  1.11it/s]
100%|██████████| 6/6 [00:12<00:00,  2.06s/it]
100%|██████████| 6/6 [00:08<00:00,  1.39s/it]
100%|██████████| 14/14 [00:12<00:00,  1.13it/s]
100%|██████████| 7/7 [00:05<00:00,  1.30it/s]
100%|██████████| 6/6 [00:06<00:00,  1.11s/it]
100%|██████████| 4/4 [00:03<00:00,  1.18it/s]
100%|██████████| 12/12 [00:10<00:00,  1.09it/s]
100%|██████████| 4/4 [00:05<00:00,  1.34s/it]
100%|██████████| 3/3 [00:02<00:00,  1.40it/s]
100%|██████████| 31/31 [00:35<00:00,  1.13s/it]
100%|██████████| 11/11 [00:14<00:00,  1.30s/it]
100%|██████████| 3/3 [00:02<00:00,  1.40it/s]
100%|██████████| 11/11 [00:09<00:00,  1.17it/s]
100%|██████████| 1/1 [00:01<00:00,  1.00s/it]
100%|██████████| 4/4 [00:02<00:00,  1.45it/s]
100%|██████████|

In [None]:
# First five rows, now including 'us_backward' and 'classification_codes'.
df.head()

Unnamed: 0,patent number,title,abstract,current assignee,filing date,classification,backward citation count,backward_details,forward citation count,forward_details,number of claims,patent_scope,us_backward,classification_codes
0,US10000124B2,"Independent steering, power, torque control an...","Systems, apparatus and methods to multiple lev...",Zoox Inc,2015-11-05,"B60L3/0092, B60L15/20, B60L15/2036, B60N2/002,...",125,WO1993007016A1 (Mannesmann Aktiengesellschaft)...,36,"US20190111803A1 (Hyundai Mo) (&2019&), US10543...",22,26,"[US5558370A, US5959552A, US6301542B1, US637416...","[B60R22/34, B60R21/0132, B60R21/015, B60R22/44..."
1,US10003168B1,Fiber laser with free-space components,"In one embodiment, a laser system includes a s...",Luminar Technologies Inc,2017-11-30,"H01S3/094053, G01S17/10, G01S17/42, G01S7/4804...",129,"US6449384B2 (Facet Technology Corp.), US671032...",89,"CN109254297A (æ­å·æ¬§é) (&2019&), US10295656...",24,36,"[US6449384B2, US6710324B2, US6723975B2, US6747...","[G06T7/74, G06T7/11, G06V10/443, G06V20/582, G..."
2,US10005317B2,Devices and methods of thermal management for ...,A method of thermal management for an electric...,Superpedestrian Ipco LLC,2015-11-24,"B60B27/0015, A61G5/045, A61G5/048, B60B27/04, ...",151,"US3199922A (Krenz Wheel Mfg), US3432158A (Warr...",49,"US10259311B2 (Superpedes) (&2019&), US10308065...",21,146,"[US3199922A, US3432158A, US3921741A, USD248747...","[B60B1/006, B60B1/0276, B60G17/025, B62M6/45, ..."


In [None]:
def calculate_originality(ref_cpcs, subclass=False):
    """Compute an originality score: 1 minus the sum of squared class shares.

    Args:
        ref_cpcs: Iterable with one classification string per cited patent.
            None and blank entries (failed or empty look-ups) are ignored.
        subclass: When True, only the first four characters of each string
            are used as the class key.

    Returns:
        1 - sum((n_c / total) ** 2) over the distinct classes c, or NaN when
        no usable classification remains (the score is undefined without
        cited classes; 1 would wrongly mean "maximally original").
    """
    # NOTE(review): get_classification_codes() joins all codes of a cited
    # patent into one string. Fed with such strings, every distinct
    # combination counts as its own class, and subclass=True keys on the
    # first code only. Confirm this is the intended unit of analysis.
    ref_cpcs = [c for c in ref_cpcs if isinstance(c, str) and c.strip()]
    if not ref_cpcs:
        return float('nan')

    if subclass:
        ref_cpcs = [c[:4] for c in ref_cpcs]

    ref_cpcs_count = collections.Counter(ref_cpcs)
    total = len(ref_cpcs)
    originality = 1
    for c in ref_cpcs_count:
        originality -= (ref_cpcs_count[c] / total) ** 2
    return originality

In [None]:
# Originality per patent, keyed on the full classification strings
# (subgroup level). Series.apply forwards `subclass` to the function and
# avoids a Python-level iterrows loop with one .loc write per row.
df["originality"] = df["classification_codes"].apply(calculate_originality, subclass=False)

3it [00:00, 1017.87it/s]


In [None]:
# Originality per patent, keyed on the first four characters of each
# classification string (subclass level). Series.apply forwards `subclass`
# and avoids a Python-level iterrows loop with one .loc write per row.
df["originality2"] = df["classification_codes"].apply(calculate_originality, subclass=True)

3it [00:00, 1327.87it/s]


In [None]:
# Inspect the frame with the two new columns, 'originality' and 'originality2'.
df

Unnamed: 0,patent number,title,abstract,current assignee,filing date,classification,backward citation count,backward_details,forward citation count,forward_details,number of claims,patent_scope,us_backward,classification_codes,originality,originality2
0,US10000124B2,"Independent steering, power, torque control an...","Systems, apparatus and methods to multiple lev...",Zoox Inc,2015-11-05,"B60L3/0092, B60L15/20, B60L15/2036, B60N2/002,...",125,WO1993007016A1 (Mannesmann Aktiengesellschaft)...,36,"US20190111803A1 (Hyundai Mo) (&2019&), US10543...",22,26,"[US5558370A, US5959552A, US6301542B1, US637416...","[B60R22/34, B60R21/0132, B60R21/015, B60R22/44...",0.989796,0.923574
1,US10003168B1,Fiber laser with free-space components,"In one embodiment, a laser system includes a s...",Luminar Technologies Inc,2017-11-30,"H01S3/094053, G01S17/10, G01S17/42, G01S7/4804...",129,"US6449384B2 (Facet Technology Corp.), US671032...",89,"CN109254297A (æ­å·æ¬§é) (&2019&), US10295656...",24,36,"[US6449384B2, US6710324B2, US6723975B2, US6747...","[G06T7/74, G06T7/11, G06V10/443, G06V20/582, G...",0.989247,0.693028
2,US10005317B2,Devices and methods of thermal management for ...,A method of thermal management for an electric...,Superpedestrian Ipco LLC,2015-11-24,"B60B27/0015, A61G5/045, A61G5/048, B60B27/04, ...",151,"US3199922A (Krenz Wheel Mfg), US3432158A (Warr...",49,"US10259311B2 (Superpedes) (&2019&), US10308065...",21,146,"[US3199922A, US3432158A, US3921741A, USD248747...","[B60B1/006, B60B1/0276, B60G17/025, B62M6/45, ...",0.944843,0.839701


In [None]:
# Write the final table to CSV for the downstream analysis.
# index=False leaves the row index out of the file.
# NOTE(review): 'us_backward' and 'classification_codes' hold Python lists;
# presumably they are written as their text representation. Confirm the
# consumer of this file can parse them.

df.to_csv('foranalysis.csv', index=False)