In [2]:
import pandas as pd
import numpy as np
from google.cloud import bigquery
%load_ext google.cloud.bigquery

import matplotlib.pyplot as plt
from matplotlib import ticker
import seaborn as sns
# Global seaborn configuration: 'darkgrid' style with fonts scaled by 1.5.
sns.set(style='darkgrid', font_scale=1.5)
# Seaborn's 'deep' colour palette, kept under a notebook-level name
# (not referenced by the cells visible in this part of the notebook).
current_palette = sns.color_palette('deep')

# 1. Creating USPTO data table

In [12]:
# Build the `data_preparation.3_appln_uspto` table: run one BigQuery query and
# write its result to a destination table, replacing any previous content.
client = bigquery.Client()

# Destination table. The project is named explicitly so the result lands in the
# same project the query reads `1_matching_applnNr_applnId` from, instead of
# whatever default project the client happens to be configured with.
project_id = 'usptobias'
dataset_id = 'data_preparation'
table_id = '3_appln_uspto'
table_ref = bigquery.DatasetReference(project_id, dataset_id).table(table_id)

# Job configuration: no cached results, overwrite the destination table.
job_config = bigquery.QueryJobConfig()
job_config.use_query_cache = False
job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE
job_config.destination = table_ref

# Query outline (all steps are CTEs of a single statement):
#   t1  application_data with NULLs in the listed columns replaced by sentinel
#       values, plus `filing_year` parsed from `filing_date` (9999 if absent).
#   t2  uspto_oce_pair.match exposed as (application_number, appln_nr_orig).
#   t3  t1 enriched with appln_nr_orig.
#   t4  one row per application with its transactions aggregated into an array
#       ordered by sequence_number, enriched with appln_nr_orig.
#   t5  one row per PEDS application with examiner names, grant date and the
#       prosecution-history field, enriched with appln_nr_orig via peds.match.
#   t6  (appln_id, appln_nr_orig) pairs from this project's matching table.
#   t7  t6 left-joined to t4 and t5, keeping only rows that have a non-empty
#       transactions array or a non-NULL examiner_fullName.
#   The final SELECT attaches the application attributes from t3 to t7.
#
# The string is raw so the regex backslashes (`\d`) reach BigQuery untouched
# without relying on Python's handling of invalid escape sequences. Braces in
# the regex are doubled (`{{4}}`) because the text goes through str.format,
# which substitutes `{0}` with `project_id`.
#
# NOTE(review): the NULL sentinel for `customer_number` is '9999-12-31', the
# same literal used for the date columns; this looks like a copy-paste slip.
# Left unchanged because downstream tables may already rely on that value.
# NOTE(review): `filing_year` is computed in t1 but is not part of the final
# SELECT list, so it never reaches the destination table — confirm intent.
query = r"""
WITH t1 AS(
    SELECT * EXCEPT(application_type, filing_date, customer_number, atty_docket_number,
                    appl_status_code, appl_status_date, patent_number, patent_issue_date,
                    abandon_date, disposal_type, small_entity_indicator, examiner_id, examiner_art_unit), 
            IFNULL(application_type, 'NULL') application_type,
            IFNULL(filing_date, '9999-12-31') filing_date,
            IFNULL(customer_number, '9999-12-31') customer_number,
            IFNULL(atty_docket_number, '0') atty_docket_number,
            IFNULL(appl_status_code, '0') appl_status_code, 
            IFNULL(appl_status_date, '31dec9999 00:00:00') appl_status_date, 
            IFNULL(patent_number, '0') patent_number, 
            IFNULL(patent_issue_date, '9999-12-31') patent_issue_date,
            IFNULL(abandon_date, '9999-12-31') abandon_date,
            IFNULL(disposal_type, 'NULL') disposal_type,
            IFNULL(small_entity_indicator, 'NULL') small_entity_indicator,
            IFNULL(examiner_id, '0') examiner_id, 
            IFNULL(examiner_art_unit, '0') examiner_art_unit,  
            IFNULL(CAST(REGEXP_EXTRACT(filing_date, r'(\d{{4}})-\d{{2}}-\d{{2}}') AS INT64), 9999) AS filing_year
    FROM `patents-public-data.uspto_oce_pair.application_data`
   
), t2 AS(
    SELECT 
        a.application_number_pair AS application_number,
        a.application_number AS appln_nr_orig
    FROM `patents-public-data.uspto_oce_pair.match` AS a
), t3 AS(
    SELECT t1.*, t2.appln_nr_orig
    FROM t1
    LEFT JOIN t2 USING(application_number)
), t4 AS(
    SELECT a.*, t2.appln_nr_orig
    FROM(
        SELECT 
            application_number,
            ARRAY_AGG(STRUCT(event_code, recorded_date, sequence_number, status_code) ORDER BY sequence_number) AS transactions
        FROM `patents-public-data.uspto_oce_pair.transactions` AS a
        GROUP BY application_number
        
    ) AS a
    LEFT JOIN t2 USING(application_number)
), t5 AS(
    SELECT a.*, b.application_number AS appln_nr_orig
    FROM(
        SELECT 
            a.patentCaseMetadata.applicationNumberText.value AS application_number,
            ARRAY_AGG(name.personFullName IGNORE NULLS) AS examiner_fullName,
            ANY_VALUE(IFNULL(a.patentCaseMetadata.patentGrantIdentification.grantDate, '9999-12-31')) AS grant_date,
            ANY_VALUE(a.prosecutionHistoryDataOrPatentTermData) AS actions
        FROM 
            `patents-public-data.uspto_peds.applications` AS a,
            UNNEST(a.patentCaseMetadata.partyBag.applicantBagOrInventorBagOrOwnerBag) AS bag,
            UNNEST(bag.primaryExaminerOrAssistantExaminerOrAuthorizedOfficer) AS PE,
            UNNEST(PE.name.personNameOrOrganizationNameOrEntityName) AS name
        GROUP BY application_number

    ) AS a
    LEFT JOIN `patents-public-data.uspto_peds.match` AS b 
    ON a.application_number = b.applicationNumberText
), t6 AS(
    SELECT appln_id, appln_nr_orig
    FROM `{0}.data_preparation.1_matching_applnNr_applnId`
), t7 AS(
    SELECT *
    FROM(
        SELECT 
            t6.*,
            t4.transactions,
            t5.examiner_fullName,
            t5.grant_date,
            t5.actions
        FROM t6
        LEFT JOIN t4 ON t6.appln_nr_orig=t4.appln_nr_orig
        LEFT JOIN t5 ON t6.appln_nr_orig=t5.appln_nr_orig
    )
    WHERE (ARRAY_LENGTH(transactions)<>0) OR (examiner_fullName IS NOT NULL)

)


SELECT 
    t7.*,
    t3.application_type AS appln_type,
    t3.filing_date,
    t3.customer_number,
    t3.atty_docket_number AS attorney_number,
    t3.appl_status_code AS status_code,
    t3.appl_status_date AS status_date,
    t3.patent_number AS patent_nr,
    t3.patent_issue_date AS issue_date,
    t3.abandon_date,
    t3.small_entity_indicator AS small_entity,
    t3.disposal_type,
    STRUCT(examiner_name_last AS last, examiner_name_first AS first, examiner_name_middle AS middle) AS examiner_name,
    t3.examiner_id,
    t3.examiner_art_unit
    
FROM t7
LEFT JOIN t3 ON t7.appln_nr_orig=t3.appln_nr_orig
""".format(project_id)

# Submit the query job (US location) with the configuration above.
query_job = client.query(query, location='US', job_config=job_config)

# Block until the job has finished, i.e. the destination table is written;
# raises if the job failed.
query_job.result()

<google.cloud.bigquery.table.RowIterator at 0x23aea02a898>