In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
import time
import scipy.sparse
import re

## Classifying Patents and Their Impacted Tasks

### Loading data

In [3]:
# AI patent table. Later cells read its 'embedding_id', 'patent_id' and
# 'disruption' columns, among others.
ai_patents = pd.read_csv(
    filepath_or_buffer='../data/processed/ai_patents_keywrods_alicpc_07092024.csv',
)

In [5]:
# Task ratings: read the tab-separated file, then keep only the rows whose
# `Scale ID` equals "IM".
task_ratings = pd.read_csv('../data/raw/db_24_3_text/Task Ratings.txt', sep='\t')
task_ratings = task_ratings.query('`Scale ID` == "IM"')

# Task statements supply the `Task` text column joined onto the ratings below.
task_statements = pd.read_csv('../data/raw/db_24_3_text/Task Statements.txt', sep='\t')

# Occupation table, with a 'SOC Code' column holding the first seven
# characters of the O*NET-SOC code.
jobs = pd.read_csv('../data/raw/db_24_3_text/Occupation Data.txt', sep='\t')
jobs['SOC Code'] = jobs['O*NET-SOC Code'].str.slice(stop=7)

# One row per rating, with the matching task statement attached. The left
# join keeps every rating row even when no statement matches.
tasks = task_ratings.merge(
    task_statements[['O*NET-SOC Code', 'Task ID', 'Task']],
    on=['O*NET-SOC Code', 'Task ID'],
    how='left',
)

In [6]:
# Attach each rating row's task statement text (`Task`), matched on occupation
# code and task id. The left join keeps every row of `task_ratings`.
# NOTE(review): the loading cell above already performs this same merge; one
# of the two is redundant.
tasks = task_ratings.merge(
    right=task_statements.loc[:, ['O*NET-SOC Code', 'Task ID', 'Task']],
    on=['O*NET-SOC Code', 'Task ID'],
    how='left',
)

In [7]:
# Precomputed arrays; a later cell stores each of them as a column of `tasks`.
# NOTE(review): presumably one entry per task row, aligned by position -- confirm.
cosine_similarity = np.load(
    file='../data/processed/Embeddings/ai_related_patents_cosine_similarity_task.npy'
)
patent_matched_ids = np.load(
    file='../data/processed/Embeddings/ai_patents_matched_id.npy'
)

In [10]:
# Add the similarity score and the matched-patent id as new columns of `tasks`
# (values are taken in array order, one per row).
tasks = tasks.assign(
    patents_cosine_similarity=cosine_similarity,
    ai_patents_matched_id=patent_matched_ids,
)

In [16]:
# Map every embedding_id to the list of patent_id values that share it.
ai_patent_both_methods_embedding_id_patent_id_dict = (
    ai_patents
    .groupby('embedding_id')['patent_id']
    .agg(list)
    .to_dict()
)

In [18]:
# embedding_id values in the row order of `ai_patents`.
ai_embedding_id = ai_patents['embedding_id'].tolist()

In [19]:
# Lookup from row position (0-based) in `ai_embedding_id` to the embedding_id
# stored at that position.
ai_embedding_id_iloc_id = dict(enumerate(ai_embedding_id))

In [20]:
# Resolve each task's matched id to a patent id in two hops:
# matched id -> embedding_id -> first patent_id listed for that embedding_id.
tasks['patent_id'] = tasks['ai_patents_matched_id'].map(
    lambda position: ai_patent_both_methods_embedding_id_patent_id_dict[
        ai_embedding_id_iloc_id[position]
    ][0]
)

In [30]:
# Bring the matched patent's attributes onto each task row, joining on
# patent_id (inner join, which is the pandas default made explicit here).
tasks = pd.merge(
    tasks,
    ai_patents[[
        'patent_id', 'type', 'number', 'country', 'date', 'year',
        'abstract', 'title', 'kind', 'paper_id',
        'citations', 'references', 'disruption', 'edm_disruption',
    ]],
    how='inner',
    on='patent_id',
)

## Impacted vs. Not Impacted Tasks

In [31]:
# 0.9 quantile of the task-to-patent cosine similarity; used as the cut-off
# between AI-exposed and not-exposed tasks.
quantile_threshold = tasks['patents_cosine_similarity'].quantile(q=0.9)

In [32]:
# Show the similarity cut-off computed in the previous cell.
quantile_threshold 

0.7984446704387664

In [33]:
# Split tasks at the similarity threshold into two complementary subsets.
# A task whose similarity equals the threshold exactly counts as exposed (>=);
# with strict comparisons on both sides such a row would fall into neither
# group. Rows with a missing similarity still belong to neither subset.
tasks_notexposedai = tasks[tasks['patents_cosine_similarity'] < quantile_threshold]
tasks_exposedai = tasks[tasks['patents_cosine_similarity'] >= quantile_threshold]

## Disruptive AI

In [34]:
# Quartile cut-offs of the 'disruption' column over all AI patents:
# below the 0.25 quantile is treated as consolidating, above the 0.75
# quantile as disruptive.
consolidating_threshold = ai_patents['disruption'].quantile(q=0.25)
disruptive_threshold = ai_patents['disruption'].quantile(q=0.75)

In [35]:
# Inspect the patents whose disruption lies below the consolidating cut-off.
ai_patents.loc[ai_patents['disruption'].lt(consolidating_threshold)]

Unnamed: 0.1,Unnamed: 0,patent_id,type,number,country,date,year,abstract,title,kind,...,cpc_current_group_average_patent_processing_days,term_extension,detail_desc_length,paper_id,frac_year,disruption,edm_disruption,citations,references,embedding_id
1,3654,10003688,utility,10003688,US,2018-06-19,2018,Systems for caller identification and authenti...,Systems and methods for cluster-based voice ve...,B1,...,456.0,,30155.0,3654,2018.416667,-0.003891,0.698857,4.0,63.0,3654
4,7195,10007263,utility,10007263,US,2018-06-26,2018,"Methods and systems for monitoring use, determ...",Autonomous vehicle accident and emergency resp...,B1,...,963.0,127.0,152850.0,7195,2018.416667,-0.004117,0.905069,21.0,108.0,7195
6,9160,10009240,utility,10009240,US,2018-06-26,2018,A method provides for associating reputation s...,System and method of recommending policies tha...,B2,...,867.0,231.0,92346.0,9160,2018.416667,-0.004407,0.850494,22.0,151.0,9160
15,19476,10019631,utility,10019631,US,2018-07-10,2018,A method of tracking a position of a target ob...,Adapting to appearance variations when trackin...,B2,...,769.0,40.0,60499.0,19476,2018.500000,-0.010000,0.774313,3.0,5.0,19476
17,19512,10019667,utility,10019667,US,2018-07-10,2018,Embodiments of the present invention provide a...,Transform for a neurosynaptic core circuit,B2,...,1010.0,187.0,25348.0,19512,2018.500000,-0.015873,0.756898,3.0,23.0,19512
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3228,6680491,9987747,utility,9987747,US,2018-06-05,2018,Disclosed is a stocker for receiving a cassett...,Stocker for receiving cassettes and method of ...,B2,...,850.0,46.0,16153.0,6680491,2018.416667,-0.019858,1.303688,14.0,26.0,6679684
3229,6681364,9988624,utility,9988624,US,2018-06-05,2018,The present disclosure provides a HTP microbia...,Microbial strain improvement by a HTP genomic ...,B2,...,878.0,,354323.0,6681364,2018.416667,-0.014766,0.915063,36.0,34.0,6680557
3231,6686030,9993313,utility,9993313,US,2018-06-12,2018,An instrument device manipulator IDM is attach...,Instrument device manipulator with roll mechanism,B2,...,936.0,,68269.0,6686030,2018.416667,-0.008458,0.826395,40.0,50.0,6685223
3235,6692150,9999476,utility,9999476,US,2018-06-19,2018,"Robotic and/or surgical devices, systems, and ...",Movable surgical mounting platform controlled ...,B2,...,936.0,113.0,57177.0,6692150,2018.416667,-0.003115,0.788385,3.0,5.0,6691343


In [36]:
# AI-exposed tasks whose matched patent has a disruption score strictly above
# the disruptive cut-off.
disruptive_tasks = tasks_exposedai.loc[
    tasks_exposedai['disruption'].gt(disruptive_threshold)
]

In [37]:
# Show the exposed tasks matched to patents above the disruptive cut-off.
disruptive_tasks

Unnamed: 0,O*NET-SOC Code,Task ID,Scale ID,Category,Data Value,N,Standard Error,Lower CI Bound,Upper CI Bound,Recommend Suppress,...,date,year,abstract,title,kind,paper_id,citations,references,disruption,edm_disruption
224,11-3051.00,34,IM,,4.24,115,0.09,4.05,4.42,N,...,2016-01-05,2016,"In accordance with aspects of the disclosure, ...",Capacity expansion planning with production re...,B2,5927992,3.0,1.0,0.230769,0.981282
231,11-3051.00,33,IM,,3.95,98,0.10,3.76,4.14,N,...,2016-01-05,2016,"In accordance with aspects of the disclosure, ...",Capacity expansion planning with production re...,B2,5927992,3.0,1.0,0.230769,0.981282
386,11-3071.02,19512,IM,,3.95,21,,,,,...,2016-11-29,2016,Systems and methods for optimal planning and r...,Energy storage modeling and control,B2,6205172,9.0,2.0,0.136364,0.729405
387,11-3071.02,3310,IM,,3.95,20,,,,,...,2017-04-11,2017,"In one embodiment, a method includes determini...",Systems and methods for individualized routing...,B1,6315061,9.0,2.0,0.060870,0.894448
404,11-3071.02,19511,IM,,3.38,16,,,,,...,2016-11-29,2016,Systems and methods for optimal planning and r...,Energy storage modeling and control,B2,6205172,9.0,2.0,0.136364,0.729405
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19309,53-7033.00,15191,IM,,4.15,59,0.23,3.68,4.62,N,...,2016-10-04,2016,A multi-vehicle type co-production line flexib...,Multi-vehicle model collinear flexible framing...,B2,6154177,6.0,7.0,0.046875,1.374008
19335,53-7051.00,3201,IM,,4.35,120,0.19,3.98,4.72,N,...,2015-03-03,2015,In an infrastructure that uses a mobile order ...,Inter-facility transport in inventory manageme...,B1,5671498,84.0,2.0,0.540984,0.882959
19338,53-7051.00,3209,IM,,3.62,46,0.45,2.72,4.52,N,...,2018-03-13,2018,A vehicle trailer connect system and automated...,Vehicle trailer connect system,B2,6607572,22.0,3.0,0.052885,0.987450
19361,53-7062.00,10781,IM,,4.73,83,0.09,4.55,4.91,N,...,2015-03-03,2015,In an infrastructure that uses a mobile order ...,Inter-facility transport in inventory manageme...,B1,5671498,84.0,2.0,0.540984,0.882959


In [38]:
# AI-exposed tasks whose matched patent has a disruption score strictly below
# the consolidating cut-off.
consolidating_tasks = tasks_exposedai.loc[
    tasks_exposedai['disruption'].lt(consolidating_threshold)
]

In [39]:
# Show the exposed tasks matched to patents below the consolidating cut-off.
consolidating_tasks 

Unnamed: 0,O*NET-SOC Code,Task ID,Scale ID,Category,Data Value,N,Standard Error,Lower CI Bound,Upper CI Bound,Recommend Suppress,...,date,year,abstract,title,kind,paper_id,citations,references,disruption,edm_disruption
288,11-3051.03,15461,IM,,4.22,58,0.12,3.97,4.46,N,...,2016-02-02,2016,Cooling control methods include measuring a te...,Coolant and ambient temperature control for ch...,B2,5948314,13.0,12.0,-0.010989,0.796320
291,11-3051.03,15451,IM,,3.98,56,0.14,3.70,4.27,N,...,2017-03-28,2017,Techniques for providing an entity monitoring ...,Entity monitoring for kiva robotic floors,B1,6302663,40.0,7.0,-0.138462,0.862130
352,11-3071.01,1037,IM,,4.26,71,0.11,4.04,4.48,N,...,2016-10-11,2016,A computer-implemented method for automated se...,Automated service management,B2,6162424,12.0,11.0,-0.019084,0.799165
385,11-3071.02,20941,IM,,4.00,22,,,,,...,2019-04-23,2019,An example method is carried out in a warehous...,Collaborative inventory monitoring,B2,263891,4.0,12.0,-0.004484,1.064662
410,11-3071.03,15752,IM,,4.13,30,,,,,...,2017-08-15,2017,Example systems and methods may provide for a ...,Heterogeneous fleet of robots for collaborativ...,B1,6428105,17.0,12.0,-0.010340,0.764071
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19450,53-7081.00,7176,IM,,4.77,44,0.11,4.55,4.98,N,...,2017-04-18,2017,An autonomous device for loading and unloading...,Autonomous truck loader and unloader,B2,6318818,6.0,70.0,-0.006508,0.790221
19464,53-7081.00,7182,IM,,3.79,23,0.28,3.21,4.37,N,...,2017-04-18,2017,An autonomous device for loading and unloading...,Autonomous truck loader and unloader,B2,6318818,6.0,70.0,-0.006508,0.790221
19487,53-7121.00,12798,IM,,4.24,63,0.12,4.00,4.48,N,...,2017-04-18,2017,An autonomous device for loading and unloading...,Autonomous truck loader and unloader,B2,6318818,6.0,70.0,-0.006508,0.790221
19496,53-7121.00,12805,IM,,3.87,31,0.26,3.34,4.41,N,...,2016-11-01,2016,A material handling system comprises a main co...,Robotic sortation system,B1,6177674,3.0,5.0,-0.005814,0.922881
