# Recomendador de artigos - OASIS

## Importação das bibliotecas

In [78]:
from bs4 import BeautifulSoup
from urllib.request import urlopen, Request
from urllib.request import URLError, HTTPError
import pandas as pd

# Widen the notebook display limits: up to 999 columns are rendered and
# DataFrame.info() keeps per-column details for frames of up to 999 rows.
pd.set_option('display.max_columns', 999)
pd.set_option('display.max_info_rows', 999)

## Funções

In [79]:
def resposta_html(url,headers):
    '''
    Request ``url`` with the given HTTP headers and return the response body.

    Args:
        url: Address to request.
        headers: Mapping of HTTP header names to values sent with the request.

    Returns:
        The raw response body as returned by ``response.read()``, or ``None``
        when the request fails. Failures are printed, not raised.
    '''
    try:
        req = Request(url, headers=headers)
        # The context manager closes the connection once the body is read.
        with urlopen(req) as response:
            return response.read()
    except HTTPError as e:
        # HTTPError is a subclass of URLError, so it must be handled first.
        print(e.code, e.reason)
    except URLError as e:
        print(e.reason)
    # Reached only after a failed request: signal the failure explicitly
    # instead of referencing a variable that was never assigned.
    return None


def obtem_total_paginas(html):
    '''
    Return the number of result pages advertised by a search-results page.

    The count is read from the text of the last ``<li>`` inside the
    ``<ul class="pagination">`` element, after removing the surrounding
    square brackets.

    Args:
        html: HTML of the search-results page.

    Returns:
        The total number of pages as an ``int``. When the page has no
        pagination list (or the list is empty) ``1`` is returned, so the
        caller still visits the page that was fetched.
    '''
    soup = BeautifulSoup(html, 'html.parser')
    paginacao = soup.find('ul', {'class': 'pagination'})
    # find() returns None when the element is absent; guard before chaining.
    itens = paginacao.find_all('li') if paginacao is not None else []
    if not itens:
        return 1
    ultimo_item = itens[-1].get_text()
    return int(ultimo_item.replace('[', '').replace(']', ''))


def links_paginas(url, total_paginas,headers):
    '''
    Collect the record links found on every page of a search.

    Pages ``1`` to ``total_paginas`` are requested by appending
    ``&page=<n>`` to ``url``. On each page, every ``div.result`` card
    contributes the ``href`` of its ``a.title.getFull`` anchor, prefixed with
    the site address.

    Args:
        url: Search URL without the ``page`` parameter.
        total_paginas: Number of result pages to visit.
        headers: HTTP headers forwarded to ``resposta_html``.

    Returns:
        A list with one absolute link per result card, in page order.
    '''
    base_site = 'https://oasisbr.ibict.br'
    links_coletados = []
    for pagina in range(1, total_paginas + 1):
        html_pagina = resposta_html(f'{url}&page={pagina}', headers=headers)
        soup = BeautifulSoup(html_pagina, 'html.parser')
        links_coletados.extend(
            base_site + card.find('a', {'class': 'title getFull'}).get('href')
            for card in soup.find_all('div', {'class': 'result'})
        )
    return links_coletados


def scraping_dados(links_paginas,headers):
    '''
    Scrape the citation table of each record page into a dictionary.

    For every link, the page is fetched and the table with class
    ``citation table table-striped`` is read row by row: the text of the
    ``<th>`` cell becomes the key and the text of the ``<td>`` cell the value.

    Args:
        links_paginas: Iterable of record-page URLs.
        headers: HTTP headers forwarded to ``resposta_html``.

    Returns:
        A list with one dictionary per successfully scraped page. Pages that
        could not be fetched or that have no citation table are skipped, so a
        single bad record does not discard everything collected so far.
    '''
    dados_lista = []

    for link in links_paginas:
        html = resposta_html(link, headers=headers)
        if html is None:
            # Nothing to parse for this link.
            continue
        soup = BeautifulSoup(html, 'html.parser')

        table = soup.find('table', {'class': 'citation table table-striped'})
        if table is None:
            print('Tabela de dados não encontrada:', link)
            continue

        data = {}
        for row in table.find_all('tr'):
            th = row.find('th')
            td = row.find('td')
            # Rows without both a header and a value cell carry no field.
            if th is None or td is None:
                continue
            data[th.get_text()] = td.get_text()
        dados_lista.append(data)

    return dados_lista


def dataframe_dados_lista(lista_dados):
    '''
    Build a cleaned pandas DataFrame from the scraped record dictionaries.

    Only the columns ``title``, ``description``, ``url``,
    ``eu_rights_str_mv``, ``network_name_str`` and ``topic`` are kept.
    Layout whitespace left over from the HTML is removed, and the individual
    terms of ``topic`` (one per line in the page) are joined with ``;`` after
    dropping every ``.`` character.

    Args:
        lista_dados: List of dictionaries, one per scraped record.

    Returns:
        A new DataFrame with the six cleaned columns.

    Raises:
        KeyError: If any of the six columns is absent from ``lista_dados``.
    '''
    colunas = ['title','description','url','eu_rights_str_mv','network_name_str','topic']
    # Line break plus indentation that separates values inside a table cell.
    recuo = '\n                  '

    # Work on an explicit copy so the column assignments below never target
    # a view of the intermediate frame.
    df = pd.DataFrame(lista_dados)[colunas].copy()

    # regex=False everywhere: the patterns are literal text, and '.' in
    # particular would match every character if treated as a regex.
    for coluna in ('title', 'description'):
        df[coluna] = df[coluna].str.strip().str.replace(recuo, '', regex=False)
    for coluna in ('url', 'eu_rights_str_mv', 'network_name_str'):
        df[coluna] = (
            df[coluna]
            .str.replace(recuo, '', regex=False)
            .str.replace('\n', '', regex=False)
        )
    df['topic'] = (
        df['topic']
        .str.strip()
        .str.replace('.', '', regex=False)
        .str.split(recuo)
        # Missing topics stay as they are; lists of terms become 'a;b;c'.
        .apply(lambda x: ';'.join(x) if isinstance(x, list) else x)
    )

    return df

## Coleta de dados

In [80]:
# Search on the OASIS portal: records whose format is masterThesis or
# doctoralThesis, with publishDate between 2023 and 2023.
url = (
    'https://oasisbr.ibict.br/vufind/Search/Results'
    '?filter%5B%5D=%7Eformat%3A%22masterThesis%22'
    '&filter%5B%5D=%7Eformat%3A%22doctoralThesis%22'
    '&type=AllFields'
    '&daterange%5B%5D=publishDate'
    '&publishDatefrom=2023'
    '&publishDateto=2023'
)
# Browser-like User-Agent sent with every request.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (X11; Linux x86_64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/110.0.0.0 Safari/537.36'
    )
}

# First results page -> number of pages -> link of every record.
resposta = resposta_html(url, headers)
total_paginas = obtem_total_paginas(resposta)
links = links_paginas(url, total_paginas, headers)

In [81]:
# Scrape every collected record page and turn the result into a DataFrame.
lista_dados = scraping_dados(links,headers)

df = dataframe_dados_lista(lista_dados)

# DataFrame.to_csv only accepts a one-character field delimiter; a longer
# one raises TypeError. Fields that contain ';' (such as `topic`) are quoted
# automatically by the default quoting mode, so ';' is safe here.
df.to_csv('teses_dissertacoes_oasis_2023_20231003.csv',sep=';',index=False)


## Transformações dos dados

In [82]:
# Load a previously exported CSV. The delimiter is a single ';' because
# DataFrame.to_csv cannot write multi-character delimiters; a longer `sep`
# would be interpreted as a regular expression by the slower python engine.
# NOTE(review): assumes this file was produced by DataFrame.to_csv with
# sep=';' — confirm against the actual file.
df = pd.read_csv('artigos_oasis_2023_20231003.csv',sep=';')

In [83]:
# Display the loaded DataFrame.
df

Unnamed: 0,title,description,url,eu_rights_str_mv,network_name_str,topic
0,Uso de Audit and Feedback: lições de uma pesqu...,"Resumo Objetivo Avaliar, a partir do discurso ...",http://old.scielo.br/scielo.php?script=sci_art...,openAccess,Acta Paulista de Enfermagem (Online),Ciência da implementação;Saúde mental;Política...
1,Bioethics and sociology: the place of social s...,The social sciences have integrated the analyt...,http://old.scielo.br/scielo.php?script=sci_art...,openAccess,Interface (Botucatu. Online),Bioethics;Social sciences;Technology;Global he...
2,Supervisão acadêmica de médicos em áreas indíg...,Trata-se de estudo qualitativo sobre o papel d...,http://old.scielo.br/scielo.php?script=sci_art...,openAccess,Interface (Botucatu. Online),Educação médica;Povos indígenas;Programa Mais ...
3,Um roteiro virtual sobre ativismo biossocial e...,A dificuldade em demarcar um termo que designe...,http://old.scielo.br/scielo.php?script=sci_art...,openAccess,Interface (Botucatu. Online),Ativismo político;Recusa do paciente ao tratam...
4,Macronutrients of Brown and Green Water Types ...,Abstract Biofloc technology (BFT) systems are ...,http://old.scielo.br/scielo.php?script=sci_art...,openAccess,Brazilian Archives of Biology and Technology,biofloc technology;flocponics;halophytes;Litop...
...,...,...,...,...,...,...
18391,Terapia por ondas de choque de baixa intensida...,OBJETIVO: Avaliar o efeito da Terapia por Onda...,https://saberaberto.homologacao.uneb.br/handle...,openAccess,Saber Aberto – Repositório Institucional da UNEB,Disfunção erétil;Tratamento;Fisioterapia
18392,Frequência do uso de dispositivos eletrônicos ...,Objetivo: Estimar a frequência do uso de dispo...,https://saberaberto.homologacao.uneb.br/handle...,openAccess,Saber Aberto – Repositório Institucional da UNEB,Cigarro Eletrônico;Sistemas Eletrônicos de Lib...
18393,Intervenções fisioterapêuticas para o tratamen...,OBJETIVO: Revisar os conhecimentos científicos...,https://saberaberto.homologacao.uneb.br/handle...,openAccess,Saber Aberto – Repositório Institucional da UNEB,Diástase;Reto Abdominal;Período Pós-Parto;Fisi...
18394,Análise de dados do twitter: uma perspectiva p...,"Durante a pandemia, ampliou-se o uso das redes...",https://saberaberto.homologacao.uneb.br/handle...,openAccess,Saber Aberto – Repositório Institucional da UNEB,Twitter;Análise de sentimentos;Processamento d...


In [84]:
# Number of terms per record. Terms inside `topic` are separated by a single
# ';', so that is the delimiter to split on; records without topics count 0.
df['qtd_termos'] = df['topic'].str.split(';').apply(lambda x: len(x) if isinstance(x,list) else 0)

In [85]:
# How many records have each number of terms.
df['qtd_termos'].value_counts()

qtd_termos
1      3189
5      3056
0      2184
4      2105
6      1958
8      1022
3      1006
7       785
10      733
9       701
12      447
2       324
15      241
11      223
13      103
14      103
16       76
20       41
18       34
17       22
19       11
21        9
22        6
25        4
23        3
28        3
30        2
46        1
24        1
29        1
27        1
109       1
Name: count, dtype: int64

In [86]:
# Same distribution, expressed as a percentage of all records.
df['qtd_termos'].value_counts(normalize=True) * 100

qtd_termos
1      17.335290
5      16.612307
0      11.872146
4      11.442705
6      10.643618
8       5.555556
3       5.468580
7       4.267232
10      3.984562
9       3.810611
12      2.429876
2       1.761252
15      1.310067
11      1.212220
13      0.559904
14      0.559904
16      0.413133
20      0.222875
18      0.184823
17      0.119591
19      0.059796
21      0.048924
22      0.032616
25      0.021744
23      0.016308
28      0.016308
30      0.010872
46      0.005436
24      0.005436
29      0.005436
27      0.005436
109     0.005436
Name: proportion, dtype: float64

In [87]:
# Share (in %) of records that have more than 3 and fewer than 10 terms.
(
    (df['qtd_termos'].value_counts(normalize=True) * 100)
    .reset_index()
    .query('3 < qtd_termos < 10')['proportion']
    .sum()
)

52.33202870189171

In [88]:
# Keep only the records that have more than 3 and fewer than 10 terms.
df_filtrado = df[(df['qtd_termos'] > 3) & (df['qtd_termos'] < 10)]

In [89]:
# Display the topics of the filtered records.
df_filtrado.loc[:, 'topic']

0        Ciência da implementação;Saúde mental;Política...
1        Bioethics;Social sciences;Technology;Global he...
2        Educação médica;Povos indígenas;Programa Mais ...
3        Ativismo político;Recusa do paciente ao tratam...
4        biofloc technology;flocponics;halophytes;Litop...
                               ...                        
18383    Escoliose;Tratamento Conservador;Testes Funcio...
18387    Disfunção Sexual;Anorgasmia;Disfunções Sexuais...
18390    Dor musculoesquelética;idosos;qualidade de vid...
18392    Cigarro Eletrônico;Sistemas Eletrônicos de Lib...
18393    Diástase;Reto Abdominal;Período Pós-Parto;Fisi...
Name: topic, Length: 9627, dtype: object

In [90]:
# Inspect the raw "topic" strings, one record per line.
for topico in df_filtrado['topic'].tolist():
    print(topico)

Ciência da implementação;Saúde mental;Políticas de saúde;Estratégia saúde da família;Atenção primária à saúde
Bioethics;Social sciences;Technology;Global health;Pharmaceutical industries
Educação médica;Povos indígenas;Programa Mais Médicos;Covid-19
Ativismo político;Recusa do paciente ao tratamento;Tratamento psiquiátrico involuntário;Saúde mental;Psiquiatria
biofloc technology;flocponics;halophytes;Litopenaeus vannamei;marine aquaponics
cuttings;essential oil;germination;gibberellic acid;indole-3-butyric acid;potassium nitrate
gluten;immunohistochemistry;LH;ovarian;transglutaminase
cell seeding;decellularized Wharton's jelly matrix;natural scaffolds;tissue decellularization;Wharton's jelly matrix
COVID-19;Fuzzy model;population dynamics;measure interventions
esat-6;molecular identification;nontuberculous mycobacteria;tuberculosis;Mycobacterium avium
Trifluralin;Aloe vera;life span;DNA damage;larval toxicity
bioprospection;DNA-barcode;rbcL;Navicula pseudoantonii;Nitzschia inconspicua


In [91]:
# Independent single-column frame holding only the topics.
df_filtrado_topics = df_filtrado[['topic']].copy()

In [92]:
# Display the topics-only frame.
df_filtrado_topics

Unnamed: 0,topic
0,Ciência da implementação;Saúde mental;Política...
1,Bioethics;Social sciences;Technology;Global he...
2,Educação médica;Povos indígenas;Programa Mais ...
3,Ativismo político;Recusa do paciente ao tratam...
4,biofloc technology;flocponics;halophytes;Litop...
...,...
18383,Escoliose;Tratamento Conservador;Testes Funcio...
18387,Disfunção Sexual;Anorgasmia;Disfunções Sexuais...
18390,Dor musculoesquelética;idosos;qualidade de vid...
18392,Cigarro Eletrônico;Sistemas Eletrônicos de Lib...


In [98]:
# Work on a copy so the new column is not written to a slice of `df`.
df_filtrado = df_filtrado.copy()
# Normalise each term: split on the ';' that separates terms in `topic`,
# trim the whitespace around every term, and join them back with ';'.
df_filtrado['topic_formatado'] = df_filtrado['topic'].apply(
    lambda x: ';'.join(termo.strip() for termo in x.split(';'))
)
# Upper-case so that terms differing only by case collapse into one.
df_filtrado['topic_formatado'] = df_filtrado['topic_formatado'].str.upper()

In [130]:
# Print the list of normalised terms of each record.
for termos in df_filtrado['topic_formatado'].str.split(';').tolist():
    print(termos)

['CIÊNCIA DA IMPLEMENTAÇÃO', 'SAÚDE MENTAL', 'POLÍTICAS DE SAÚDE', 'ESTRATÉGIA SAÚDE DA FAMÍLIA', 'ATENÇÃO PRIMÁRIA À SAÚDE']
['BIOETHICS', 'SOCIAL SCIENCES', 'TECHNOLOGY', 'GLOBAL HEALTH', 'PHARMACEUTICAL INDUSTRIES']
['EDUCAÇÃO MÉDICA', 'POVOS INDÍGENAS', 'PROGRAMA MAIS MÉDICOS', 'COVID-19']
['ATIVISMO POLÍTICO', 'RECUSA DO PACIENTE AO TRATAMENTO', 'TRATAMENTO PSIQUIÁTRICO INVOLUNTÁRIO', 'SAÚDE MENTAL', 'PSIQUIATRIA']
['BIOFLOC TECHNOLOGY', 'FLOCPONICS', 'HALOPHYTES', 'LITOPENAEUS VANNAMEI', 'MARINE AQUAPONICS']
['CUTTINGS', 'ESSENTIAL OIL', 'GERMINATION', 'GIBBERELLIC ACID', 'INDOLE-3-BUTYRIC ACID', 'POTASSIUM NITRATE']
['GLUTEN', 'IMMUNOHISTOCHEMISTRY', 'LH', 'OVARIAN', 'TRANSGLUTAMINASE']
['CELL SEEDING', "DECELLULARIZED WHARTON'S JELLY MATRIX", 'NATURAL SCAFFOLDS', 'TISSUE DECELLULARIZATION', "WHARTON'S JELLY MATRIX"]
['COVID-19', 'FUZZY MODEL', 'POPULATION DYNAMICS', 'MEASURE INTERVENTIONS']
['ESAT-6', 'MOLECULAR IDENTIFICATION', 'NONTUBERCULOUS MYCOBACTERIA', 'TUBERCULOSIS', 'M

## Classificar termos únicos

In [125]:
# Every distinct term across all records. `topic_formatado` separates terms
# with a single ';', so that is the delimiter to split on.
set_termos_unicos = {
    termo
    for row in df_filtrado['topic_formatado']
    for termo in row.split(';')
}

# Drop the empty term (left by leading, trailing or doubled separators) when
# it is present; filtering never fails when it is absent.
lista_termos_unicos = [termo for termo in set_termos_unicos if termo != '']

In [131]:
# Put the unique terms in ascending order.
lista_termos_unicos.sort()

In [152]:
# Number of unique terms.
len(lista_termos_unicos)

34520

In [135]:
# Number of 100-term batches, derived from the list itself so the figure
# cannot drift from the data.
len(lista_termos_unicos) / 100

345.2

In [154]:
# Print the unique terms in numbered batches of `batch_size`, each followed
# by a dashed line and blank lines, so batches are easy to copy one by one.
batch_size = 100
delimiter = '-' * 100

inicios = range(0, len(lista_termos_unicos), batch_size)
for batch_number, inicio in enumerate(inicios, start=1):
    batch = lista_termos_unicos[inicio:inicio + batch_size]
    print('+' * 40, f"LOTE {batch_number}", '+' * 40)
    print('\n'.join(batch))
    print(delimiter, '\n\n\n\n\n\n\n\n')



++++++++++++++++++++++++++++++++++++++++ LOTE 1 ++++++++++++++++++++++++++++++++++++++++
"VINHO VERDE" WINE MUST
#METOO
$\PSI$-HILFER FRACTIONAL DERIVATIVE
&NBSP
'NOIR' GALLEGO
( E )-CARYOPHYLLENE
(-)-EPICATECHIN GALLATE
(0 0 1) SURFACE AND THERMOELECTRIC
(DE)MEDICALIZATION
(DES)MEDICALIZAÇÃO
(IM)PRESCRITIBILIDADE
(IN)VISIBILIDADE
(IN)VISIBILITY
(L) WALP
(MARGINAL AND EFFECTIVE) INTEREST RATE
(R)-NEA
(U-TH-SM)/HE THERMOCHRONOLOGY
(ZOX-LME)
0
1)
1,2,3-TRIAZOLE
1,2,3-TRIAZOLE DERIVATIVES
1,3,5-TRIAZA-7-PHOSPHAADAMANTANE
1-NAPHTHYL GROUP
1-TRIFLUOROMETHOXYPHENYL-3-(1-PROPIONYLPIPERIDINE-4-YL) UREA
1-ÓXIDO DE DE 4-NITROQUINOLINA
1060400-6
10TH-GRADE STUDENTS
11AH
13 DE MAIO
14TH CENTURY
15N ISOTOPES
15N NATURAL ABUNDANCE
15N-LABELED FERTILIZER
15TH CENTURY IN PORTUGAL
16S RDNA
16S RRNA
16S RRNA SEQUENCING
17 ODS
1755 EARTHQUAKE
17Β-ESTRADIOL
18 FLUORO-DESOXI-GLUCOSE
18-FLUORO-DESOXI-GLUCOSE
18S
18S RRNA
18S RRNA GENE
18TH-CENTURY CALCULUS
1967–1970 VOLCANIC SEQUENCE
1987-1988 NATIONAL CONS

In [None]:
# Manual classification of the unique terms: each key is a term exactly as
# it appears in the upper-cased term list, each value is its subject area.
# Entries are added batch by batch, in the order the batches were printed.
termos_classificados = {
    # BATCH 1
    # Full term as it appears in the list; the two partial keys are kept.
    '"VINHO VERDE" WINE MUST': "Hospitality & Leisure Management",
    "VINHO VERDE": "Hospitality & Leisure Management",
    "WINE MUST": "Hospitality & Leisure Management",
    "#METOO": "Social Work",
    # Backslash escaped explicitly: "\P" is not a valid escape sequence.
    "$\\PSI$-HILFER FRACTIONAL DERIVATIVE": "Mathematics",
    "&NBSP": "Computer Science & Information Systems",
    "'NOIR' GALLEGO": "Modern Languages",
    "( E )-CARYOPHYLLENE": "Chemistry",
    "(-)-EPICATECHIN GALLATE": "Chemistry",
    "(0 0 1) SURFACE AND THERMOELECTRIC": "Physics & Astronomy",
    "(DE)MEDICALIZATION": "Medicine",
    "(DES)MEDICALIZAÇÃO": "Medicine",
    "(IM)PRESCRITIBILIDADE": "Law",
    "(IN)VISIBILIDADE": "Geophysics",
    "(IN)VISIBILITY": "Geophysics",
    "(L) WALP": "Modern Languages",
    "(MARGINAL AND EFFECTIVE) INTEREST RATE": "Economics & Econometrics",
    "(R)-NEA": "Chemistry",
    "(U-TH-SM)/HE THERMOCHRONOLOGY": "Earth & Marine Sciences",
    "(ZOX-LME)": "Economics & Econometrics",
    "0": "Mathematics",
    "1)": "Mathematics",
    "1,2,3-TRIAZOLE": "Chemistry",
    "1,2,3-TRIAZOLE DERIVATIVES": "Chemistry",
    "1,3,5-TRIAZA-7-PHOSPHAADAMANTANE": "Chemistry",
    "1-NAPHTHYL GROUP": "Chemistry",
    "1-TRIFLUOROMETHOXYPHENYL-3-(1-PROPIONYLPIPERIDINE-4-YL) UREA": "Chemistry",
    "1-ÓXIDO DE DE 4-NITROQUINOLINA": "Chemistry",
    "1060400-6": "Mathematics",
    "10TH-GRADE STUDENTS": "Education",
    "11AH": "Chemistry",
    "13 DE MAIO": "History",
    "14TH CENTURY": "History",
    "15N ISOTOPES": "Physics & Astronomy",
    "15N NATURAL ABUNDANCE": "Physics & Astronomy",
    "15N-LABELED FERTILIZER": "Agriculture & Forestry",
    "15TH CENTURY IN PORTUGAL": "History",
    "16S RDNA": "Biological Sciences",
    "16S RRNA": "Biological Sciences",
    "16S RRNA SEQUENCING": "Biological Sciences",
    "17 ODS": "Development Studies",
    "1755 EARTHQUAKE": "Earth & Marine Sciences",
    "17Β-ESTRADIOL": "Pharmacy & Pharmacology",
    "18 FLUORO-DESOXI-GLUCOSE": "Medicine",
    "18-FLUORO-DESOXI-GLUCOSE": "Medicine",
    "18S": "Biological Sciences",
    "18S RRNA": "Biological Sciences",
    "18S RRNA GENE": "Biological Sciences",
    "18TH-CENTURY CALCULUS": "Mathematics",
    "1967–1970 VOLCANIC SEQUENCE": "Earth & Marine Sciences",
    "1987-1988 NATIONAL CONSTITUENT ASSEMBLY": "Politics & International Studies",
    "19TH CENTURY": "History",
    "19TH-CENTURY INTELLIGENTSIA": "History",
    "19TH-CENTURY MANUFACTURE": "History",
    "1ST REPUBLIC": "Politics & International Studies",
    "1ª REPÚBLICA": "Politics & International Studies",
    "1º CICLO": "Education",
    "1º CICLO DO ENSINO BÁSICO": "Education",
    "1’-ACETOXYCHAVICOL ACETATE": "Chemistry",
    "2": "Mathematics",
    "2,3-DIPHOSPHOGLYCERATE": "Chemistry",
    "2-(2’-HYDROXYPHENYL)BENZOXAZOLE": "Chemistry",
    "2-D MATERIALS": "Materials Science",
    "2-DEOXY-D-GLUCOSE": "Medicine",
    "2-DIMENSIONAL MATERIALS": "Materials Science",
    "2-DIPHENILE-1-PICRYLEHYDRAZYLE (DPPH)": "Chemistry",
    "2-NAPHTHYLACETYL GROUP": "Chemistry",
    "2-NITROIMIDAZOOXAZINE": "Chemistry",
    "2-PHENYLETHANOL MONITORING": "Chemistry",
    "2015 LEGISLATIVE ELECTION": "Politics & International Studies",
    "2019 NOVEL CORONAVIRUS": "Medicine",
    "2022": "Mathematics",
    "2030 AGENDA": "Development Studies",
    "2030AGENDA": "Development Studies",
    "21ST CENTURY": "History",
    "25(OH)D": "Medicine",
    "25-HYDROXYVITAMIN D": "Medicine",
    "25-HYDROXYVITAMIN D SERUM": "Medicine",
    "2D ELECTRON GAS": "Physics & Astronomy",
    "2D FLUORESCENCE SPECTROSCOPY": "Physics & Astronomy",
    "2D VISCOELASTIC MODELING": "Physics & Astronomy",
    "2’,3’-DIDEOXYADENOSINE": "Chemistry",
    "3,4-DIHYDROPYRIMIDIN-2(1H)-ONE DERIVATIVE": "Chemistry",
    "3-(3′,4′-DIHYDROXYPHENYL)-8-HYDROXYCOUMARIN": "Chemistry",
    "3-CIC": "Chemistry",
    "3-CMC": "Chemistry",
    "3-HYDROXY-4-PYRIDINONE LIGAND": "Chemistry",
    "3-INDOLEACETIC ACID": "Chemistry",
    "3-INDOXYL SULFATE": "Chemistry",
    "3-NITROPROPIONIC ACID": "Chemistry",
    "3-PBA": "Chemistry",
    "3000000-9 - ENGENHARIAS, 3070304-2 - RESÍDUOS SÓLIDOS, DOMÉSTICOS E INDUSTRIAIS": "Engineering - General",
    "3040400-2 SISTEMAS ELÉTRICOS DE POTÊNCIA": "Engineering - Electrical & Electronic",
    "3040402-9 TRANSMISSÃO DA ENERGIA ELÉTRICA, DISTRIBUIÇÃO DA ENERGIA ELÉTRICA": "Engineering - Electrical & Electronic",
    "3070000-0 ENGENHARIA SANITÁRIA": "Engineering - Civil & Structural",
    "3070000-0 ENGENHARIA SANITÁRIA / 3070300-0 SANEAMENTO BÁSICO / 3070304-2 RESÍDUOS SÓLIDOS, DOMÉSTICOS E INDUSTRIAIS": "Engineering - Civil & Structural",
    "3070100-7 RECURSOS HÍDRICOS": "Engineering - Civil & Structural",
    "3070303-4 DRENAGEM URBANA DE ÁGUAS PLUVIAIS": "Engineering - Civil & Structural",
    "3080000-5 ENGENHARIA DE PRODUÇÃO": "Engineering - General",
    "3080000-5 ENGENHARIA DE PRODUÇÃO - INOVAÇÃO TECNOLÓGICA": "Engineering - General",
    "33 PH100": "Chemistry",
    # BATCH 2
    "3D BIOPRINTING": "Engineering - General",
    "3D CONCRETE PRINTING": "Engineering - Civil & Structural",
    "3D CONSTRUCTS": "Engineering - General",
    "3D FACE RECOGNITION": "Computer Science & Information Systems",
    "3D GRASE": "Engineering - General",
    "3D HUMAN MODEL": "Biological Sciences",
    "3D HYDRAULIC FRACTURING": "Engineering - Petroleum",
    "3D IMAGE": "Computer Science & Information Systems",
    "3D MODEL": "Computer Science & Information Systems",
    "3D MODELING": "Computer Science & Information Systems",
    "3D POINT CLOUDS": "Computer Science & Information Systems",
    "3D POROUS MORPHOLOGY": "Materials Science",
    "3D PRINT": "Engineering - General",
    "3D PRINTING": "Engineering - General",
    "3D PRINTING RESINS": "Materials Science",
    "3D SCAFFOLDS": "Engineering - General",
    "3D-PRINTED DRUGS": "Medicine",
    "3D-PRINTED MAGNETIC DEVICE": "Engineering - General",
    "3D-PRINTED SCAFFOLDS": "Engineering - General",
    "3D-QSAR": "Chemistry",
    "3RD GENERATION MMPS": "Biological Sciences",
    "3º CICLO DE ENSINO BÁSICO": "Education",
    "3º MILLENNIUM A C": "History",
    "3′UTR": "Biological Sciences",
    "4-AMINOPHENOL": "Chemistry",
    "4-CARVOMENTHENOL": "Chemistry",
    "4-CIC": "Chemistry",
    "4-CMC": "Chemistry",
    "4-HYDROXYBENZOIC ACID": "Chemistry",
    "4-MDMB": "Chemistry",
    "4-MDMP": "Chemistry",
    "4-MEAP": "Chemistry",
    "4-MNEB": "Chemistry",
    "4-NITROPHENOL": "Chemistry",
    "4-NITROQUINOLINE 1-OXIDE": "Chemistry",
    "45S5-K BIOGLASS": "Materials Science",
    "46, XX DISORDERS OF SEX DEVELOPMENT/SURGERY": "Medicine",
    "4A ZEOLITE MOLECULAR SIEVE": "Chemistry",
    "4DK-MEANS CLUSTERING": "Data Science",
    "4NOPRESSURE": "Engineering - General",
    "4TH INDUSTRIAL REVOLUTION": "Engineering - General",
    "5-(2-PYRIDYL-1-OXIDE)TETRAZOLE": "Chemistry",
    "5-AZA-DC": "Medicine",
    "5-DEAZAALLOXAZINE": "Chemistry",
    "5-DEAZAFLAVIN": "Chemistry",
    "5-FLUOROURACIL": "Medicine",
    "5-HT": "Biological Sciences",
    "5-HT1A": "Biological Sciences",
    "5-HT3RECEPTORS": "Biological Sciences",
    "5-HYDROXYMETHYLURACIL": "Chemistry",
    "5-LEUKOTRIENE": "Chemistry",
    "5-STAR OPEN DATA": "Data Science",
    "5070000-6 CIÊNCIA E TECNOLOGIA DE ALIMENTOS": "Biological Sciences",
    "5G": "Engineering - General",
    "5G NEW RADIO": "Engineering - General",
    "5HT3": "Biological Sciences",
    "5′-DEOXY-5-FLUOROCYTIDINE": "Chemistry",
    "60+": "Social Policy & Administration",
    "6000000-7CIÊNCIAS SOCIAIS APLICADAS 6020000-6 ADMINISTRAÇÃO": "Business & Management Studies",
    "6020202-5": "Business & Management Studies",
    "6050000-0 PLANEJAMENTO URBANO E REGIONAL": "Geography",
    "6060600-2 POLÍTICA PÚBLICA E POPULAÇÃO": "Politics & International Studies",
    "6130000-4 TURISMO": "Hospitality & Leisure Management",
    "6TH MASS EXTINCTION": "Environmental Sciences",
    "7,12-DIMETHYLBENZANTHRACENE": "Chemistry",
    "7010000-4 (FILOSOFIA)": "Philosophy",
    "7080000-6 EDUCAÇÃO / 7080400-1 ENSINO-APRENDIZAGEM": "Education",
    "7080000-6 EDUCAÇÃO/ 7080000-6 EDUCAÇÃO 7080100-2 FUNDAMENTOS DA EDUCAÇÃO": "Education",
    "7090000-0 CIÊNCIA POLÍTICA 7090400-6 POLÍTICAS PÚBLICAS": "Politics & International Studies",
    "8-GALACTOSIDASE": "Biological Sciences",
    "92 BASE DELETION": "Biological Sciences",
    "<IR> FRAMEWORK": "Engineering - General",
    "A": "Art & Design",
    "A ARTE DO GESSO ENTRE A CRIAÇÃO E A RÉPLICA – ESTUDO E PRESERVAÇÃO - 1 - LISBOA - 2021": "Art & Design",
    "A FILOSOFIA NA ALCOVA": "Philosophy",
    "A NOVA GERAÇÃO": "Art & Design",
    "A NOVA GERAÇÃO (THE NEW GENERATION)": "Art & Design",
    "A POSTERIORI": "Philosophy",
    "A PRIORI": "Philosophy",
    "A SYSTEMATIC REVIEW": "Medicine",
    "A/CI CURVES": "Physics & Astronomy",
    "A549": "Biological Sciences",
    "A:2018": "Law",
    "AA6063-T6 ALUMINUM ALLOY": "Materials Science",
    "AALPHAHERPESVIRUSES": "Biological Sciences",
    "AASHTO LRFD": "Engineering - Civil & Structural",
    "AAV8-MEDIATED GENE THERAPY COMBINED WITH CHEMOTHERAPY": "Medicine",
    "AAVS-MEDIATED GENE DELIVERY": "Biological Sciences",
    "ABA": "Biological Sciences",
    "ABA SIGNALING": "Biological Sciences",
    "ABALONE DATASET": "Biological Sciences",
    "ABAMECTIN": "Chemistry",
    "ABANDONMENT": "Social Work",
    "ABANDONO AFETIVO": "Social Work",
    "ABANDONO DE ALIMENTOS NÃO SAUDÁVEIS": "Medicine",
    "ABANDONO DO MAGISTÉRIO": "Education",
    "ABANDONO ESCOLAR": "Education",
    "ABANDONO PATERNO": "Social Work",
    "ABAQUS": "Engineering - Mechanical, Aeronautical & Manufacturing",
    "ABASTECIMENTO DE ÁGUA": "Engineering - General",
    # BATCH 3
    "ABASTECIMIENTO DE AGUA": "Engineering - Civil & Structural",
    "ABBOTTABAD": "Geography",
    "ABC DE CORDEL": "Art & Design",
    "ABC PAULISTA REGION": "Geography",
    "ABCESSO EPIDURAL": "Medicine",
    "ABCESSO EPIGLÓTICO": "Medicine",
    "ABDOMEN": "Anatomy & Physiology",
    "ABDOMEN-PELVIS": "Anatomy & Physiology",
    "ABDOMINAL": "Anatomy & Physiology",
    "ABDOMINAL AORTA": "Anatomy & Physiology",
    "ABDOMINAL AORTIC ANEURYSM": "Medicine",
    "ABDOMINAL AORTIC ANEURYSM (AAA)": "Medicine",
    "ABDOMINAL AORTIC CALCIFICATION": "Medicine",
    "ABDOMINAL CORE": "Anatomy & Physiology",
    "ABDOMINAL MUSCLE": "Anatomy & Physiology",
    "ABDOMINAL OBESITY": "Medicine",
    "ABDOMINAL PAIN": "Medicine",
    "ABDOMINAL PAIN/ETIOLOGY": "Medicine",
    "ABDÓMEN AGUDO": "Medicine",
    "ABERRATIONS": "Physics & Astronomy",
    "ABERTO": "Art & Design",
    "ABERTURA FINANCEIRA": "Economics & Econometrics",
    "ABIOTIC FACTORS": "Environmental Sciences",
    "ABIOTIC FILTERING": "Environmental Sciences",
    "ABIOTIC STRESS": "Environmental Sciences",
    "ABIOTIC VARIABLES": "Environmental Sciences",
    "ABJECTION": "Psychology",
    "ABLATION": "Earth & Marine Sciences",
    "ABLE-BODIED": "Social Policy & Administration",
    "ABLEISM": "Social Work",
    "ABNORMAL UTERINE BLEEDING": "Medicine",
    "ABNORMALITIES": "Medicine",
    "ABORDAGEM BASEADA EM COMPETÊNCIAS": "Education",
    "ABORDAGEM CONSENSUAL": "Psychology",
    "ABORDAGEM HERMENÊUTICA": "Philosophy",
    "ABORDAGEM TERAPÊUTICA DA RINITE": "Medicine",
    "ABORDAGENS PLURAIS": "Philosophy",
    "ABORTION": "Medicine",
    "ABORTO INDUZIDO": "Medicine",
    "ABORTO LEGAL": "Law",
    "ABS": "Materials Science",
    "ABSENTEEISM": "Social Policy & Administration",
    "ABSENTEÍSMO": "Social Policy & Administration",
    "ABSENTISMO": "Social Policy & Administration",
    "ABSOBSING SET": "Mathematics",
    "ABSOLUTE CONFIGURATION": "Chemistry",
    "ABSORBANCE": "Chemistry",
    "ABSORBER PLATE": "Engineering - Mechanical, Aeronautical & Manufacturing",
    "ABSORPTION": "Chemistry",
    "ABSORPTIVE CAPACITY": "Business & Management Studies",
    "ABSTRACT SYSTEMS": "Computer Science & Information Systems",
    "ABSTRACT VS FIGURATIVE": "Art & Design",
    "ABSTRACTION": "Art & Design",
    "ABTS": "Chemistry",
    "ABTS ASSAY": "Chemistry",
    "ABU DHABI": "Geography",
    "ABUNDANCE-IMPACT CURVE": "Environmental Sciences",
    "ABUNDÂNCIA COMUNICATIVA": "Linguistics",
    "ABUSE": "Social Work",
    "ABUSIVE DYNAMICS": "Psychology",
    "ABUSO": "Social Work",
    "ABUSO INFANTIL": "Social Work",
    "ABUSO SEXUAL DA CRIANÇA": "Social Work",
    "AC CONDUCTIVITY": "Physics & Astronomy",
    "ACACIA DEALBATA": "Biology",
    "ACACIA SPECIES": "Biology",
    "ACADEMIA": "Education",
    "ACADEMIA MILITAR PORTUGUESA": "Education",
    "ACADEMIA OTHERWISE": "Education",
    "ACADEMIC ACHIEVEMENT": "Education",
    "ACADEMIC AND NON-ACADEMIC LITERATURE": "Literature",
    "ACADEMIC COLONIALISM": "Education",
    "ACADEMIC COMPETENCIES": "Education",
    "ACADEMIC COURSES": "Education",
    "ACADEMIC EDUCATION": "Education",
    "ACADEMIC ENGAGEMENT": "Education",
    "ACADEMIC FAILURE": "Education",
    "ACADEMIC FREEDOM": "Education",
    "ACADEMIC IMPACT": "Education",
    "ACADEMIC LIBRARY": "Education",
    "ACADEMIC MAJOR": "Education",
    "ACADEMIC ORCHESTRA": "Music",
    "ACADEMIC OUTCOMES": "Education",
    "ACADEMIC OUTPUT": "Education",
    "ACADEMIC PATH": "Education",
    "ACADEMIC PERFORMANCE": "Education",
    "ACADEMIC PRODUCTIONS": "Education",
    "ACADEMIC RESEARCH": "Education",
    "ACADEMIC SUCCESS": "Education",
    "ACADEMIC TRAINING": "Education",
    "ACADEMIC WRITING": "Education",
    "ACADEMIC-SCIENTIFIC LITERACY": "Education",
    "ACAI": "Biology",
    "ACAMPAMENTO": "Hospitality & Leisure Management",
    "ACANTHOSIS NIGRICANS": "Medicine",
    "ACANTHOTOMICUS": "Biology",
    "ACANTOCEPHALA": "Biology",
    "ACARI": "Biology",
    "ACARICIDE": "Biology",
    "ACARICIDE RESIDUES": "Biology",
    # BATCH 4
}