In [77]:
import pandas as pd
import json
from pandas import json_normalize
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score


In [78]:
# Caminho para o seu arquivo JSONL
caminho_arquivo = 'med_items.jsonl'  # Substitua pelo caminho correto

# Lista para armazenar os dados
data = []

# Ler o arquivo JSONL linha por linha
with open(caminho_arquivo, 'r', encoding='utf-8') as f:
    for line in f:
        data.append(json.loads(line))


In [79]:
# Normalizar a coluna 'farmacos'
df_farmacos = json_normalize(
    data,
    record_path='farmacos',
    meta=['medicamentoid']
)

# Exibir as primeiras linhas para verificar
print(df_farmacos.head())


   farmacoid     farmaco        slug  \
0       1570  Canabidiol  canabidiol   
1       1570  Canabidiol  canabidiol   
2       1570  Canabidiol  canabidiol   
3       1570  Canabidiol  canabidiol   
4       1570  Canabidiol  canabidiol   

                                     tipo_receita medicamentoid  
0  Receita de Controle Especial em duas vias "C1"         48954  
1  Receita de Controle Especial em duas vias "C1"         52836  
2  Receita de Controle Especial em duas vias "C1"         50929  
3  Receita de Controle Especial em duas vias "C1"         50930  
4  Receita de Controle Especial em duas vias "C1"         52838  


In [80]:
# Normalizar a coluna 'historico_preco'
df_historico_preco = json_normalize(
    data,
    record_path='historico_preco',
    meta=['medicamentoid', 'produto', 'apresentacao', 'laboratorioid', 'laboratorio']
)

# Exibir as primeiras linhas para verificar
print(df_historico_preco.head())


   mes   ano  menor_preco  maior_preco medicamentoid     produto  \
0  Dez  2023      2147.90      2640.04         48954  Canabidiol   
1  Jan  2024      2147.90      2640.04         48954  Canabidiol   
2  Fev  2024      2147.90      2640.10         48954  Canabidiol   
3  Mar  2024      2147.90      2640.10         48954  Canabidiol   
4  Abr  2024      2060.03      2653.76         48954  Canabidiol   

                                apresentacao laboratorioid  \
0  200mg/ml Solução 30 ml + Seringa Dosadora            18   
1  200mg/ml Solução 30 ml + Seringa Dosadora            18   
2  200mg/ml Solução 30 ml + Seringa Dosadora            18   
3  200mg/ml Solução 30 ml + Seringa Dosadora            18   
4  200mg/ml Solução 30 ml + Seringa Dosadora            18   

                  laboratorio  
0  Prati Donaduzzi & CIA Ltda  
1  Prati Donaduzzi & CIA Ltda  
2  Prati Donaduzzi & CIA Ltda  
3  Prati Donaduzzi & CIA Ltda  
4  Prati Donaduzzi & CIA Ltda  


In [81]:
# Converter 'medicamentoid' para string em ambos os DataFrames
df_farmacos['medicamentoid'] = df_farmacos['medicamentoid'].astype(str)
df_historico_preco['medicamentoid'] = df_historico_preco['medicamentoid'].astype(str)


In [82]:
# Mesclar df_historico_preco com df_farmacos baseado em 'medicamentoid'
df_historico_preco = pd.merge(
    df_historico_preco,
    df_farmacos,
    on='medicamentoid',
    how='left',
    suffixes=('', '_y')
)

# Remover colunas duplicadas, se houver
df_historico_preco = df_historico_preco.loc[:, ~df_historico_preco.columns.str.endswith('_y')]

# Exibir informações do DataFrame após a mesclagem
print(df_historico_preco.info())
print(df_historico_preco.head())


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9959 entries, 0 to 9958
Data columns (total 13 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   mes            9959 non-null   object 
 1   ano            9959 non-null   int64  
 2   menor_preco    9959 non-null   float64
 3   maior_preco    9959 non-null   float64
 4   medicamentoid  9959 non-null   object 
 5   produto        9959 non-null   object 
 6   apresentacao   9959 non-null   object 
 7   laboratorioid  9959 non-null   object 
 8   laboratorio    9959 non-null   object 
 9   farmacoid      9959 non-null   int64  
 10  farmaco        9959 non-null   object 
 11  slug           9959 non-null   object 
 12  tipo_receita   9959 non-null   object 
dtypes: float64(2), int64(2), object(9)
memory usage: 1011.6+ KB
None
   mes   ano  menor_preco  maior_preco medicamentoid     produto  \
0  Dez  2023      2147.90      2640.04         48954  Canabidiol   
1  Jan  2024      2147.90  

In [83]:
# Verificar se há valores nulos em 'farmacoid' e 'farmaco'
nulos_farmacoid = df_historico_preco['farmacoid'].isnull().sum()
nulos_farmaco = df_historico_preco['farmaco'].isnull().sum()

print(f"Valores nulos em 'farmacoid': {nulos_farmacoid}")
print(f"Valores nulos em 'farmaco': {nulos_farmaco}")


Valores nulos em 'farmacoid': 0
Valores nulos em 'farmaco': 0


In [84]:
# Dicionário de mapeamento de meses em português para números
meses_pt = {
    'Jan': 1,
    'Fev': 2,
    'Mar': 3,
    'Abr': 4,
    'Mai': 5,
    'Jun': 6,
    'Jul': 7,
    'Ago': 8,
    'Set': 9,
    'Out': 10,
    'Nov': 11,
    'Dez': 12
}

# Mapear os meses para números
df_historico_preco['numero_mes'] = df_historico_preco['mes'].map(meses_pt)

# Verificar se todos os meses foram mapeados corretamente
meses_nao_map = df_historico_preco[df_historico_preco['numero_mes'].isnull()]['mes'].unique()
if len(meses_nao_map) > 0:
    print(f"Meses não mapeados: {meses_nao_map}")
    # Tratamento adicional se necessário (exemplo: remover essas linhas ou preencher com valores padrão)
    # df_historico_preco = df_historico_preco[~df_historico_preco['numero_mes'].isnull()]


In [89]:
# Renomear as colunas 'ano' para 'year' e 'numero_mes' para 'month'
df_historico_preco_renamed = df_historico_preco.rename(columns={
    'ano': 'year',
    'numero_mes': 'month'
})

# Adicionar o dia como 1 para representar o primeiro dia do mês
df_historico_preco_renamed['data'] = pd.to_datetime(
    df_historico_preco_renamed[['year', 'month']].assign(day=1),
    errors='coerce'  # Coerce para NaT em caso de erro
)

# Verificar a criação da coluna 'data'
print(df_historico_preco_renamed[['year', 'month', 'data']].head())


   year  month       data
0  2023     12 2023-12-01
1  2024      1 2024-01-01
2  2024      2 2024-02-01
3  2024      3 2024-03-01
4  2024      4 2024-04-01


In [90]:
# 1. Renomear as colunas 'ano' para 'year' e 'numero_mes' para 'month'
df_historico_preco_renamed = df_historico_preco.rename(columns={
    'ano': 'year',
    'numero_mes': 'month'
})

# 2. Verificar as primeiras linhas após a renomeação
print(df_historico_preco_renamed[['year', 'month']].head())

# 3. Adicionar o dia como 1 para representar o primeiro dia do mês
df_historico_preco_renamed['data'] = pd.to_datetime(
    df_historico_preco_renamed[['year', 'month']].assign(day=1),
    errors='coerce'  # Coerce para NaT em caso de erro
)

# 4. Verificar a criação da coluna 'data'
print(df_historico_preco_renamed[['year', 'month', 'data']].head())

# 5. Remover a coluna 'numero_mes' se não for mais necessária
df_historico_preco_renamed.drop(['year', 'month'], axis=1, inplace=True)

# Atualizar o DataFrame original
df_historico_preco = df_historico_preco_renamed

# Verificar as informações do DataFrame
print(df_historico_preco.info())


   year  month
0  2023     12
1  2024      1
2  2024      2
3  2024      3
4  2024      4
   year  month       data
0  2023     12 2023-12-01
1  2024      1 2024-01-01
2  2024      2 2024-02-01
3  2024      3 2024-03-01
4  2024      4 2024-04-01
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9959 entries, 0 to 9958
Data columns (total 13 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   mes            9959 non-null   object        
 1   menor_preco    9959 non-null   float64       
 2   maior_preco    9959 non-null   float64       
 3   medicamentoid  9959 non-null   object        
 4   produto        9959 non-null   object        
 5   apresentacao   9959 non-null   object        
 6   laboratorioid  9959 non-null   object        
 7   laboratorio    9959 non-null   object        
 8   farmacoid      9959 non-null   int64         
 9   farmaco        9959 non-null   object        
 10  slug           9959 non-null  

In [92]:
# Selecionar apenas as colunas necessárias
df_final = df_historico_preco.copy()

# Exibir as primeiras linhas para verificar
print(df_final.head())


   mes  menor_preco  maior_preco medicamentoid     produto  \
0  Dez      2147.90      2640.04         48954  Canabidiol   
1  Jan      2147.90      2640.04         48954  Canabidiol   
2  Fev      2147.90      2640.10         48954  Canabidiol   
3  Mar      2147.90      2640.10         48954  Canabidiol   
4  Abr      2060.03      2653.76         48954  Canabidiol   

                                apresentacao laboratorioid  \
0  200mg/ml Solução 30 ml + Seringa Dosadora            18   
1  200mg/ml Solução 30 ml + Seringa Dosadora            18   
2  200mg/ml Solução 30 ml + Seringa Dosadora            18   
3  200mg/ml Solução 30 ml + Seringa Dosadora            18   
4  200mg/ml Solução 30 ml + Seringa Dosadora            18   

                  laboratorio  farmacoid     farmaco        slug  \
0  Prati Donaduzzi & CIA Ltda       1570  Canabidiol  canabidiol   
1  Prati Donaduzzi & CIA Ltda       1570  Canabidiol  canabidiol   
2  Prati Donaduzzi & CIA Ltda       1570  Canabidi

In [95]:
# Converter colunas para tipos categóricos
categorical_cols = ['farmacoid', 'farmaco', 'laboratorioid', 'laboratorio', 'produto', 'medicamentoid']
for col in categorical_cols:
    df_final[col] = df_final[col].astype('category')

# Exibir informações para verificar as mudanças
print(df_final.info())


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9959 entries, 0 to 9958
Data columns (total 13 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   mes            9959 non-null   object        
 1   menor_preco    9959 non-null   float64       
 2   maior_preco    9959 non-null   float64       
 3   medicamentoid  9959 non-null   category      
 4   produto        9959 non-null   category      
 5   apresentacao   9959 non-null   object        
 6   laboratorioid  9959 non-null   category      
 7   laboratorio    9959 non-null   category      
 8   farmacoid      9959 non-null   category      
 9   farmaco        9959 non-null   category      
 10  slug           9959 non-null   object        
 11  tipo_receita   9959 non-null   object        
 12  data           9959 non-null   datetime64[ns]
dtypes: category(6), datetime64[ns](1), float64(2), object(4)
memory usage: 659.4+ KB
None


In [96]:
# Definir o caminho para salvar o DataFrame otimizado
caminho_exportacao = 'df_optimized.parquet'

# Salvar o DataFrame em formato Parquet com compressão
df_final.to_parquet(caminho_exportacao, index=False, compression='snappy')

print(f"DataFrame otimizado salvo em {caminho_exportacao}")


DataFrame otimizado salvo em df_optimized.parquet


In [97]:
print(df_final.head(10))

   mes  menor_preco  maior_preco medicamentoid     produto  \
0  Dez      2147.90      2640.04         48954  Canabidiol   
1  Jan      2147.90      2640.04         48954  Canabidiol   
2  Fev      2147.90      2640.10         48954  Canabidiol   
3  Mar      2147.90      2640.10         48954  Canabidiol   
4  Abr      2060.03      2653.76         48954  Canabidiol   
5  Mai      2330.24      2653.76         48954  Canabidiol   
6  Jun      2330.24      2653.76         48954  Canabidiol   
7  Jul      2330.00      2752.13         48954  Canabidiol   
8  Ago      2330.00      2752.13         48954  Canabidiol   
9  Set      2330.00      2752.13         48954  Canabidiol   

                                apresentacao laboratorioid  \
0  200mg/ml Solução 30 ml + Seringa Dosadora            18   
1  200mg/ml Solução 30 ml + Seringa Dosadora            18   
2  200mg/ml Solução 30 ml + Seringa Dosadora            18   
3  200mg/ml Solução 30 ml + Seringa Dosadora            18   
4  200m

In [13]:
import requests
from bs4 import BeautifulSoup

response = requests.get('https://www.pfizerpro.com.br/incorporacoes-cp-ans-sus#tabpanel-tab2')

response.encoding = 'utf-8'

soup = BeautifulSoup(response.content, 'html.parser')

id = 'iwhp5lh'

table = soup.find(id=id)

In [14]:
print(table)

<table border="1" cellpadding="3" cellspacing="0" data-highlightable="1" draggable="true" id="iwhp5lh">
<thead><tr>
<th>Nº</th>
<th>Nome da Tecnologia</th>
<th>Localização</th>
<th>Indicação</th>
<th>Data de Incorporação</th>
<th>Portaria</th>
<th>Relatório de Recomendação</th>
</tr></thead>
<tbody>
<tr>
<td>21</td>
<td>Abemaciclibe</td>
<td>Mama</td>
<td>Câncer de mama avançado ou metastático, com HR+ e HER2- em pacientes adultas</td>
<td>06/12/2021</td>
<td><a href="https://www.gov.br/conitec/pt-br/midias/relatorios/portaria/2021/20211207_portaria_73.pdf" target="_blank">Acesse</a></td>
<td><a href="https://www.gov.br/conitec/pt-br/midias/relatorios/2021/20211207_relatorio_678_abemaciclibe_palbociclibe_ribociclibe_carcinoma_mama_final.pdf" target="_blank">Acesse</a></td>
</tr>
<tr>
<td>15</td>
<td>Abiraterona</td>
<td>Prostata</td>
<td>Câncer de próstata metastático resistente à castração em pacientes com uso prévio de quimioterapia</td>
<td>24/07/2019</td>
<td><a href="https://www.g

In [35]:
a_tag = soup.find('a')
if a_tag and 'href'in a_tag.attrs:
    print(a_tag['href'])

https://www.ans.gov.br/component/legislacao/?view=legislacao&task=textoLei&format=raw&id=NDAzMw==


In [37]:
from io import StringIO
import pandas as pd

# Function to extract href from an <a> tag
def extract_href(cell_html):
    """
    Extracts the href attribute from an <a> tag in the cell.
    If no <a> tag is present, returns the original cell content.
    """
    soup = BeautifulSoup(cell_html, 'html.parser')
    a_tag = soup.find('a')
    if a_tag and 'href' in a_tag.attrs:
        return a_tag['href']
    else:
        return soup.get_text(strip=True)


html_str = str(table)
html_io = StringIO(html_str)
df_pre = pd.read_html(html_io)[0]


columns = list(df_pre.columns)
columns.remove('Nº')

df = df_pre[columns].copy()

df.loc[:, 'Portaria'] = df.loc[:, 'Portaria'].apply(extract_href)
df.loc[:, 'Relatório de Recomendação'] = df.loc[:, 'Relatório de Recomendação'].apply(extract_href)

print(df)

                                   Nome da Tecnologia  \
0                                        Abemaciclibe   
1                                         Abiraterona   
2                                     Alfainterferona   
3                                       Blinatumomabe   
4                                         Bortezomibe   
5                                         Bortezomibe   
6                                         Bortezomibe   
7                               Brentuximabe vedotina   
8                                         Crizotinibe   
9                           Erlotinibe, cloridrato de   
10                                         Gefitinibe   
11                             Hormonioterapia prévia   
12                             Imatinibe, mesilato de   
13                                         Nivolumabe   
14                                       Palbociclibe   
15                          Pazopanibe, cloridrato de   
16                             

In [None]:
import json

data_records = df.to_dict(orient='records')

json_data = json.dumps(data_records, ensure_ascii=False, indent=4)
json_data = json_data.replace('\\/', '/')


with open('onc_conitec.json', 'w', encoding='utf-8') as f:
    f.write(json_data)

SyntaxError: invalid syntax (935548511.py, line 3)

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import json
from io import StringIO

# Function to extract href from an <a> tag
def extract_href(cell_html):
    """
    Extracts the href attribute from an <a> tag in the cell.
    If no <a> tag is present, returns the original cell content.
    """
    soup = BeautifulSoup(cell_html, 'html.parser')
    a_tag = soup.find('a')
    if a_tag and 'href' in a_tag.attrs:
        return a_tag['href']
    else:
        return soup.get_text(strip=True)

# Step 1: Fetch the webpage content
url = 'https://www.pfizerpro.com.br/incorporacoes-cp-ans-sus#tabpanel-tab2'
response = requests.get(url)
response.encoding = 'utf-8'  # Ensure correct encoding

# Step 2: Parse the HTML content with BeautifulSoup
soup = BeautifulSoup(response.content, 'html.parser')

# Step 3: Locate the specific table using its ID
table_id = 'iwhp5lh'
table = soup.find('table', id=table_id)

if table is None:
    raise ValueError(f"No table found with id '{table_id}'")

# Step 4: Extract headers
headers = [th.get_text(strip=True) for th in table.find('thead').find_all('th')]

# Step 5: Extract rows
rows = []
for tr in table.find('tbody').find_all('tr'):
    cells = tr.find_all('td')
    row = {}
    for idx, cell in enumerate(cells):
        header = headers[idx]
        if header in ['Portaria', 'Relatório de Recomendação']:
            # Extract the href attribute if <a> tag is present
            row[header] = extract_href(str(cell))
        else:
            row[header] = cell.get_text(strip=True)
    rows.append(row)

# Step 6: Create DataFrame
df = pd.DataFrame(rows)


columns = list(df_pre.columns)
columns.remove('Nº')

df = df[columns]

# Optional: Verify the DataFrame
print(df.head())

# Step 8: Export DataFrame to pretty-printed JSON without escaped forward slashes
data_records = df.to_dict(orient='records')
json_data = json.dumps(data_records, ensure_ascii=False, indent=4).replace('\\/', '/')

with open('onc_conitec_pretty.json', 'w', encoding='utf-8') as f:
    f.write(json_data)

print("JSON data has been successfully saved to 'onc_conitec_pretty.json'.")

  Nome da Tecnologia                   Localização  \
0       Abemaciclibe                          Mama   
1        Abiraterona                      Prostata   
2    Alfainterferona                      Melanoma   
3      Blinatumomabe  Leucemia linfoblástica aguda   
4        Bortezomibe              Mieloma múltiplo   

                                           Indicação Data de Incorporação  \
0  Câncer de mama avançado ou metastático, com HR...           06/12/2021   
1  Câncer de próstata metastático resistente à ca...           24/07/2019   
2  Melanoma cutâneo em estágio clínico III, trata...           18/01/2013   
3  Leucemia linfoblástica aguda (LLA) B derivada ...           01/06/2022   
4  Mieloma múltiplo em pacientes adultos, não pre...           25/09/2020   

                                            Portaria  \
0  https://www.gov.br/conitec/pt-br/midias/relato...   
1  https://www.gov.br/conitec/pt-br/midias/relato...   
2  https://www.gov.br/conitec/pt-br/midias/r

In [12]:
import tempfile
import pdfplumber
import requests
import pandas as pd

url = 'https://www.gov.br/anvisa/pt-br/setorregulado/regularizacao/medicamentos/medicamentos-de-referencia/arquivos/lista-b-incluidos-08112024.pdf'

response = requests.get(url)

with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_pdf:
    temp_pdf.write(response.content)
    temp_pdf_path = temp_pdf.name

tables = []

with pdfplumber.open(temp_pdf_path) as pdf:
    for page in pdf.pages:
        table = page.extract_table()
        tables.append(table)

In [13]:
# Step 3: Define expected header (adjust based on actual headers)
expected_header = ['FÁRMACO', 'DETENTOR', 'MEDICAMENTO', 'REGISTRO', 'CONCENTRAÇÃO', 'FORMA FARMACÊUTICA', 'DATA INCLUSÃO']

# Initialize a list to collect all data rows
data_rows = []

for table in tables:
    for row in table:
        # Check if the row is a title row by looking for the title text
        if 'LISTA A DE MEDICAMENTOS DE REFERÊNCIA' in row[0]:
            continue  # Skip title rows
        
        # Check if the row matches the expected header
        if row[:len(expected_header)] == expected_header:
            continue  # Skip header rows
        
        # Ensure the row has the expected number of columns
        if len(row) >= len(expected_header):
            # Extract only the required number of columns
            cleaned_row = row[:len(expected_header)]
            # Replace None with empty strings or handle as needed
            cleaned_row = [cell if cell is not None else '' for cell in cleaned_row]
            data_rows.append(cleaned_row)
        else:
            # Handle rows with fewer columns (optional)
            # For example, you might want to log or print these rows for manual inspection
            print(f"Skipping incomplete row: {row}")

# Step 4: Create the DataFrame
df = pd.DataFrame(data_rows, columns=expected_header)

# Optional: Clean DataFrame (e.g., trim whitespace)
df = df.map(lambda x: x.strip() if isinstance(x, str) else x)

# Display the first few rows of the DataFrame
print(df.head())

                                             FÁRMACO  \
0              LISTA B DE MEDICAMENTOS DE REFERÊNCIA   
1                                         ASSOCIAÇÃO   
2  acetato de betametasona + fosfato dissódico de...   
3           acetato de ciproterona + etinilestradiol   
4          acetato de clormadinona + etinilestradiol   

                                  DETENTOR         MEDICAMENTO   REGISTRO  \
0                                                                           
1                                 DETENTOR         MEDICAMENTO   REGISTRO   
2                                   Cosmed  CELESTONE SOLUSPAN  178170775   
3                                    Bayer            DIANE 35  170560072   
4  Grünenthal do Brasil\nFarmacêutica Ltda              BELARA  186100001   

          CONCENTRAÇÃO   FORMA FARMACÊUTICA     DATA INCLUSÃO  
0                                                              
1         CONCENTRAÇÃO  FORMA\nFARMACÊUTICA  DATA DE INCLUSÃO  
2  3mg/m

In [None]:
import json

data_records = df.to_dict(orient='records')
json_data = json.dumps(data_records, ensure_ascii=False, indent=4).replace('\\/', '/')

with open('anvisa_lista_b.json', 'w', encoding='utf-8') as f:
    f.write(json_data)

In [14]:
data_records2 = df.to_dict(orient='records')

dict_final = data_records + data_records2


json_data = json.dumps(dict_final, ensure_ascii=False, indent=4).replace('\\/', '/')

with open('anvisa_lista.json', 'w', encoding='utf-8') as f:
    f.write(json_data)

In [35]:
import json
from collections import Counter
from tqdm import tqdm  # Optional: For displaying a progress bar

def count_on_anvisa(output_file_path):
    """
    Counts the number of True and False values in the 'on_anvisa' field.
    """
    counter = Counter()
    with open(output_file_path, 'r', encoding='utf-8') as f:
        # Optionally, count total lines for progress bar
        try:
            import os
            total_lines = sum(1 for _ in open(output_file_path, 'r', encoding='utf-8'))
        except Exception:
            total_lines = None  # Proceed without progress bar if unable to determine

        with tqdm(total=total_lines, desc="Counting 'on_anvisa'") as pbar:
            for line in f:
                try:
                    med_item = json.loads(line)
                    on_anvisa = med_item.get('on_anvisa', False)
                    if isinstance(on_anvisa, bool):
                        counter[on_anvisa] += 1
                    else:
                        # Handle non-boolean representations
                        if str(on_anvisa).strip().lower() == 'true':
                            counter[True] += 1
                        elif str(on_anvisa).strip().lower() == 'false':
                            counter[False] += 1
                except json.JSONDecodeError:
                    print("Invalid JSON line encountered. Skipping.")
                pbar.update(1)
    return counter


anvisa_file_path = 'anvisa_lista.json'
med_items_file_path = 'med_items.jsonl'
output_file_path = 'on_anvisa_update.jsonl'


counts = count_on_anvisa(output_file_path)
print("\n'on_anvisa' counts:")
print(f"True: {counts.get(True, 0)}")
print(f"False: {counts.get(False, 0)}")

Counting 'on_anvisa': 100%|██████████| 1487/1487 [00:00<00:00, 18149.39it/s]


'on_anvisa' counts:
True: 163
False: 1324





In [1]:
import pandas as pd

df = pd.read_json('on_anvisa_update.jsonl', lines=True)

anvisa_approved = df[df['on_anvisa'] == True]

anvisa_approved.to_json('only_anvisa_approved.jsonl', orient='records', lines=True)

In [2]:
world_cancer_meds = [   
                     "ABVD | doxorubicin| bleomycin| vinblastine|dacarbazine", "AC | doxorubicin | cyclophosphamide", "trioxido de arsênio", "Arsenico Trioxido e tretinoin", "Abemaciclib", "Abiraterone", 
                     "Abraxane", "Abstral", "Acalabrutinib", "Actimorph", "Actinomycin D", 
                     "Actiq", "Adriamycin", "Afatinib", "Afinitor", "Aldara", "Alectinib", 
                     "Alkeran", "Alpelisib and fulvestrant", "Anastrazole", "Apalutamide", 
                     "Ara C", "Arimidex", "Aromasin", "Arsenico trioxido and Tretinoin", 
                     "Asciminib", "Asparaginase", "Atezolizumab", "Atezolizumab and bevacizumab", 
                     "Atezolizumab and nab-paclitaxel", "Avelumab", "Axitinib", "Azacitidine", 
                     "BEACOPP", "BEAM", "Bendamustine", "Besponsa", "Bevacizumab", 
                     "Bexarotene", "Bicalutamide", "Bleomycin", "Bleomycin, etoposide and platinum", 
                     "Blinatumomab", "Bortezomib", "Bortezomib, cyclophosphamide and dexamethasone", 
                     "Bosulif", "Bosutinib", "Brentuximab", "Brigatinib", "Buserelin", "Busulfan", 
                     "CAPE-OX", "CAPOX", "CCNU", "CHOP", "CPX351", "Cabazitaxel", 
                     "Cabometyx", "Cabozantinib", "Caelyx", "Calpol", "Campto", "Capecitabine", 
                     "Caprelsa", "CarboTaxol", "Carboplatin", "Carboplatin and etoposide", 
                     "Carboplatin and paclitaxel", "Carfilzomib and dexamethasone", "Carmustine", 
                     "Casodex", "Cemiplimab", "Ceritinib", "Cetuximab", "Chlorambucil", 
                     "Cisplatin", "Cisplatin and capecitabine", "Cisplatin and fluorouracil", 
                     "Cisplatin, etoposide and ifosfamide", "Cisplatin, fluorouracil and trastuzumab", 
                     "Cladribine", "Clasteon", "Cometriq", "Cosmegen", "Crisantaspase", 
                     "Crizotinib", "Cyclophosphamide", "Cyclophosphamide, thalidomide and dexamethasone", 
                     "Cyprostat", "Cyproterone acetate", "Cytarabine", "Cytarabine into spinal fluid", 
                     "Cytosine arabinoside", "Dabrafenib", 
                     "Dabrafenib and trametinib", "Dacarbazine", "Dacomitinib", "Dactinomycin", 
                     "Daratumumab", "Daratumumab, bortezomib, thalidomide and dexamethasone", 
                     "Daratumumab, lenalidomide and dexamethasone", "Darolutamide", "Darzalex", 
                     "Dasatinib", "Daunorubicin and cytarabine", "Daunorubicin, cytarabine and midostaurin", 
                     "Decapeptyl SR", "Degarelix", "Denosumab", "Dexamethasone", "Diamorphine", 
                     "Disprol", "Docetaxel", "Docetaxel, cisplatin and fluorouracil", "Dostarlimab", 
                     "Doxifos", "Doxorubicin", "Doxorubicin and ifosfamide", "Durogesic", 
                     "Durvalumab", "Effentora", 
                     "Encorafenib and binimetinib", "Encorafenib and cetuximab", "Entrectinib", 
                     "Enzalutamide", "Epirubicin", "Epirubicin, cisplatin and capecitabine", 
                     "Epirubicin, oxaliplatin and fluorouracil", "Erbitux", "Eribulin", "Erlotinib", 
                     "Erwinase", "Etopophos", "Etoposide", "Everolimus", "Exemestane", "FOLFIRINOX", "FOLFOX", "FOLFOXIRI", "Faslodex", 
                     "Femara", "Fentanyl", "Firmagon", "Fludara", "Fludarabine", 
                     "Fludarabine, cyclophosphamide and rituximab", "Fluorouracil", 
                     "Fluorouracil and mitomycin C", "Fluorouracil, Leucovorin, Oxaliplatin and Docetaxel", 
                     "Flutamide", "Folinic acid, fluorouracil and irinotecan", "Fotivda", 
                     "Fulvestrant", "Gefitinib", "GemCarbo", "GemTaxol", 
                     "Gemcitabine", "Gemcitabine and capecitabine", "Gemcitabine and cisplatin", 
                     "Gemcitabine and nab-paclitaxel", "Gemcitabine and paclitaxel", 
                     "Gemtuzumab ozogamicin, daunorubicin and cytarabine", "Gemzar", 
                     "Gilteritinib", "Giotrif", "Gliadel", "Glivec", "Gonapeptyl Depot", 
                     "Goserelin for breast cancer", "Goserelin for prostate cancer", 
                     "Granulocyte colony stimulating factor","Halaven", "Herceptin", 
                     "Herzuma", "Hycamtin", "Hydrea", "Hydrocortisone", "Hydroxycarbamide", 
                     "Hydroxyurea","Ibandronic acid", "Ibrance", "Ibrutinib", 
                     "Ibuprofen", "Iclusig", "Idarubicin", "Ifosfamide", "Imatinib", 
                     "Imiquimod cream", "Inotuzumab ozogamicin", "Ipilimumab", 
                     "Ipilimumab and nivolumab", "IrCap", "Iressa", "Irinotecan", 
                     "Irinotecan and capecitabine", "Irinotecan de Gramont", "Irinotecan modified de Gramont", 
                     "Ivosidenib", "Ivosidenib and azacitidine", "Ixazomib, lenalidomide and dexamethasone", 
                     "Jevtana","Kadcyla", "Kapake", "Keytruda", "Kisqali", 
                     "L-DA", "Lanreotide", "Larotrectinib", "Lenalidomide", "Lenvatinib", 
                     "Letrozole", "Leukeran", "Leuprorelin", "Leustat", "Levact", 
                     "Liposomal daunorubicin and cytarabine", "Liposomal doxorubicin", 
                     "Litak", "Lomustine", "Lonsurf", "Lorlatinib", "Lutrate", "Lynparza", 
                     "Lysodren", "Mabthera", 
                     "Medroxyprogesterone acetate", "Megace", "Megestrol acetate", "Melphalan", 
                     "Mepact", "Mercaptopurine", "Methotrexate", "Methylprednisolone", 
                     "Midostaurin", "Mifamurtide", "Mitomycin C", "Mitotane", "Mitoxana", 
                     "Mitoxantrone", "Mobocertinib", "Modified de Gramont", "Morphgesic SR", 
                     "Morphine", "Nab paclitaxel", "Navelbine", "Nelarabine", "Neratinib", 
                     "Nerlynx", "Nexavar", "Nilotinib", "Nintedanib", "Nipent", "Niraparib", 
                     "Nivolumab","Obinutuzumab", "Octreotide", "Olaparib", "Ontruzant", 
                     "Opdivo", "Oramorph", "Osimertinib", "OxCap", "Oxaliplatin", 
                     "Oxaliplatin and capecitabine", "PC", "PE", "PMitCEBO", "Paclitaxel", 
                     "Paclitaxel and carboplatin", "Palbociclib", "Pamidronate", "Pamidronate disodium", 
                     "Panadol", "Panitumumab", "Panobinostat, bortezomib and dexamethasone", 
                     "Paracetamol", "Pazopanib", "Peginterferon alfa 2a", "Pembrolizumab", 
                     "Pemetrexed", "Pemetrexed and carboplatin", "Pemetrexed and cisplatin", 
                     "Pemigatinib", "Pentostatin", "Perjeta", "Pertuzumab", 
                     "Phesgo or trastuzumab and pertuzumab", 
                     "Polatuzumab vedotin, bendamustine and rituximab", 
                     "Pomalidomide and dexamethasone", "Ponatinib", "Prednisolone", 
                     "Procarbazine", "Procarbazine, lomustine and vincristine", "Prolia", 
                     "Prostap", "Provera", "R-CHOP", "R-CVP", "R-DHAP", "R-ESHAP", 
                     "R-Idelalisib", "RICE", "Raloxifene", "Raltitrexed", "Regorafenib", 
                     "Revlimid", "Ribociclib", "Rituximab", "Rixathon", "Rubraca", "Rucaparib", 
                     "Ruxience", "Ruxolitinib", "Sacituzumab govitecan", "Selpercatinib", 
                     "Sevredol", "Sodium clodronate", "Solpadol", "Sorafenib", "Steroids", 
                     "Stivarga", "Streptozocin", "Sunitinib", "Sutent","TIP", "Tafinlar", 
                     "Tagrisso", "Talazoparib", "Talimogene laherparepvec", "Tamoxifen", 
                     "Tarceva", "Targretin", "Tasigna", "Taxol", "Taxotere", "Taxotere and cyclophosphamide", 
                     "Tecentriq", "Temodal", "Temozolomide", "Tepadina", "Tepotinib", 
                     "Thiotepa", "Tivozanib", "Tomudex", "Topotecan", "Trabectedin", 
                     "Trastuzumab", "Trastuzumab and pertuzumab", "Trastuzumab deruxtecan", 
                     "Trastuzumab emtansine", "Treosulfan", "Trifluridine and tipiracil", 
                     "Triptorelin", "Trisenox", "Truxima", "Tucatinib, trastuzumab and capecitabine", 
                     "Tylex","VDC/IE", "VIDE", "Vargatef", "VeIP", "Vectibix", "Velcade", 
                     "Vemurafenib", "Venetoclax", "Vesanoid", "Vidaza", "Vinblastine", 
                     "Vincristine", "Vincristine, actinomycin D and cyclophosphamide", 
                     "Vincristine, actinomycin D and ifosfamide", "Vinorelbine", "Votrient", 
                     "Vyxeos", "X", "XELOX", "Xalkori", "Xeloda", "Xgeva", "Xospata", "Xtandi", 
                     "Yervoy", "Yondelis","Zanosar", "Zelboraf", "Zoladex (breast cancer)", 
                     "Zoladex (prostate cancer)", "Zoledronic acid", "Zometa", "Zomorph", 
                     "Zydelig", "Zytiga"
                ]

In [12]:
world_cancer_meds_traduzido = [
  "ABVD | doxorubicin| bleomycin| vinblastine|dacarbazine", "AC | doxorubicin | cyclophosphamide", "trioxido de arsênio", "Arsenico Trioxido e tretinoin", "Abemaciclib", "Abiraterone", 
  "Abraxane", "Abstral", "Acalabrutinib", "Actimorph", "Actinomycin D", 
  "Actiq", "Adriamycin", "Afatinib", "Afinitor", "Aldara", "Alectinib", 
  "Alkeran", "Alpelisib and fulvestrant", "Anastrazole", "Apalutamide", 
  "Ara C", "Arimidex", "Aromasin", "Arsenico trioxido and Tretinoin", 
  "Asciminib", "Asparaginase", "Atezolizumab", "Atezolizumab and bevacizumab", 
  "Atezolizumab and nab-paclitaxel", "Avelumab", "Axitinib", "Azacitidine", 
  "BEACOPP", "BEAM", "Bendamustine", "Besponsa", "Bevacizumab", 
  "Bexarotene", "Bicalutamide", "Bleomycin", "Bleomycin, etoposide and platinum", 
  "Blinatumomab", "Bortezomib", "Bortezomib, cyclophosphamide and dexamethasone", 
  "Bosulif", "Bosutinib", "Brentuximab", "Brigatinib", "Buserelin", "Busulfan", 
  "CAPE-OX", "CAPOX", "CCNU", "CHOP", "CPX351", "Cabazitaxel", 
  "Cabometyx", "Cabozantinib", "Caelyx", "Calpol", "Campto", "Capecitabine", 
  "Caprelsa", "CarboTaxol", "Carboplatin", "Carboplatin and etoposide", 
  "Carboplatin and paclitaxel", "Carfilzomib and dexamethasone", "Carmustine", 
  "Casodex", "Cemiplimab", "Ceritinib", "Cetuximab", "Chlorambucil", 
  "Cisplatin", "Cisplatin and capecitabine", "Cisplatin and fluorouracil", 
  "Cisplatin, etoposide and ifosfamide", "Cisplatin, fluorouracil and trastuzumab", 
  "Cladribine", "Clasteon", "Cometriq", "Cosmegen", "Crisantaspase", 
  "Crizotinib", "Cyclophosphamide", "Cyclophosphamide, thalidomide and dexamethasone", 
  "Cyprostat", "Cyproterone acetate", "Cytarabine", "Cytarabine into spinal fluid", 
  "Cytosine arabinoside", "Dabrafenib","Dabrafenib and trametinib", "Dacarbazine", "Dacomitinib", "Dactinomycin", 
  "Daratumumab", "Daratumumab, bortezomib, thalidomide and dexamethasone", 
  "Daratumumab, lenalidomide and dexamethasone", "Darolutamide", "Darzalex", 
  "Dasatinib", "Daunorubicin and cytarabine", "Daunorubicin, cytarabine and midostaurin", 
  "Decapeptyl SR", "Degarelix", "Denosumab", "Dexamethasone", "Diamorphine", 
  "Disprol", "Docetaxel", "Docetaxel, cisplatin and fluorouracil", "Dostarlimab", 
  "Doxifos", "Doxorubicin", "Doxorubicin and ifosfamide", "Durogesic", 
  "Durvalumab", "Effentora","Encorafenib and binimetinib", "Encorafenib and cetuximab", "Entrectinib", 
  "Enzalutamide", "Epirubicin", "Epirubicin, cisplatin and capecitabine", 
  "Epirubicin, oxaliplatin and fluorouracil", "Erbitux", "Eribulin", "Erlotinib", 
  "Erwinase", "Etopophos", "Etoposide", "Everolimus", "Exemestane", "FOLFIRINOX", "FOLFOX", "FOLFOXIRI", "Faslodex", 
  "Femara", "Fentanyl", "Firmagon", "Fludara", "Fludarabine", 
  "Fludarabine, cyclophosphamide and rituximab", "Fluorouracil", 
  "Fluorouracil and mitomycin C", "Fluorouracil, Leucovorin, Oxaliplatin and Docetaxel", 
  "Flutamide", "Folinic acid, fluorouracil and irinotecan", "Fotivda", 
  "Fulvestrant", "Gefitinib", "GemCarbo", "GemTaxol", 
  "Gemcitabine", "Gemcitabine and capecitabine", "Gemcitabine and cisplatin", 
  "Gemcitabine and nab-paclitaxel", "Gemcitabine and paclitaxel", 
  "Gemtuzumab ozogamicin, daunorubicin and cytarabine", "Gemzar", 
  "Gilteritinib", "Giotrif", "Gliadel", "Glivec", "Gonapeptyl Depot", 
  "Goserelin for breast cancer", "Goserelin for prostate cancer", 
  "Granulocyte colony stimulating factor","Halaven", "Herceptin", 
  "Herzuma", "Hycamtin", "Hydrea", "Hydrocortisone", "Hydroxycarbamide", 
  "Hydroxyurea","Ibandronic acid", "Ibrance", "Ibrutinib", 
  "Ibuprofen", "Iclusig", "Idarubicin", "Ifosfamide", "Imatinib", 
  "Imiquimod cream", "Inotuzumab ozogamicin", "Ipilimumab", 
  "Ipilimumab and nivolumab", "IrCap", "Iressa", "Irinotecan", 
  "Irinotecan and capecitabine", "Irinotecan de Gramont", "Irinotecan modified de Gramont", 
  "Ivosidenib", "Ivosidenib and azacitidine", "Ixazomib, lenalidomide and dexamethasone", 
  "Jevtana","Kadcyla", "Kapake", "Kisqali","Keytruda", 
  "Lanreotide", "Larotrectinib", "Lenalidomide", "Lenvatinib", 
  "Letrozole", "Leukeran", "Leuprorelin", "Leustat", "Levact", 
  "Liposomal daunorubicin and cytarabine", "Liposomal doxorubicin", 
  "Litak", "Lomustine", "Lonsurf", "Lorlatinib", "Lutrate", "Lynparza", 
  "Lysodren", "Mabthera", "Medroxyprogesterone acetate", "Megace", "Megestrol acetate", "Melphalan", 
  "Mepact", "Mercaptopurine", "Methotrexate", "Methylprednisolone", 
  "Midostaurin", "Mifamurtide", "Mitomycin C", "Mitotane", "Mitoxana", 
  "Mitoxantrone", "Mobocertinib", "Modified de Gramont", "Morphgesic SR", 
  "Morphine", "Nab paclitaxel", "Navelbine", "Nelarabine", "Neratinib", 
  "Nerlynx", "Nexavar", "Nilotinib", "Nintedanib", "Nipent", "Niraparib", 
  "Nivolumab","Obinutuzumab", "Octreotide", "Olaparib", "Ontruzant", 
  "Opdivo", "Oramorph", "Osimertinib", "OxCap", "Oxaliplatin", 
  "Oxaliplatin and capecitabine", "PMitCEBO", "Paclitaxel", 
  "Paclitaxel and carboplatin", "Palbociclib", "Pamidronate", "Pamidronate disodium", 
  "Panadol", "Panitumumab", "Panobinostat, bortezomib and dexamethasone", 
  "Paracetamol", "Pazopanib", "Peginterferon alfa 2a", "Pembrolizumab", 
  "Pemetrexed", "Pemetrexed and carboplatin", "Pemetrexed and cisplatin", 
  "Pemigatinib", "Pentostatin", "Perjeta", "Pertuzumab", 
  "Phesgo or trastuzumab and pertuzumab", 
  "Polatuzumab vedotin, bendamustine and rituximab", 
  "Pomalidomide and dexamethasone", "Ponatinib", "Prednisolone", 
  "Procarbazine", "Procarbazine, lomustine and vincristine", "Prolia", 
  "Prostap", "Provera", "RICE", "Raloxifene", "Raltitrexed", "Regorafenib", 
  "Revlimid", "Ribociclib", "Rituximab", "Rixathon", "Rubraca", "Rucaparib", 
  "Ruxience", "Ruxolitinib", "Sacituzumab govitecan", "Selpercatinib", 
  "Sevredol", "Sodium clodronate", "Solpadol", "Sorafenib", "Steroids", 
  "Stivarga", "Streptozocin", "Sunitinib", "Sutent","TIP", "Tafinlar", 
  "Tagrisso", "Talazoparib", "Talimogene laherparepvec", "Tamoxifen", 
  "Tarceva", "Targretin", "Tasigna", "Taxol", "Taxotere", "Taxotere and cyclophosphamide", 
  "Tecentriq", "Temodal", "Temozolomide", "Tepadina", "Tepotinib", 
  "Thiotepa", "Tivozanib", "Tomudex", "Topotecan", "Trabectedin", 
  "Trastuzumab", "Trastuzumab and pertuzumab", "Trastuzumab deruxtecan", 
  "Trastuzumab emtansine", "Treosulfan", "Trifluridine and tipiracil", 
  "Triptorelin", "Trisenox", "Truxima", "Tucatinib, trastuzumab and capecitabine", 
  "Tylex", "Vargatef",  "Vectibix", "Velcade", 
  "Vemurafenib", "Venetoclax", "Vesanoid", "Vidaza", "Vinblastine", 
  "Vincristine", "Vincristine, actinomycin D and cyclophosphamide", 
  "Vincristine, actinomycin D and ifosfamide", "Vinorelbine", "Votrient", 
  "Vyxeos", "XELOX", "Xalkori", "Xeloda", "Xgeva", "Xospata", "Xtandi", 
  "Yervoy", "Yondelis","Zanosar", "Zelboraf", "Zoladex (breast cancer)", 
  "Zoladex (prostate cancer)", "Zoledronic acid", "Zometa", "Zomorph", 
  "Zydelig", "Zytiga"
]

In [13]:
world_cancer_meds_traduzido = [med.lower() for med in world_cancer_meds]
world_cancer_meds_traduzido

['abvd | doxorubicin| bleomycin| vinblastine|dacarbazine',
 'ac | doxorubicin | cyclophosphamide',
 'trioxido de arsênio',
 'arsenico trioxido e tretinoin',
 'abemaciclib',
 'abiraterone',
 'abraxane',
 'abstral',
 'acalabrutinib',
 'actimorph',
 'actinomycin d',
 'actiq',
 'adriamycin',
 'afatinib',
 'afinitor',
 'aldara',
 'alectinib',
 'alkeran',
 'alpelisib and fulvestrant',
 'anastrazole',
 'apalutamide',
 'ara c',
 'arimidex',
 'aromasin',
 'arsenico trioxido and tretinoin',
 'asciminib',
 'asparaginase',
 'atezolizumab',
 'atezolizumab and bevacizumab',
 'atezolizumab and nab-paclitaxel',
 'avelumab',
 'axitinib',
 'azacitidine',
 'beacopp',
 'beam',
 'bendamustine',
 'besponsa',
 'bevacizumab',
 'bexarotene',
 'bicalutamide',
 'bleomycin',
 'bleomycin, etoposide and platinum',
 'blinatumomab',
 'bortezomib',
 'bortezomib, cyclophosphamide and dexamethasone',
 'bosulif',
 'bosutinib',
 'brentuximab',
 'brigatinib',
 'buserelin',
 'busulfan',
 'cape-ox',
 'capox',
 'ccnu',
 'chop

In [14]:
import pandas as pd

anvisa = pd.read_json('anvisa_lista.json')

item = anvisa.iloc[0]
item['MEDICAMENTO'].lower()


'ziagenavir'

In [15]:
world_meds_anvisa = []

for i in range(0, len(anvisa)):
    med = anvisa.iloc[i]
    if med['MEDICAMENTO'].lower() in world_cancer_meds_traduzido:
        world_meds_anvisa.append(med['MEDICAMENTO'].lower())
    

world_meds_off_anvisa = [med for med in world_cancer_meds_traduzido if med.lower() not in world_meds_anvisa]


print(world_meds_off_anvisa)

['abvd | doxorubicin| bleomycin| vinblastine|dacarbazine', 'ac | doxorubicin | cyclophosphamide', 'trioxido de arsênio', 'arsenico trioxido e tretinoin', 'abemaciclib', 'abiraterone', 'abstral', 'acalabrutinib', 'actimorph', 'actinomycin d', 'actiq', 'adriamycin', 'afatinib', 'aldara', 'alectinib', 'alpelisib and fulvestrant', 'anastrazole', 'apalutamide', 'ara c', 'arsenico trioxido and tretinoin', 'asciminib', 'asparaginase', 'atezolizumab', 'atezolizumab and bevacizumab', 'atezolizumab and nab-paclitaxel', 'avelumab', 'axitinib', 'azacitidine', 'beacopp', 'beam', 'bendamustine', 'besponsa', 'bevacizumab', 'bexarotene', 'bicalutamide', 'bleomycin', 'bleomycin, etoposide and platinum', 'blinatumomab', 'bortezomib', 'bortezomib, cyclophosphamide and dexamethasone', 'bosutinib', 'brentuximab', 'brigatinib', 'buserelin', 'busulfan', 'cape-ox', 'capox', 'ccnu', 'chop', 'cpx351', 'cabazitaxel', 'cabozantinib', 'caelyx', 'calpol', 'campto', 'capecitabine', 'carbotaxol', 'carboplatin', 'carb

In [16]:
print(f'Número de Medicamentos Mundialmente Usados: {len(world_cancer_meds_traduzido)}')
print('\n')
print(f'Número de Medicamentos Mundialmente Usados aprovados pela Anvisa: {len(world_meds_anvisa)}')
print('\n')
print(f'Número de Medicamentos Mundialmente Usados fora da Anvisa: {len(world_meds_off_anvisa)}')

Número de Medicamentos Mundialmente Usados: 392


Número de Medicamentos Mundialmente Usados aprovados pela Anvisa: 94


Número de Medicamentos Mundialmente Usados fora da Anvisa: 341


In [20]:
world_med = pd.DataFrame(world_cancer_meds_traduzido)
world_med

Unnamed: 0,0
0,abvd | doxorubicin| bleomycin| vinblastine|dac...
1,ac | doxorubicin | cyclophosphamide
2,trioxido de arsênio
3,arsenico trioxido e tretinoin
4,abemaciclib
...,...
387,zoledronic acid
388,zometa
389,zomorph
390,zydelig


In [21]:
# Remove duplicates from the list for better comparison
world_meds_anvisa_unique = set(world_meds_anvisa)

# Create a new boolean column to check if the value in column '0' is in the list
world_med['APROVADO_ANVISA'] = world_med[0].apply(lambda x: x in world_meds_anvisa_unique)


In [22]:
world_med.rename(columns={0:'MEDICAMENTO'}, inplace=True)
world_med

Unnamed: 0,MEDICAMENTO,APROVADO_ANVISA
0,abvd | doxorubicin| bleomycin| vinblastine|dac...,False
1,ac | doxorubicin | cyclophosphamide,False
2,trioxido de arsênio,False
3,arsenico trioxido e tretinoin,False
4,abemaciclib,False
...,...,...
387,zoledronic acid,False
388,zometa,True
389,zomorph,False
390,zydelig,False


In [23]:
world_med.to_json('medicamento_mundial_cancer_anvisa.json', orient='records')

In [None]:
siglaTribunal = [
    {
        "sigla": "CNJ",
        "nome": "CNJ - Conselho Nacional de Justiça"
    },
    {
        "sigla": "PJeCor",
        "nome": "PJeCor - Corregedorias"
    },
    {
        "sigla": "SEEU",
        "nome": "Sistema Eletrônico de Execução Unificado"
    },
    {
        "sigla": "CJF",
        "nome": "CJF - Conselho da Justiça Federal"
    },
    {
        "sigla": "CSJT",
        "nome": "CSJT - Conselho Superior da Justiça do Trabalho"
    },
    {
        "sigla": "STF",
        "nome": "STF - Supremo Tribunal Federal"
    },
    {
        "sigla": "STJ",
        "nome": "STJ - Superior Tribunal de Justiça"
    },
    {
        "sigla": "STM",
        "nome": "STM - Superior Tribunal Militar"
    },
    {
        "sigla": "TJAC",
        "nome": "TJAC - Tribunal de Justiça do Acre"
    },
    {
        "sigla": "TJAL",
        "nome": "TJAL - Tribunal de Justiça de Alagoas"
    },
    {
        "sigla": "TJAM",
        "nome": "TJAM - Tribunal de Justiça do Amazonas"
    },
    {
        "sigla": "TJAP",
        "nome": "TJAP - Tribunal de Justiça do Amapá"
    },
    {
        "sigla": "TJBA",
        "nome": "TJBA - Tribunal de Justiça da Bahia"
    },
    {
        "sigla": "TJCE",
        "nome": "TJCE - Tribunal de Justiça do Ceará"
    },
    {
        "sigla": "TJDFT",
        "nome": "TJDFT - Tribunal de Justiça do Distrito Federal e Territórios"
    },
    {
        "sigla": "TJES",
        "nome": "TJES - Tribunal de Justiça do Espírito Santo"
    },
    {
        "sigla": "TJGO",
        "nome": "TJGO - Tribunal de Justiça de Goiás"
    },
    {
        "sigla": "TJMA",
        "nome": "TJMA - Tribunal de Justiça do Maranhão"
    },
    {
        "sigla": "TJMG",
        "nome": "TJMG - Tribunal de Justiça de Minas Gerais"
    },
    {
        "sigla": "TJMMG",
        "nome": "TJMMG - Tribunal de Justiça Militar do Estado de Minas Gerais"
    },
    {
        "sigla": "TJMRS",
        "nome": "TJMRS - Tribunal de Justiça Militar do Estado do Rio Grande do sul"
    },
    {
        "sigla": "TJMS",
        "nome": "TJMS - Tribunal de Justiça do Mato Grosso do Sul"
    },
    {
        "sigla": "TJMSP",
        "nome": "TJMSP - Tribunal de Justiça Militar do Estado de São Paulo"
    },
    {
        "sigla": "TJMT",
        "nome": "TJMT - Tribunal de Justiça do Mato Grosso"
    },
    {
        "sigla": "TJPA",
        "nome": "TJPA - Tribunal de Justiça do Pará"
    },
    {
        "sigla": "TJPB",
        "nome": "TJPB - Tribunal de Justiça da Paraíba"
    },
    {
        "sigla": "TJPE",
        "nome": "TJPE - Tribunal de Justiça de Pernambuco"
    },
    {
        "sigla": "TJPI",
        "nome": "TJPI - Tribunal de Justiça do Piauí"
    },
    {
        "sigla": "TJPR",
        "nome": "TJPR - Tribunal de Justiça do Paraná"
    },
    {
        "sigla": "TJRJ",
        "nome": "TJRJ - Tribunal de Justiça do Rio de Janeiro"
    },
    {
        "sigla": "TJRN",
        "nome": "TJRN - Tribunal de Justiça do Rio Grande do Norte"
    },
    {
        "sigla": "TJRO",
        "nome": "TJRO - Tribunal de Justiça de Rondônia"
    },
    {
        "sigla": "TJRR",
        "nome": "TJRR - Tribunal de Justiça de Roraima"
    },
    {
        "sigla": "TJRS",
        "nome": "TJRS - Tribunal de Justiça do Rio Grande do Sul"
    },
    {
        "sigla": "TJSC",
        "nome": "TJSC - Tribunal de Justiça de Santa Catarina"
    },
    {
        "sigla": "TJSE",
        "nome": "TJSE - Tribunal de Justiça de Sergipe"
    },
    {
        "sigla": "TJSP",
        "nome": "TJSP - Tribunal de Justiça de São Paulo"
    },
    {
        "sigla": "TJTO",
        "nome": "TJTO - Tribunal de Justiça do Estado de Tocantins"
    },
    {
        "sigla": "TRE-AC",
        "nome": "TRE-AC - Tribunal Regional Eleitoral do Acre"
    },
    {
        "sigla": "TRE-AL",
        "nome": "TRE-AL - Tribunal Regional Eleitoral de Alagoas"
    },
    {
        "sigla": "TRE-AM",
        "nome": "TRE-AM - Tribunal Regional Eleitoral do Amazonas"
    },
    {
        "sigla": "TRE-AP",
        "nome": "TRE-AP - Tribunal Regional Eleitoral do Amapá"
    },
    {
        "sigla": "TRE-BA",
        "nome": "TRE-BA - Tribunal Regional Eleitoral da Bahia"
    },
    {
        "sigla": "TRE-CE",
        "nome": "TRE-CE - Tribunal Regional Eleitoral do Ceará"
    },
    {
        "sigla": "TRE-DF",
        "nome": "TRE-DF - Tribunal Regional Eleitoral do Distrito Federal e Territórios"
    },
    {
        "sigla": "TRE-ES",
        "nome": "TRE-ES - Tribunal Regional Eleitoral do Espírito Santo"
    },
    {
        "sigla": "TRE-GO",
        "nome": "TRE-GO - Tribunal Regional Eleitoral de Goiás"
    },
    {
        "sigla": "TRE-MA",
        "nome": "TRE-MA - Tribunal Regional Eleitoral do Maranhão"
    },
    {
        "sigla": "TRE-MG",
        "nome": "TRE-MG - Tribunal Regional Eleitoral de Minas Gerais"
    },
    {
        "sigla": "TRE-MS",
        "nome": "TRE-MS - Tribunal Regional Eleitoral do Mato Grosso do Sul"
    },
    {
        "sigla": "TRE-MT",
        "nome": "TRE-MT - Tribunal Regional Eleitoral do do Mato Grosso"
    },
    {
        "sigla": "TRE-PA",
        "nome": "TRE-PA - Tribunal Regional Eleitoral do do Pará"
    },
    {
        "sigla": "TRE-PB",
        "nome": "TRE-PB - Tribunal Regional Eleitoral da Paraíba"
    },
    {
        "sigla": "TRE-PE",
        "nome": "TRE-PE - Tribunal Regional Eleitoral de Pernambuco"
    },
    {
        "sigla": "TRE-PI",
        "nome": "TRE-PI - Tribunal Regional Eleitoral do Piauí"
    },
    {
        "sigla": "TRE-PR",
        "nome": "TRE-PR - Tribunal Regional Eleitoral do Paraná"
    },
    {
        "sigla": "TRE-RJ",
        "nome": "TRE-RJ - Tribunal Regional Eleitoral do Rio de Janeiro"
    },
    {
        "sigla": "TRE-RN",
        "nome": "TRE-RN - Tribunal Regional Eleitoral do Rio Grande do Norte"
    },
    {
        "sigla": "TRE-RO",
        "nome": "TRE-RO - Tribunal Regional Eleitoral de Rondônia"
    },
    {
        "sigla": "TRE-RR",
        "nome": "TRE-RR - Tribunal Regional Eleitoral de Roraima"
    },
    {
        "sigla": "TRE-RS",
        "nome": "TRE-RS - Tribunal Regional Eleitoral do Rio Grande do Sul"
    },
    {
        "sigla": "TRE-SC",
        "nome": "TRE-SC - Tribunal Regional Eleitoral de Santa Catarina"
    },
    {
        "sigla": "TRE-SE",
        "nome": "TRE-SE - Tribunal Regional Eleitoral de Sergipe"
    },
    {
        "sigla": "TRE-SP",
        "nome": "TRE-SP - Tribunal Regional Eleitoral de São Paulo"
    },
    {
        "sigla": "TRE-TO",
        "nome": "TRE-TO - Tribunal Regional Eleitoral de Tocantins"
    },
    {
        "sigla": "TRF1",
        "nome": "TRF1 - Tribunal Regional Federal da 1ª Região"
    },
    {
        "sigla": "TRF2",
        "nome": "TRF2 - Tribunal Regional Federal da 2ª Região"
    },
    {
        "sigla": "TRF3",
        "nome": "TRF3 - Tribunal Regional Federal da 3ª Região"
    },
    {
        "sigla": "TRF4",
        "nome": "TRF4 - Tribunal Regional Federal da 4ª Região"
    },
    {
        "sigla": "TRF5",
        "nome": "TRF5 - Tribunal Regional Federal da 5ª Região"
    },
    {
        "sigla": "TRF6",
        "nome": "TRF6 - Tribunal Regional Federal da 6ª Região"
    },
    {
        "sigla": "TRT1",
        "nome": "TRT1 - Tribunal Regional do Trabalho da 1ª Região"
    },
    {
        "sigla": "TRT10",
        "nome": "TRT10 - Tribunal Regional do Trabalho da 10ª Região"
    },
    {
        "sigla": "TRT11",
        "nome": "TRT11 - Tribunal Regional do Trabalho da 11ª Região"
    },
    {
        "sigla": "TRT12",
        "nome": "TRT12 - Tribunal Regional do Trabalho da 12ª Região"
    },
    {
        "sigla": "TRT13",
        "nome": "TRT13 - Tribunal Regional do Trabalho da 13ª Região"
    },
    {
        "sigla": "TRT14",
        "nome": "TRT14 - Tribunal Regional do Trabalho da 14ª Região"
    },
    {
        "sigla": "TRT15",
        "nome": "TRT15 - Tribunal Regional do Trabalho da 15ª Região"
    },
    {
        "sigla": "TRT16",
        "nome": "TRT16 - Tribunal Regional do Trabalho da 16ª Região"
    },
    {
        "sigla": "TRT17",
        "nome": "TRT17 - Tribunal Regional do Trabalho da 17ª Região"
    },
    {
        "sigla": "TRT18",
        "nome": "TRT18 - Tribunal Regional do Trabalho da 18ª Região"
    },
    {
        "sigla": "TRT19",
        "nome": "TRT19 - Tribunal Regional do Trabalho da 19ª Região"
    },
    {
        "sigla": "TRT2",
        "nome": "TRT2 - Tribunal Regional do Trabalho da 2ª Região"
    },
    {
        "sigla": "TRT20",
        "nome": "TRT20 - Tribunal Regional do Trabalho da 20ª Região"
    },
    {
        "sigla": "TRT21",
        "nome": "TRT21 - Tribunal Regional do Trabalho da 21ª Região"
    },
    {
        "sigla": "TRT22",
        "nome": "TRT22 - Tribunal Regional do Trabalho da 22ª Região"
    },
    {
        "sigla": "TRT23",
        "nome": "TRT23 - Tribunal Regional do Trabalho da 23ª Região"
    },
    {
        "sigla": "TRT24",
        "nome": "TRT24 - Tribunal Regional do Trabalho da 24ª Região"
    },
    {
        "sigla": "TRT3",
        "nome": "TRT3 - Tribunal Regional do Trabalho da 3ª Região"
    },
    {
        "sigla": "TRT4",
        "nome": "TRT4 - Tribunal Regional do Trabalho da 4ª Região"
    },
    {
        "sigla": "TRT5",
        "nome": "TRT5 - Tribunal Regional do Trabalho da 5ª Região"
    },
    {
        "sigla": "TRT6",
        "nome": "TRT6 - Tribunal Regional do Trabalho da 6ª Região"
    },
    {
        "sigla": "TRT7",
        "nome": "TRT7 - Tribunal Regional do Trabalho da 7ª Região"
    },
    {
        "sigla": "TRT8",
        "nome": "TRT8 - Tribunal Regional do Trabalho da 8ª Região"
    },
    {
        "sigla": "TRT9",
        "nome": "TRT9 - Tribunal Regional do Trabalho da 9ª Região"
    },
    {
        "sigla": "TSE",
        "nome": "TSE - Tribunal Superior Eleitoral"
    },
    {
        "sigla": "TST",
        "nome": "TST - Tribunal Superior do Trabalho"
    }
]

In [None]:
tipos = [
    {
        "id": "1",
        "nome": "Citação"
    },
    {
        "id": "2",
        "nome": "Intimação"
    },
    {
        "id": "3",
        "nome": "Edital"
    },
    {
        "id": "4",
        "nome": "Lista de distribuição"
    },
    {
        "id": "5",
        "nome": "Pauta de julgamento"
    }
]

In [None]:
meios = [
    {
        "id": "E",
        "nome": "Plataforma de Editais"
    },
    {
        "id": "D",
        "nome": "Diário de Justiça Eletrônico"
    }
]