In [62]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import re
import json
import numpy as np

In [63]:
def getSoupFromURL(url, method='get', data=None, timeout=30):
    """Fetch ``url`` and parse the response body with BeautifulSoup.

    Parameters
    ----------
    url : str
        Address to request.
    method : str
        HTTP verb to use, either ``'get'`` or ``'post'``.
    data : dict, optional
        Payload sent with a POST request (ignored for GET). Defaults to an
        empty payload; ``None`` is used as the sentinel so that no dict is
        shared between calls.
    timeout : float or None
        Seconds to wait for the server before ``requests`` raises a timeout
        error. ``None`` waits indefinitely.

    Returns
    -------
    BeautifulSoup or int
        The parsed document when the server answers with status 200,
        ``-1`` when ``method`` is not supported, ``-2`` for any other
        status code. Both error codes are truthy, so callers have to test
        for them explicitly rather than rely on ``if result:``.
    """
    # Reject unsupported verbs before touching the network.
    if method not in ('get', 'post'):
        return -1

    if method == 'get':
        res = requests.get(url, timeout=timeout)
    else:
        payload = {} if data is None else data
        res = requests.post(url, data=payload, timeout=timeout)

    if res.status_code != 200:
        return -2

    return BeautifulSoup(res.text, 'html.parser')

In [64]:
class Medicament:
    """Plain record holding the attributes collected for one medication."""

    # Attribute names, in constructor order; also the key order of to_dict().
    _FIELDS = (
        'denomination',
        'dosage',
        'labo',
        'poids',
        'age',
        'dateAMM',
        'dateComm',
        'prix',
        'nbComprimes',
    )

    def __init__(self, denomination, dosage, labo, poids, age, dateAMM, dateComm, prix, nbComprimes):
        """Store every argument on the instance under the same name."""
        values = (denomination, dosage, labo, poids, age, dateAMM, dateComm, prix, nbComprimes)
        for field, value in zip(self._FIELDS, values):
            setattr(self, field, value)

    def to_dict(self):
        """Return the current attribute values as a dict keyed by attribute name."""
        return {field: getattr(self, field) for field in self._FIELDS}

In [65]:
def getAge(str):
    """Extract every number that is followed by `` ans`` in the given text.

    The matches (digit strings) are passed through ``np.squeeze``: a single
    match yields a 0-d array, several matches a 1-d array, and no match an
    empty array.
    """
    return np.squeeze(re.findall(r'(\d+) ans', str))

def getPoids(str):
    """Extract every number that is followed by `` kg`` (any letter case).

    The matches (digit strings) are passed through ``np.squeeze``: a single
    match yields a 0-d array, several matches a 1-d array, and no match an
    empty array.
    """
    weights = [found.group(1) for found in re.finditer(r'(\d+) [kK][gG]', str)]
    return np.squeeze(weights)

def getDosage(str):
    """Extract every number that is followed by `` mg`` (any letter case).

    The matches (digit strings) are passed through ``np.squeeze``: a single
    match yields a 0-d array, several matches a 1-d array, and no match an
    empty array.
    """
    dosage_pattern = r'(\d+) [mM][gG]'
    doses = re.compile(dosage_pattern).findall(str)
    return np.squeeze(doses)

def getNbComprimes(str):
    """Extract every number that is followed by whitespace and ``comprimé``.

    The matches (digit strings) are passed through ``np.squeeze``: a single
    match yields a 0-d array, several matches a 1-d array, and no match an
    empty array.
    """
    counts = []
    for found in re.finditer(r'(\d+)\s+comprimé', str):
        counts.append(found.group(1))
    return np.squeeze(counts)

def getMedicamentsList(url):
    """Build a list of ``Medicament`` objects from a medication search URL.

    The search response is decoded as JSON; for each entry the detail record
    is fetched from the open-medicaments API using the entry's ``codeCIS``
    and turned into a ``Medicament``.

    Parameters
    ----------
    url : str
        Search endpoint whose response is a JSON array of entries that each
        carry a ``codeCIS`` key.

    Returns
    -------
    list of Medicament or None
        One object per entry whose detail request succeeded, or ``None``
        when the search request itself failed.
    """
    soup = getSoupFromURL(url)

    # getSoupFromURL reports failures as the ints -1 / -2, which are truthy:
    # a bare ``if soup:`` would let them through and crash on ``.text``.
    if not soup or isinstance(soup, int):
        return None

    medicaments = []
    for entry in json.loads(soup.text):
        detail_url = "https://open-medicaments.fr/api/v1/medicaments/" + str(entry['codeCIS'])
        detail_soup = getSoupFromURL(detail_url)

        # Skip medications whose detail record could not be fetched.
        if not detail_soup or isinstance(detail_soup, int):
            continue

        details = json.loads(detail_soup.text)
        indications = details['indicationsTherapeutiques']
        # Only the first presentation is used for the commercial data.
        presentation = details["presentations"][0]

        medicaments.append(Medicament(
            denomination=details['denomination'],
            dosage=getDosage(details['denomination']),
            labo=np.squeeze(details['titulaires']),
            poids=getPoids(indications),
            age=getAge(indications),
            dateAMM=details['dateAMM'],
            dateComm=presentation["dateDeclarationCommercialisation"],
            prix=presentation["prix"],
            nbComprimes=getNbComprimes(presentation["libelle"]),
        ))

    return medicaments

In [66]:
# Main
url = "https://open-medicaments.fr/api/v1/medicaments?query=ibuprofene"

# (key produced by Medicament.to_dict, CSV column title), in output order.
# Pairing them explicitly guarantees each value is written under its own title.
csv_columns = [
    ("denomination", "Denomination"),
    ("dosage", "Dosage"),
    ("labo", "Laboratoire"),
    ("poids", "Poids Mini"),
    ("age", "Age Mini"),
    ("dateAMM", "Date AMM"),
    ("dateComm", "Date Comm"),
    ("nbComprimes", "Nb Comprimés"),
    ("prix", "Prix"),
]

# Named ``medicaments`` rather than ``list`` so the builtin stays usable in
# the rest of the kernel session.
medicaments = getMedicamentsList(url)

# getMedicamentsList returns None when the search request fails, so test
# truthiness instead of calling len() on the result.
if medicaments:
    df = pd.DataFrame(
        [medicament.to_dict() for medicament in medicaments],
        columns=[key for key, title in csv_columns],
    )

    # Write mode (not append) keeps re-runs from stacking duplicate header
    # lines and rows in the same file.
    df.to_csv(
        r'ibuprofene.csv',
        header=[title for key, title in csv_columns],
        index=False,
        sep=',',
        mode='w',
    )