# Goal of notebook

- understand where BTLF data is coming from: BNF, Constellations, Babelio
- understand reason of missing data for each source

In [2]:
from rdflib import Graph, Namespace, Literal, URIRef
from rdflib import RDF, RDFS, OWL, XSD
import sys
sys.path.append('../')

sys.path.append('../andre')
import andre.utils as utils
from andre.utils import schema as SCHEMA
import collections
import isbnlib

# bnf

In [3]:
graph_bnf = Graph()
graph_bnf.parse("final_datasets/bnf.ttl", format="turtle")

bnf_isbn_dict = collections.defaultdict(lambda :[])

for bnf_book in graph_bnf.subjects(RDF.type, utils.schema.Book):
    book_data = utils.extract_data_bnf(graph_bnf, bnf_book)
    # if not isLanguageFrench(book_data.language):
    #     print(book_data.language)
    bnf_isbn_dict[book_data.isbn].append(book_data) 

Failed to convert Literal lexical form to value. Datatype=http://www.w3.org/2001/XMLSchema#date, Converter=<function parse_date at 0x000001E0945F0180>
Traceback (most recent call last):
  File "C:\Users\dre\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\rdflib\term.py", line 2084, in _castLexicalToPython
    return conv_func(lexical)  # type: ignore[arg-type]
           ^^^^^^^^^^^^^^^^^^
  File "C:\Users\dre\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\isodate\isodates.py", line 203, in parse_date
    raise ISO8601Error('Unrecognised ISO 8601 date format: %r' % datestring)
isodate.isoerror.ISO8601Error: Unrecognised ISO 8601 date format: 'Port- d-es'
Failed to convert Literal lexical form to value. Datatype=http://www.w3.org/2001/XMLSchema#date, Converter=<function parse_date at 0x000001E0945F0180>
Traceback (most recent call last)

In [4]:
def is_language_french(language):
    return language == "Français" or  language == "fre"

In [5]:
graph_btlf = Graph()
graph_btlf.parse("final_datasets/grapheLivres_BTLF_new_data_EditeursConsolides.ttl", format="turtle")

btlf_isbn_dict = collections.defaultdict(lambda :[])
for btlf_book in graph_btlf.subjects(RDF.type, utils.schema.Book):
    book_data = utils.extract_data_btlf(graph_btlf, btlf_book)
    btlf_isbn_dict[book_data.isbn].append(book_data) 

In [6]:
print(len(btlf_isbn_dict))

26920


In [7]:
bnf_btlf = []
not_bnf_btlf = []
bnf_not_btlf = []
not_bnf_not_btlf = [] # should stay empty

for isbn_bnf in bnf_isbn_dict:
    if isbn_bnf in btlf_isbn_dict:
        bnf_btlf.append(isbn_bnf)
    else:
        bnf_not_btlf.append(isbn_bnf)

for isbn_btlf in btlf_isbn_dict:
    if isbn_btlf not in bnf_isbn_dict:
        not_bnf_btlf.append(isbn_btlf)
    if isbn_btlf in bnf_isbn_dict and isbn_btlf not in bnf_btlf:
        print("error")

In [8]:
print("intersection", len(bnf_btlf))
print("dans bnf mais pas btlf", len(bnf_not_btlf))
print("dans btlf mais pas bnf", len(not_bnf_btlf))

intersection 20794
dans bnf mais pas btlf 9143
dans btlf mais pas bnf 6126


In [9]:
# verifier proportion livres non francais dans (BNF not BTLF)
non_french_book_counter = 0
french_book_counter = 0
non_isbn_13_counter = 0

for isbn in bnf_not_btlf:
    if isbnlib.is_isbn13(isbn) and not is_language_french(bnf_isbn_dict[isbn][0].language):
        non_french_book_counter += 1
    elif isbnlib.is_isbn13(isbn) and is_language_french(bnf_isbn_dict[isbn][0].language):
        french_book_counter += 1
    if not isbnlib.is_isbn13(isbn):
        non_isbn_13_counter += 1

print("french", french_book_counter)
print("non french", non_french_book_counter)
print("non isbn13", non_isbn_13_counter)

french 772
non french 1008
non isbn13 7363


In [10]:
print(len(btlf_isbn_dict))


26920


# constellations

In [11]:
graph_constellations = Graph()
graph_constellations.parse("final_datasets/Constellations.ttl", format="turtle")

constellations_isbn_dict = collections.defaultdict(lambda :[])

for constellations_book in graph_constellations.subjects(RDF.type, utils.schema.Book):
    book_data = utils.extract_data_constellation(graph_constellations, constellations_book)
    constellations_isbn_dict[book_data.isbn].append(book_data) 


In [12]:
print(len(constellations_isbn_dict))
print(len(btlf_isbn_dict))
print(len(bnf_isbn_dict))

11255
26920
29937


In [13]:
constellations_btlf = []
not_constellations_btlf = []
constellations_not_btlf = []
not_constellations_not_btlf = [] # should stay empty

for isbn_constellations in constellations_isbn_dict:
    if isbn_constellations in btlf_isbn_dict:
        constellations_btlf.append(isbn_constellations)
    else:
        constellations_not_btlf.append(isbn_constellations)

for isbn_btlf in btlf_isbn_dict:
    if isbn_btlf not in constellations_isbn_dict:
        not_constellations_btlf.append(isbn_btlf)
    if isbn_btlf in constellations_isbn_dict and isbn_btlf not in constellations_btlf:
        print("error")

In [14]:
print("intersection", len(constellations_btlf))
print("dans constellations mais pas btlf", len(constellations_not_btlf))
print("dans btlf mais pas constellations", len(not_constellations_btlf))

intersection 6683
dans constellations mais pas btlf 4572
dans btlf mais pas constellations 20237


In [15]:
constellations_btlf

['9782350007625',
 '9782742769681',
 '9782895291930',
 '9782848017860',
 '9782924277812',
 '9782352900658',
 '9782896071449',
 '9782081305410',
 '9782330039516',
 '9782211097734',
 '9781474907354',
 '9782747058261',
 '9782070626489',
 '9782203075344',
 '9782812600593',
 '9782070576418',
 '9782740427798',
 '9782748520637',
 '9782745980663',
 '9782362900617',
 '9782742785995',
 '9782745933188',
 '9782362900433',
 '9782070641536',
 '9782035861986',
 '9782253073093',
 '9782745968777',
 '9782373491746',
 '9782922585506',
 '9782070659043',
 '9782732443522',
 '9782812618840',
 '9782896080823',
 '9782764408520',
 '9782070589876',
 '9782266305914',
 '9782211092876',
 '9782211091336',
 '9782354132200',
 '9782355040627',
 '9782211089876',
 '9782215135302',
 '9782211078764',
 '9782020639507',
 '9782918689119',
 '9782733838730',
 '9782211208055',
 '9782070629923',
 '9782211207928',
 '9782882583888',
 '9782354883058',
 '9782812606830',
 '9782764445044',
 '9782211093750',
 '9782355041495',
 '97822661

In [16]:
len(list(set(not_bnf_btlf) & set(constellations_btlf)))

4915

In [17]:
# verifier proportion livres non francais dans (BNF not BTLF)
non_french_book_counter = 0
french_book_counter = 0
non_isbn_13_counter = 0

for isbn in constellations_not_btlf:
    if isbnlib.is_isbn13(isbn) and not is_language_french(constellations_isbn_dict[isbn][0].language):
        non_french_book_counter += 1
    elif isbnlib.is_isbn13(isbn) and is_language_french(constellations_isbn_dict[isbn][0].language):
        french_book_counter += 1
    if not isbnlib.is_isbn13(isbn):
        non_isbn_13_counter += 1

print("french", french_book_counter)
print("non french", non_french_book_counter)
print("non isbn13", non_isbn_13_counter)

french 0
non french 3045
non isbn13 1527


In [18]:
print("non isbn13 in constellations")
len(constellations_not_btlf) - len(list(filter(isbnlib.is_isbn13,constellations_not_btlf)))

non isbn13 in constellations


1527

# investigation of BTLF sources

In [19]:
constellation_counter = 0
bnf_counter = 0
constellations_and_bnf_counter = 0
source_unknown_counter = 0

for btlf_book in btlf_isbn_dict:
    if btlf_book in bnf_isbn_dict and btlf_book in constellations_isbn_dict:
        constellations_and_bnf_counter += 1
    elif btlf_book in bnf_isbn_dict and btlf_book not in constellations_isbn_dict:
        bnf_counter += 1
    elif btlf_book not in bnf_isbn_dict and btlf_book in constellations_isbn_dict:
        constellation_counter += 1
    else:
        print(btlf_book)
        source_unknown_counter += 1

print("constellations and bnf", constellations_and_bnf_counter)
print("constellations only", constellation_counter)
print("bnf only", bnf_counter)
print("source unkown", source_unknown_counter)

9782924645536
9782897091279
9782896579006
9782895911210
9782924996003
9782896578382
9782890749658
9782896576654
9782895370642
9782896332083
9782890463660
9782890469105
9782896577422
9782894354889
9782894355619
9782895850892
9782897512538
9782890217461
9782890218147
9782890218383
9782897740153
9782896510955
9782890218239
9782896512553
9782923342191
9782897140090
9782923342344
9782892950120
9782923342030
9782896074136
9782897142872
9782897700133
9782922225723
9782923896151
9782896070008
9782897141257
9782922225990
9782896070077
9782896480562
9782923813745
9782896074259
9782897701499
9782893813127
9782922225013
9782924563953
9782922225518
9782896074013
9782896074464
9782896071272
9782896481125
9782897525385
9782762122183
9782897125776
9782762117363
9782897125417
9782895129783
9782897770532
9782760942219
9782896867196
9782895123897
9782760999169
9782922892970
9782895124504
9782897852948
9782760933521
9782895124658
9782760942141
9782895123989
9782760947719
9782760933385
9782923196084
978289

# babelio: last BTLF source

In [20]:
graph_babelio = Graph()
graph_babelio.parse("final_datasets/babelio.ttl", format="turtle")

<Graph identifier=Nf9382716e26e403ca2aadd955c8abf8e (<class 'rdflib.graph.Graph'>)>

In [21]:
babelio_isbn_dict = collections.defaultdict(lambda :[])

for babelio_book in graph_babelio.subjects(RDF.type, utils.schema.Book):
    book_data = utils.extract_data_babelio(graph_babelio, babelio_book)
    babelio_isbn_dict[book_data.isbn].append(book_data) 

In [22]:
len(babelio_isbn_dict)

2215

In [23]:
babelio_isbn_dict.keys()

dict_keys(['9782924332016', '', '9782330056032', '9782075191043', '9782266317078', '9782875575173', '9782344000595', '9782742755400', '9782211222600', '9782764434956', '9782895407102', '9782364745056', '9782382120347', '9782811649920', '9782413026570', '9782505076780', '9782373492590', '9782505115304', '9791032712856', '9782380710243', '9782382120446', '9791032704677', '9782369743255', '9782344005682', '9782369740315', '9782723499903', '9782351809082', '9782723494014', '9782818921647', '9782203062382', '9782302025479', '9782355924262', '9782302023192', '9782820309624', '9782355921582', '9782756016641', '9782811603441', '9782505012801', '9782811638771', '9782849656419', '9782356481665', '9782845998650', '9782759500581', '9782756005485', '9782849466841', '9782351002353', '9782811642242', '9782756001548', '9782847899429', '9782845384521', '9782847894516', '9782723447720', '9782845993372', '9782372871976', '9782382120361', '9782355929458', '9791035502553', '9782809450774', '9782362663574',

In [24]:
babelio_isbn_dict['9782234006645'][0].book_authors

[]

In [25]:
len(list(graph_babelio.subjects(RDF.type, utils.schema.Book)))

4169

In [26]:
constellation_counter = 0
bnf_counter = 0
constellations_and_bnf_counter = 0
babelio_counter = 0
source_unknown_counter = 0

for btlf_book in btlf_isbn_dict:
    if btlf_book in bnf_isbn_dict and btlf_book in constellations_isbn_dict:
        constellations_and_bnf_counter += 1
    elif btlf_book in bnf_isbn_dict and btlf_book not in constellations_isbn_dict:
        bnf_counter += 1
    elif btlf_book not in bnf_isbn_dict and btlf_book in constellations_isbn_dict:
        constellation_counter += 1
    elif btlf_book in babelio_isbn_dict:
        babelio_counter += 1
    else:
        print(btlf_book)
        source_unknown_counter += 1

print("constellations and bnf", constellations_and_bnf_counter)
print("constellations only", constellation_counter)
print("bnf only", bnf_counter)
print("babelio only", babelio_counter)
print("source unkown", source_unknown_counter)

constellations and bnf 1768
constellations only 4915
bnf only 19026
babelio only 1211
source unkown 0


In [27]:
constellations_and_bnf_and_babelio_counter = 0
constellations_and_bnf_counter = 0
constellations_and_babelio_counter = 0
babelio_and_bnf_counter = 0
constellation_only_counter = 0
bnf_only_counter = 0
babelio_only_counter = 0

for btlf_book in btlf_isbn_dict:
    if (btlf_book in bnf_isbn_dict 
        and btlf_book in constellations_isbn_dict
        and btlf_book in babelio_isbn_dict):
        constellations_and_bnf_and_babelio_counter += 1
    if (btlf_book in bnf_isbn_dict 
        and btlf_book in constellations_isbn_dict):
        constellations_and_bnf_counter += 1
    if (btlf_book in constellations_isbn_dict 
        and btlf_book in babelio_isbn_dict):
        constellations_and_babelio_counter += 1
    if (btlf_book in bnf_isbn_dict 
        and btlf_book in babelio_isbn_dict):
        babelio_and_bnf_counter += 1
    if (btlf_book in bnf_isbn_dict
        and btlf_book not in constellations_isbn_dict
        and btlf_book not in babelio_isbn_dict):
        bnf_only_counter += 1
    if (btlf_book not in bnf_isbn_dict 
        and btlf_book in constellations_isbn_dict 
        and btlf_book not in babelio_isbn_dict):
        constellation_only_counter += 1
    if (btlf_book not in bnf_isbn_dict 
        and btlf_book not in constellations_isbn_dict 
        and btlf_book in babelio_isbn_dict):
        babelio_only_counter += 1
        print(btlf_book)
    

print("constellations and bnf and babelio", constellations_and_bnf_and_babelio_counter)
print("constellations and bnf", constellations_and_bnf_counter)
print("constellations and babelio", constellations_and_babelio_counter)
print("babelio and bnf", babelio_and_bnf_counter)
print("babelio only", babelio_only_counter)
print("bnf only", bnf_only_counter)
print("constellations only", constellation_only_counter)

9782924645536
9782897091279
9782896579006
9782895911210
9782924996003
9782896578382
9782890749658
9782896576654
9782895370642
9782896332083
9782890463660
9782890469105
9782896577422
9782894354889
9782894355619
9782895850892
9782897512538
9782890217461
9782890218147
9782890218383
9782897740153
9782896510955
9782890218239
9782896512553
9782923342191
9782897140090
9782923342344
9782892950120
9782923342030
9782896074136
9782897142872
9782897700133
9782922225723
9782923896151
9782896070008
9782897141257
9782922225990
9782896070077
9782896480562
9782923813745
9782896074259
9782897701499
9782893813127
9782922225013
9782924563953
9782922225518
9782896074013
9782896074464
9782896071272
9782896481125
9782897525385
9782762122183
9782897125776
9782762117363
9782897125417
9782895129783
9782897770532
9782760942219
9782896867196
9782895123897
9782760999169
9782922892970
9782895124504
9782897852948
9782760933521
9782895124658
9782760942141
9782895123989
9782760947719
9782760933385
9782923196084
978289

In [32]:
class Author_Alignment:
    def __init__(self, source, isbn_dict):
        self.source = source
        self.alignment_count = 0
        self.matched_isbn_count = 0
        self.one_author_alignment_count = 0 # counts if at least 1 author/illustrator is aligned per isbn
        self.source_count = 0
        self.btlf_count = 0
        self.isbn_dict = isbn_dict
        self.non_aligned_authors = []
        self.metadata_different = set()
    
    def extract_authors_illustrators(self, isbn):
        btlf_book_data =  btlf_isbn_dict[isbn][0]
        authors_illustrators_btlf = btlf_book_data.book_authors + [btlf_book_data.illustrator] if btlf_book_data.illustrator else btlf_book_data.book_authors
        if self.source == "bnf":
             authors_illustrators_source = self.isbn_dict[isbn][0].book_authors
        elif self.source == "constellations" or self.source == "babelio":
            book_data = self.isbn_dict[isbn][0]
            authors_illustrators_source = book_data.book_authors + [book_data.illustrator] if book_data.illustrator else book_data.book_authors
        return authors_illustrators_source, authors_illustrators_btlf

    def count_total_author_alignments_source(self): # will count every alignement
        for btlf_book in btlf_isbn_dict:
            if btlf_book in self.isbn_dict:
                self.count_author_alignements_isbn(btlf_book)

    def count_one_author_alignments_source(self): # will check if at least one author is aligned per isbn
        for btlf_book in btlf_isbn_dict:
            if btlf_book in self.isbn_dict:
                self.matched_isbn_count += 1
                if self.is_one_author_aligned_isbn(btlf_book):
                    self.one_author_alignment_count += 1

    def count_author_alignements_isbn(self, isbn):
        authors_illustrators_source, authors_illustrators_btlf = self.extract_authors_illustrators(isbn)    
        if authors_illustrators_source and authors_illustrators_btlf:
            self.source_count += len(authors_illustrators_source)
            self.btlf_count += len(authors_illustrators_btlf)
            for author_source in authors_illustrators_source:
                for author_btlf in authors_illustrators_btlf:
                    if author_source and author_btlf:
                        author_btlf_split = author_btlf.rsplit(",")
                        if len(author_btlf_split) == 2:
                            author_btlf = author_btlf_split[1] + " " + author_btlf_split[0]
                        else:
                            author_btlf = author_btlf
                        res = utils.preprocess_author_name(author_source) == utils.preprocess_author_name(author_btlf)
                        if res:
                            self.alignment_count += 1
       
    def is_one_author_aligned_isbn(self, isbn):
            authors_illustrators_source, authors_illustrators_btlf = self.extract_authors_illustrators(isbn)    
            if authors_illustrators_source and authors_illustrators_btlf:
                for author_source in authors_illustrators_source:
                    for author_btlf in authors_illustrators_btlf:
                        if author_source and author_btlf:
                            author_btlf_split = author_btlf.rsplit(",")
                            if len(author_btlf_split) == 2:
                                author_btlf = author_btlf_split[1] + " " + author_btlf_split[0]
                            else:
                                author_btlf = author_btlf
                            res = utils.preprocess_author_name(author_source) == utils.preprocess_author_name(author_btlf)
                            if res:
                                return True
                            else: # exploration of how many books have incorrect mapping between their isbn and metadata
                                book_data_source = self.isbn_dict[isbn][0]
                                book_data_btlf = btlf_isbn_dict[isbn][0]
                                if utils.preprocess_author_name(book_data_source.book_name) != utils.preprocess_author_name(book_data_btlf.book_name) and book_data_source.publication_date != book_data_btlf.publication_date:
                                    self.metadata_different.add(isbn)
            print(isbn)
            return False
    
    def print(self):
        print(f"{self.source} total author alignement", self.alignment_count , "/", self.source_count, "=", round(self.alignment_count / self.source_count * 100), "%")
        print(f"{self.source} one author alignement", self.one_author_alignment_count , "/", self.matched_isbn_count, "=", round(self.one_author_alignment_count / self.matched_isbn_count * 100), "%")

print("###################### bnf")
bnf_btlf_alignements = Author_Alignment(source="bnf", isbn_dict=bnf_isbn_dict)
bnf_btlf_alignements.count_total_author_alignments_source()
bnf_btlf_alignements.count_one_author_alignments_source()

print("###################### constellations")
constellations_btlf_alignements = Author_Alignment(source="constellations", isbn_dict=constellations_isbn_dict)
constellations_btlf_alignements.count_total_author_alignments_source()
constellations_btlf_alignements.count_one_author_alignments_source()

print("###################### babelio")
babelio_btlf_alignements = Author_Alignment(source="babelio", isbn_dict=babelio_isbn_dict)
babelio_btlf_alignements.count_total_author_alignments_source()
babelio_btlf_alignements.count_one_author_alignments_source()

bnf_btlf_alignements.print()
constellations_btlf_alignements.print()
babelio_btlf_alignements.print()

# print("bnf author alignement", bnf_author_alignments , "/", bnf_author_count)
# print("constellations author alignement", constellations_author_alignments , "/", constellations_author_count)
# print("babelio author alignement", babelio_author_alignments , "/", babelio_books)

###################### bnf
9782897140687
9782923342504
9782897141103
9782896862627
9782923841762
9782895125723
9782924720264
9782897990008
9782895408765
9782895403470
9782895407201
9782895401841
9782844555656
9782278091324
9791092353549
9782210966826
9782374252377
9782203215702
9782362663345
9782803623273
9782352890874
9782747027564
9782070622399
9782092522554
9782809405910
9782812601019
9782745942982
9782070630387
9782352890744
9791093466118
9783836544016
9782081287167
9789954213964
9791095397120
9791090103627
9782745961136
9782246824374
9782877677981
9782851818614
9782352892458
9782916046228
9782889083008
9791021405721
9782358322119
9782070599592
9782874262883
9782344027301
9789953315416
9791029800139
9782745978752
9782352045588
9782278081554
9782374083773
9791026813873
9782212567373
9782811664251
9782360811724
9782372590754
9791090103115
9782266230360
9782916859095
9782918911548
9782812606618
9782365930246
9782747045537
9782012023482
9782745956224
9782226328458
9782226221599
9782812

In [34]:
len(babelio_btlf_alignements.metadata_different)

155

In [35]:
babelio_btlf_alignements.metadata_different

{'9780885031405',
 '9782013914291',
 '9782016285947',
 '9782017086949',
 '9782017202387',
 '9782020510011',
 '9782020562546',
 '9782020849432',
 '9782020969192',
 '9782021292527',
 '9782070417674',
 '9782070578931',
 '9782070601479',
 '9782070637065',
 '9782070649747',
 '9782070664207',
 '9782072978371',
 '9782075139106',
 '9782081201415',
 '9782081389526',
 '9782081490055',
 '9782081619197',
 '9782092023129',
 '9782092535981',
 '9782092557228',
 '9782092593554',
 '9782092595312',
 '9782203064645',
 '9782203168602',
 '9782207116104',
 '9782211072755',
 '9782211220682',
 '9782211301695',
 '9782211305853',
 '9782213017617',
 '9782213610337',
 '9782215135869',
 '9782218958243',
 '9782226177902',
 '9782226183255',
 '9782226193384',
 '9782226250858',
 '9782226473134',
 '9782234074767',
 '9782253170853',
 '9782258115637',
 '9782266135726',
 '9782266173476',
 '9782278052776',
 '9782278054749',
 '9782278054763',
 '9782278077946',
 '9782283021231',
 '9782302020092',
 '9782302023192',
 '97823020

In [None]:
babelio_btlf = []
not_babelio_btlf = []
babelio_not_btlf = []
not_babelio_not_btlf = [] # should stay empty

for isbn_babelio in babelio_isbn_dict:
    if isbn_babelio in btlf_isbn_dict:
        babelio_btlf.append(isbn_babelio)
    else:
        babelio_not_btlf.append(isbn_babelio)

for isbn_btlf in btlf_isbn_dict:
    if isbn_btlf not in babelio_isbn_dict:
        not_babelio_btlf.append(isbn_btlf)
    if isbn_btlf in babelio_isbn_dict and isbn_btlf not in babelio_btlf:
        print("error")

In [None]:
print("intersection", len(babelio_btlf))
print("dans babelio mais pas btlf", len(babelio_not_btlf))
print("dans btlf mais pas babelio", len(not_babelio_btlf))

intersection 2052
dans babelio mais pas btlf 163
dans btlf mais pas babelio 24868


## babelio: don't understand why some books not in BTLF

In [None]:
babelio_not_btlf

['',
 '9782764434956',
 '9782845384521',
 '9782221016299',
 '9781023501545',
 '9782375110027',
 '9781770911949',
 '9781021401229',
 '978B088646CCV',
 '978B0B1JM9PG2',
 '9782764429815',
 '9781021400413',
 '978B0861F2QMR',
 '9782895023579',
 '9781023506779',
 '9781021401571',
 '978B09BXCDXHF',
 '9780375508264',
 '9781368013796',
 '9781023508407',
 '9781023503020',
 '9782020327213',
 '9782365085540',
 '9781021405708',
 '9781021404299',
 '9782203129443',
 '9782203119109',
 '9782203118782',
 '9782215172260',
 '9781096688082',
 '9781021404817',
 '9782075063869',
 '9781090597342',
 '9781091081147',
 '9781021403360',
 '9781021401236',
 '9782298037883',
 '9782960080612',
 '9782842312879',
 '9783806750911',
 '9782266058292',
 '9782210986015',
 '9782092822784',
 '978B08WPCJ8P2',
 '9782298085532',
 '9782923898971',
 '9781092111188',
 '978B009SNHAGC',
 '9782010145414',
 '9782924342152',
 '9781090425522',
 '9782266106535',
 '9781023508742',
 '9782894195208',
 '9782897114589',
 '9782020029445',
 '978

In [None]:
incorrect_isbn_counter = 0
for isbn in babelio_not_btlf:
    if not isbnlib.is_isbn13(isbn):
        incorrect_isbn_counter += 1
incorrect_isbn_counter 

25

In [None]:
# verifier proportion livres non francais dans (babelio not BTLF)
non_french_book_counter = 0
french_book_counter = 0

for isbn in babelio_not_btlf:
    if isbnlib.is_isbn13(isbn) and not is_language_french(babelio_isbn_dict[isbn][0].language):
        non_french_book_counter += 1
    elif isbnlib.is_isbn13(isbn) and is_language_french(babelio_isbn_dict[isbn][0].language):
        french_book_counter += 1
        print(isbn)

print("french", french_book_counter)
print("non french", non_french_book_counter)

9782764434956
9782221016299
9781023501545
9782375110027
9781021401229
9782764429815
9781021400413
9782895023579
9781023506779
9781021401571
9781368013796
9781023508407
9781023503020
9782020327213
9782365085540
9781021405708
9781021404299
9782203129443
9782203119109
9782203118782
9782215172260
9781096688082
9781021404817
9782075063869
9781090597342
9781091081147
9781021403360
9781021401236
9782298037883
9782960080612
9782842312879
9783806750911
9782266058292
9782210986015
9782092822784
9782298085532
9782923898971
9781092111188
9782010145414
9782924342152
9781090425522
9781023508742
9782894195208
9782897114589
9782896954971
9782895310075
9781023501811
9782253041214
9782710303534
9781023508391
9782075046077
9782896628520
9782897741822
9781023501224
9782700224733
9782890514201
9782211071864
9782921365253
9782896622191
9782266079334
9782897740610
9782912360380
9782914096713
9782895967606
9782013214827
9782890062528
9782211232746
9781023502887
9782021110159
9781091416000
9781023508100
978289