Skip to content

Commit

Permalink
projet 7 (fin ?)
Browse files Browse the repository at this point in the history
  • Loading branch information
Patent2net committed Jan 9, 2021
1 parent f26da5d commit b1643c3
Show file tree
Hide file tree
Showing 5 changed files with 104 additions and 30 deletions.
95 changes: 71 additions & 24 deletions Patent2Net/P2N-Nets.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@
Inventeur_Norm [cle] = [truc.title() for truc in Inventeur_Norm [cle]]

InvNormes = [aut.title() for cle in Inventeur_Norm.keys() for aut in Inventeur_Norm [cle]]
InvNormes = list(set(InvNormes))

# with codecs.open(configFile.ResultPath +'//AcadCorpora//AuteursAffil.csv', 'r', 'utf8') as fic:
# data = fic.readlines()
# multiAut = 0 # inventeurs prolixes
Expand Down Expand Up @@ -131,6 +133,7 @@
Auteurs = dict()
with open(RepDir + "//AuteursMatches.tsv", "r", encoding = 'utf8') as ficMatch:
DataMatch = ficMatch.readlines()[1:]
PubMedAuct = []

for lig in DataMatch:
col = lig.strip().split("\t")
Expand All @@ -141,7 +144,7 @@
Auteurs [col[0]]["publisMatch"] = int(col [1])
Auteurs [col[0]]["publis"] = int(col [2])
Auteurs [col[0]]["Score moyen"] = float(col [3])

PubMedAuct.append(col[0])


with open(RepDir + "//AuteursPAsMatches.tsv", "r", encoding = 'utf8') as ficMatch:
Expand All @@ -156,22 +159,25 @@
Auteurs [col[0]]["publisMatch"] = 0
Auteurs [col[0]]["publis"] = 0
Auteurs [col[0]]["Score moyen"] = 0

PubMedAuct.append(col[0])

with open(Auteur+'//traceAuct.csv', 'r',) as fic:
dataAuct = fic.readlines()
if 'Nombre publications' in dataAuct [0]:
dataAuct = dataAuct [1:]

for lig in dataAuct[1:]:
for lig in dataAuct:
lig = lig.strip()
col= lig.split(';')
if col[0] not in Auteurs.keys():
Auteurs [col[0]] = dict()
Auteurs [col[0]]['publis'] = int(col [1])
Auteurs [col[0]]['publisMatch'] = int(col [2])
Auteurs [col[0]]['affilFr'] = bool(col [3])
Auteurs [col[0]]['affilFr'] = col [3]
else: # merging
Auteurs [col[0]]['publis'] += int(col [1])
Auteurs [col[0]]['publisMatch'] += int(col [2])
Auteurs [col[0]]['affilFr'] = bool(col [3])
Auteurs [col[0]]['affilFr'] = col [3]


for fic in [ndf, 'Families'+ndf]:
Expand Down Expand Up @@ -240,8 +246,8 @@
for aut in Auteurs.keys():
if 'affilFr' not in Auteurs [aut].keys():
Auteurs [aut]['affilFr'] = False
AuteursFr = [cle for cle in Auteurs.keys() if Auteurs[cle]['affilFr']]#{cle for cle, val in Auteurs.items() if "france" in val.lower()}
AuteursNotFr = [cle for cle in Auteurs.keys() if not Auteurs[cle]['affilFr']]
AuteursFr = [cle for cle in Auteurs.keys() if Auteurs[cle]['affilFr'] == 'True']#{cle for cle, val in Auteurs.items() if "france" in val.lower()}
AuteursNotFr = [cle for cle in Auteurs.keys() if Auteurs[cle]['affilFr'] == 'False']

Applis = []
Techno = dict()
Expand All @@ -260,6 +266,9 @@ def cycle (liste):
for indice in range(taille):
tempo.append((liste [indice], liste[indice+1]))
return tempo
AuteursFr2 = []
for aut in AuteursFr:
AuteursFr2.extend(aut.split(' '))

for bre in df['label']:
try:
Expand All @@ -272,11 +281,12 @@ def cycle (liste):
if len(inv)>0:
if inv in AuteursFr :
df .loc[df.index[df['label'] == bre], ['AutorFr']] += 1

Inventeurs = set()
for bre in DataBrevet ['brevets']:
for appl in bre['applicant']:
if len(appl) ==1:
print()
print("encore des données pas bonnes !")
appl=appl.upper()
if appl in Techno.keys():
for cib in bre ['IPCR11']:
Expand All @@ -290,6 +300,10 @@ def cycle (liste):
print("ARFFFFF")
Inventeurs.add(inv.title())
Techno[inv.title()] = [cib for cib in bre ['IPCR11']]
if inv not in Auteurs.keys() and inv.title() not in Auteurs.keys():
#authors coming from "families sets"
print ('GRRR: ', inv)
Auteurs [inv] = {'publis': -1, 'publisMatch': 0, 'affilFr': 'False'}
GraphAuteurs = nx.DiGraph()
GraphApplicant = nx.DiGraph()
GraphBrevets = nx.DiGraph()
Expand Down Expand Up @@ -408,8 +422,10 @@ def cycle (liste):
if aut in dicoAttrsAut.keys():
if dicoAttrsAut [aut]['AutFr']:
typeAut = 'AutFr'
else:
elif not dicoAttrsAut [aut]['AutFr']:
typeAut = 'AutEtr'
else:
typeAut = 'PasSurPubMed'
dicoAttrsAut [aut] = {'AutFr': dicoAttrsAut [aut]['AutFr'],
'Citations' : dicoAttrsAut [aut]['Citations'] + bre['Citations'],
'Famille' : df['family lenght'].loc[df.index[df['label'] == bre['label']]].values[0],
Expand All @@ -422,19 +438,26 @@ def cycle (liste):
'IPCForce' : len(Techno [aut])
}
else:
typeAut = 'PasSurPubMed'
dicoAttrsAut [aut] = {'AutFr': aut.title() in AuteursFr,
'Citations' : bre['Citations'],
'Famille' : df['family lenght'].loc[df.index[df['label'] == bre['label']]].values[0],
'type' : typeAut,
'NbBrevets' : 1,
'IPC11-range' : Techno[aut],
'IPC7-range' : Techno[aut],
'IPC4-range' : Techno[aut],
'IPCDiversity': len(set(Techno [aut])),
'IPCForce' : len(Techno [aut])
}


dicoAttrsAut [aut] = {'AutFr': aut.title() in AuteursFr,
'Citations' : bre['Citations'],
'Famille' : df['family lenght'].loc[df.index[df['label'] == bre['label']]].values[0],
'NbBrevets' : 1,
'IPC11-range' : Techno[aut],
'IPC7-range' : Techno[aut],
'IPC4-range' : Techno[aut],
'IPCDiversity': len(set(Techno [aut])),
'IPCForce' : len(Techno [aut])
}
if dicoAttrsAut [aut]['AutFr']:
typeAut = 'AutFr'
elif not dicoAttrsAut [aut]['AutFr']:
typeAut = 'AutEtr'
else:
typeAut = 'PasSurPubMed'
dicoAttrsAut [aut] ['type'] = typeAut


# chaining collaborations

if isinstance(bre['inventor'], list) and len( bre['inventor'])>1:
Expand Down Expand Up @@ -529,7 +552,8 @@ def cycle (liste):
IPC4 +=dicoAttrs [appl]['IPC4-range']
IPCForce += Techno [appl]


lstFr= [truc.split('-')[1] for truc in os.listdir(configFile.ResultPath +'//AcadCorpora/Fr')]
lstEtr= [truc.split('-')[1] for truc in os.listdir(configFile.ResultPath +'//AcadCorpora/NoFr')]
for appl in Inventeurs:
Techno [appl] = list(filter(lambda x: x !='', Techno [appl]))
appl=appl.title()
Expand All @@ -551,6 +575,29 @@ def cycle (liste):
'IPCDiversity': len(set(Techno [appl])),
'IPCForce' : IPCForceAut
}

for aut in Auteurs.keys():
#if aut in PubMedAuct:
# pas forcément vrai j'ai dû zapper cette info dans le traitement
# on retrouve les étrangers dans le dossier et fait un check par leur nom... tordu
if aut.replace(' ','') in lstFr and aut.replace(' ','') not in lstEtr:
typeAut = 'AutFr'
elif aut.replace(' ','') in lstEtr and aut.replace(' ','') not in lstFr:
typeAut = 'AutEtr'


elif aut.replace(' ','') not in lstFr and aut.replace(' ','') not in lstEtr:
typeAut = 'PasSurPubMed'

elif aut in AuteursFr:
typeAut = 'AutFr'
else:
typeAut = 'AutEtr'


if aut in dicoAttrsAut.keys():
dicoAttrsAut [aut] ['type'] = typeAut


IPCDiversity = len(set(IPCForce))
IPCForce = len(IPCForce)
Expand Down Expand Up @@ -578,7 +625,7 @@ def cycle (liste):
ligne = str(tailleCrp) +';'
if tailleCrp ==0:
tailleCrp =1

# [aut for aut in dicoAttrsAut.keys() if dicoAttrsAut[aut]["type"]=="PasSurPubMed"]
ligne += str(NbBrevets) +';'+\
str(len(set(Applis)))+';'+\
str(len(dicoAttrsAut.keys())) +';'+\
Expand Down
Binary file added Patent2Net/PreTRaiteP7.txt
Binary file not shown.
32 changes: 28 additions & 4 deletions Patent2Net/SplitCorpus2.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,13 @@
shutil.copy(RepDir + '/'+"traceAuct.csv", ResultBiblioPath.replace('/PatentBiblios', '') + '/AcadCorpora/')
shutil.copy(RepDir + '/'+"AuteursPAsMatches.tsv", ResultBiblioPath.replace('/PatentBiblios', '') + '/AcadCorpora/')
shutil.copy(RepDir + '/'+"AuteursMatches.tsv", ResultBiblioPath.replace('/PatentBiblios', '') + '/AcadCorpora/')

if 'Fr' not in os.listdir(ResultBiblioPath.replace('/PatentBiblios', '') + '/AcadCorpora/'):
shutil.copytree(RepDir + '/Fr',
ResultBiblioPath.replace('/PatentBiblios', '') + '/AcadCorpora/Fr')
if 'NoFr' not in os.listdir(ResultBiblioPath.replace('/PatentBiblios', '') + '/AcadCorpora/'):

shutil.copytree(RepDir + '/NoFr',
ResultBiblioPath.replace('/PatentBiblios', '') + '/AcadCorpora/NoFr')
if "InventeurNormes.pkl" in os.listdir(BiblioPath):
shutil.copy(BiblioPath+'/InventeurNormes.pkl', ResultBiblioPath)
shutil.copy(BiblioPath+'/NormInventeurs.pkl', ResultBiblioPath)
Expand Down Expand Up @@ -340,7 +346,13 @@
shutil.copy(RepDir + '/'+"traceAuct.csv", ResultBiblioPath.replace('/PatentBiblios', '') + '/AcadCorpora/')
shutil.copy(RepDir + '/'+"AuteursPAsMatches.tsv", ResultBiblioPath.replace('/PatentBiblios', '') + '/AcadCorpora/')
shutil.copy(RepDir + '/'+"AuteursMatches.tsv", ResultBiblioPath.replace('/PatentBiblios', '') + '/AcadCorpora/')

if 'Fr' not in os.listdir(ResultBiblioPath.replace('/PatentBiblios', '') + '/AcadCorpora/'):
shutil.copytree(RepDir + '/Fr',
ResultBiblioPath.replace('/PatentBiblios', '') + '/AcadCorpora/Fr')
if 'NoFr' not in os.listdir(ResultBiblioPath.replace('/PatentBiblios', '') + '/AcadCorpora/'):

shutil.copytree(RepDir + '/NoFr',
ResultBiblioPath.replace('/PatentBiblios', '') + '/AcadCorpora/NoFr')
if "InventeurNormes.pkl" in os.listdir(BiblioPath):
shutil.copy(BiblioPath+'/InventeurNormes.pkl', ResultBiblioPath)
shutil.copy(BiblioPath+'/NormInventeurs.pkl', ResultBiblioPath)
Expand Down Expand Up @@ -370,7 +382,13 @@
shutil.copy(RepDir + '/'+"traceAuct.csv", ResultBiblioPath.replace('/PatentBiblios', '') + '/AcadCorpora/')
shutil.copy(RepDir + '/'+"AuteursPAsMatches.tsv", ResultBiblioPath.replace('/PatentBiblios', '') + '/AcadCorpora/')
shutil.copy(RepDir + '/'+"AuteursMatches.tsv", ResultBiblioPath.replace('/PatentBiblios', '') + '/AcadCorpora/')

if 'Fr' not in os.listdir(ResultBiblioPath.replace('/PatentBiblios', '') + '/AcadCorpora/'):
shutil.copytree(RepDir + '/Fr',
ResultBiblioPath.replace('/PatentBiblios', '') + '/AcadCorpora/Fr')
if 'NoFr' not in os.listdir(ResultBiblioPath.replace('/PatentBiblios', '') + '/AcadCorpora/'):

shutil.copytree(RepDir + '/NoFr',
ResultBiblioPath.replace('/PatentBiblios', '') + '/AcadCorpora/NoFr')
if "InventeurNormes.pkl" in os.listdir(BiblioPath):
shutil.copy(BiblioPath+'/InventeurNormes.pkl', ResultBiblioPath)
shutil.copy(BiblioPath+'/NormInventeurs.pkl', ResultBiblioPath)
Expand Down Expand Up @@ -399,7 +417,13 @@
shutil.copy(RepDir + '/'+"traceAuct.csv", ResultBiblioPath.replace('/PatentBiblios', '') + '/AcadCorpora/')
shutil.copy(RepDir + '/'+"AuteursPAsMatches.tsv", ResultBiblioPath.replace('/PatentBiblios', '') + '/AcadCorpora/')
shutil.copy(RepDir + '/'+"AuteursMatches.tsv", ResultBiblioPath.replace('/PatentBiblios', '') + '/AcadCorpora/')

if 'Fr' not in os.listdir(ResultBiblioPath.replace('/PatentBiblios', '') + '/AcadCorpora/'):
shutil.copytree(RepDir + '/Fr',
ResultBiblioPath.replace('/PatentBiblios', '') + '/AcadCorpora/Fr')
if 'NoFr' not in os.listdir(ResultBiblioPath.replace('/PatentBiblios', '') + '/AcadCorpora/'):

shutil.copytree(RepDir + '/NoFr',
ResultBiblioPath.replace('/PatentBiblios', '') + '/AcadCorpora/NoFr')
if "InventeurNormes.pkl" in os.listdir(BiblioPath):
shutil.copy(BiblioPath+'/InventeurNormes.pkl', ResultBiblioPath)
shutil.copy(BiblioPath+'/NormInventeurs.pkl', ResultBiblioPath)
Expand Down
4 changes: 2 additions & 2 deletions Patent2Net/scriptP7.ps1
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
$Reps = "requests3", "request2", "request", "REQ-P7"
$Reps = "requests3", "requests2", "requests", "REQ-P7"
foreach ($scriptPath in $Reps) {
$scriptPath = (-join('..\', $scriptPath, '\'))
echo $scriptPath
$filename = Get-ChildItem ($scriptPath)
#foreach ($f in $filename) {python AcadStatsSuiteDer.py (-join($scriptPath,$f))}
foreach ($f in $filename) {python P2N-Nets.py (-join($scriptPath,$f))}
foreach ($f in $filename) {foreach ($app in (Get-Content .\scriptlist.txt)) {python $app --config=(-join($scriptPath,$f))}}
# foreach ($f in $filename) {foreach ($app in (Get-Content .\scriptlist-P7.txt)) {python $app --config=(-join($scriptPath,$f))}}
}
3 changes: 3 additions & 0 deletions dex.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
// Emits an HTML unordered list of result pages via document.write.
// Each <li> links to a page under DATA/ and opens it in a new tab/window
// (target="_blank"): P7-Univ, P7-Largebis and P7-Large.
// The trailing backslashes are line continuations inside the single-quoted
// string literal, so the whole markup is one string; keep the backslash as the
// last character on each continued line.
document.write('\
<ul>\
<li><a href="DATA/P7-Univ.html" target="_blank">P7-Univ</a></li>\
<li><a href="DATA/P7-Largebis.html" target="_blank">P7-Largebis</a></li>\
<li><a href="DATA/P7-Large.html" target="_blank">P7-Large</a></li>\
</ul>\
');

0 comments on commit b1643c3

Please sign in to comment.