Skip to content

Commit

Permalink
doc
Browse files Browse the repository at this point in the history
  • Loading branch information
Patent2net committed Jan 7, 2021
1 parent 3fc80cf commit 7c529a5
Show file tree
Hide file tree
Showing 39 changed files with 393 additions and 477 deletions.
6 changes: 6 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

75 changes: 75 additions & 0 deletions .idea/workspace.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 6 additions & 3 deletions Patent2Net/AcadStatsSuiteDer.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def Nettoie(Liste):
with open(Auteur+'//traceAuct.csv', 'r',) as fic:
dataAuct = fic.readlines()
Auteurs = dict()
for lig in dataAuct:
for lig in dataAuct[1:]:
lig = lig.strip()
col= lig.split(';')
if col[0] not in Auteurs.keys():
Expand Down Expand Up @@ -117,7 +117,7 @@ def Nettoie(Liste):
# Inventeurs2 = [inv for inv in Inventeurs if inv not in Inventeurs1]
print ("nombre d'inventeurs ", len(Inventeurs))
print ("Nombre d'inventeurs uniques :", len(set(Inventeurs)))
print ("nombre d'auteurs", sum([1 for aut in Auteurs.keys() if int(Auteurs [aut]['publis'])>0]))
print ("nombre d'auteurs", sum([1 for aut in Inventeurs if aut in Auteurs.keys() and int(Auteurs [aut]['publis'])>0]))
# print ("nombre d'auteurs FR", sum([1 for aut in Auteurs.keys() if Auteurs [aut]['affilFr'] == 'True' and int(Auteurs [aut]['publis'])>0]))
# print ("nombre d'auteurs pas FR", sum([1 for aut in Auteurs.keys() if Auteurs [aut]['affilFr'] == 'False' and int(Auteurs [aut]['publis'])>0]))
#print ("nombre de publications", sum([int(Auteurs [aut]['publis']) for aut in Auteurs.keys()])) données fausses dans le fichier trace auc. Des publis matchées alors que publis = 0 pour certains auteurs
Expand Down Expand Up @@ -219,7 +219,10 @@ def GenereListeFichiers(rep):
tempo = []
DejaVus = []
for lig in set(Datacsv[1:]):
pubmedId = lig.split(';')[4]
if len(lig.split(';')) >3:
pubmedId = lig.split(';')[4]
else: # seems to be a problem with the DOI
pubmedId = lig[0]
if pubmedId not in DejaVus:
tempo.append(lig)
DejaVus.append(pubmedId)
Expand Down
1 change: 1 addition & 0 deletions Patent2Net/AcadTraite2.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ def Nettoie(Liste):
return list(filter(lambda x: x not in indesirables, Liste))

# test de consistance

with open(Auteur+'//DejaTraites.csv', 'r',) as fic:
DejaVus = fic.readlines()

Expand Down
2 changes: 2 additions & 0 deletions Patent2Net/AnalyseReseaux2.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
"""
Created on Sat Jun 29 07:41:54 2019
OLD version. Use P2N-Nets !!!!
@author: dreymond
"""

Expand Down
122 changes: 78 additions & 44 deletions Patent2Net/P2N-Nets.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,21 +84,8 @@
# Liste = [' '.join([truc.lower().title() for truc in nom.split(' ')]) for nom in Liste ]
# return list(filter(lambda x: x not in indesirables, Liste))

# test de consistance
with open(Auteur+'//DejaTraites.csv', 'r',) as fic:
DejaVus = fic.readlines()

if len (set(DejaVus)) == len(DataBrevet['brevets']):
print ('Youhou, tous les brevets ' + ndf + ' ont été traités.')
print ('Nb de brevets : ', len(DataBrevet['brevets']))

else:
reste = [bre['label'] for bre in DataBrevet['brevets'] if bre['label'] not in DejaVus ]
print ('il reste ', len(reste), ' brevets à traiter')

# Analyse stat des résultats
print ("""Ceux qui ont changé d'affiliation la première trouvée puis la seconde...
S'il en est plus de deux, ce sera la première puis la 2e....""")

lstfic = os.listdir(configFile.ResultPath +'//AcadCorpora')
# loading file from preProcessNormalisationNames
# inventors names are normalised there
Expand All @@ -111,37 +98,80 @@
Inventeur_Norm [cle] = [truc.title() for truc in Inventeur_Norm [cle]]

InvNormes = [aut.title() for cle in Inventeur_Norm.keys() for aut in Inventeur_Norm [cle]]
with codecs.open(configFile.ResultPath +'//AcadCorpora//AuteursAffil.csv', 'r', 'utf8') as fic:
data = fic.readlines()
multiAut = 0 # inventeurs prolixes
AffilDiff = 0 # les affiliations différentes
Auteurs = dict()
for lig in data:
col = lig .strip()
col = col.split(';')
if col[0].title() in InvNormes:
if col[0].title() in Inventeur_Norm.keys():
Auteurs [ col[0].title()] = col[1]
elif NoPunct(col[0].title()) in Inventeur_Norm.keys():
Auteurs [NoPunct(col[0].title())] = col[1]
# with codecs.open(configFile.ResultPath +'//AcadCorpora//AuteursAffil.csv', 'r', 'utf8') as fic:
# data = fic.readlines()
# multiAut = 0 # inventeurs prolixes
# AffilDiff = 0 # les affiliations différentes
# Auteurs = dict()
# for lig in data:
# col = lig .strip()
# col = col.split(';')
# if col[0].title() in InvNormes:
# if col[0].title() in Inventeur_Norm.keys():
# Auteurs [ col[0].title()] = col[1]
# elif NoPunct(col[0].title()) in Inventeur_Norm.keys():
# Auteurs [NoPunct(col[0].title())] = col[1]

else:
Auteurs [[cle.title() for cle in Inventeur_Norm.keys() if col[0].title() in Inventeur_Norm[cle]][0].title()] = col[1]
if col[0].title() not in Auteurs.keys():
Auteurs [col[0].title()] = col[1]
elif NoPunct(col[0].title()) not in Auteurs.keys():
Auteurs [NoPunct(col[0].title())] = col[1]
else:
if col[1] != Auteurs [col[0].title()] and '???' not in col[1]:
print (col[0], " --> ", Auteurs [col[0]])
print (col[0], " --> ", col[1])
# else:
# Auteurs [[cle.title() for cle in Inventeur_Norm.keys() if col[0].title() in Inventeur_Norm[cle]][0].title()] = col[1]
# if col[0].title() not in Auteurs.keys():
# Auteurs [col[0].title()] = col[1]
# elif NoPunct(col[0].title()) not in Auteurs.keys():
# Auteurs [NoPunct(col[0].title())] = col[1]
# else:
# if col[1] != Auteurs [col[0].title()] and '???' not in col[1]:
# print (col[0], " --> ", Auteurs [col[0]])
# print (col[0], " --> ", col[1])

AffilDiff +=1
multiAut+=1 # non sens, le script peut recollecter plusieurs fois le même
else:
multiAut+=1
pass
# AffilDiff +=1
# multiAut+=1 # non sens, le script peut recollecter plusieurs fois le même
# else:
# multiAut+=1
# pass
Auteurs = dict()
with open(RepDir + "//AuteursMatches.tsv", "r", encoding = 'utf8') as ficMatch:
DataMatch = ficMatch.readlines()[1:]

for lig in DataMatch:
col = lig.strip().split("\t")
if col[0] not in Auteurs.keys():
Auteurs [col[0]] = dict()
else:
print ("pb here")
Auteurs [col[0]]["publisMatch"] = int(col [1])
Auteurs [col[0]]["publis"] = int(col [2])
Auteurs [col[0]]["Score moyen"] = float(col [3])



with open(RepDir + "//AuteursPAsMatches.tsv", "r", encoding = 'utf8') as ficMatch:
DataPasMatch = ficMatch.readlines()[1:]

for lig in DataPasMatch:
col = lig.strip().split("\t")
if col[0] not in Auteurs.keys():
Auteurs [col[0]] = dict()
else:
print ("big pb here")
Auteurs [col[0]]["publisMatch"] = 0
Auteurs [col[0]]["publis"] = 0
Auteurs [col[0]]["Score moyen"] = 0

with open(Auteur+'//traceAuct.csv', 'r',) as fic:
dataAuct = fic.readlines()

for lig in dataAuct[1:]:
lig = lig.strip()
col= lig.split(';')
if col[0] not in Auteurs.keys():
Auteurs [col[0]] = dict()
Auteurs [col[0]]['publis'] = int(col [1])
Auteurs [col[0]]['publisMatch'] = int(col [2])
Auteurs [col[0]]['affilFr'] = bool(col [3])
else: # merging
Auteurs [col[0]]['publis'] += int(col [1])
Auteurs [col[0]]['publisMatch'] += int(col [2])
Auteurs [col[0]]['affilFr'] = bool(col [3])


for fic in [ndf, 'Families'+ndf]:
Expand Down Expand Up @@ -206,8 +236,12 @@
df_Fam = pd.DataFrame(LstBrevet)
else:
df = pd.DataFrame(LstBrevet)
AuteursFr = {cle for cle, val in Auteurs.items() if "france" in val.lower()}
AuteursNotFr = {cle for cle, val in Auteurs.items() if not Check(val, AuteursFr)}

for aut in Auteurs.keys():
if 'affilFr' not in Auteurs [aut].keys():
Auteurs [aut]['affilFr'] = False
AuteursFr = [cle for cle in Auteurs.keys() if Auteurs[cle]['affilFr']]#{cle for cle, val in Auteurs.items() if "france" in val.lower()}
AuteursNotFr = [cle for cle in Auteurs.keys() if not Auteurs[cle]['affilFr']]

Applis = []
Techno = dict()
Expand Down
13 changes: 8 additions & 5 deletions Patent2Net/PreScriptP7.ps1
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
# Script to filter and normalize names. P2N now performs these steps on its own. The corpus splitter is then called.

$Reps = "REQ-P7"
$scriptPath = (-join('..\', $scriptPath, '\'))
$filename = Get-ChildItem (-join('..\', $scriptPath, '\'))
# foreach ($f in $filename) {python PatentListFiltering.py (-join('..\', $scriptPath, '\',$f))}
foreach ($f in $filename) {python preProcessNormalisationNames.py (-join('..\', $scriptPath, '\',$f))}
foreach ($f in $filename) {python SplitCorpus2.py (-join('..\', $scriptPath, '\',$f))}
$scriptPath = (-join('..\', $Reps))
$filename = Get-ChildItem ($scriptPath)
# foreach ($f in $filename) {python PatentListFiltering.py (-join('..\', $scriptPath, '\',$f))}
# foreach ($f in $filename) {python preProcessNormalisationNames.py (-join('..\', $scriptPath, '\',$f))}
# The next step is needed to generate publication auctoriality and affiliation.
# It will be launched again after corpora splitting, for the sub-corpora.
foreach ($f in $filename) {python AcadStatsSuiteDer.py (-join($scriptPath, '\',$f))}
foreach ($f in $filename) {python SplitCorpus2.py (-join($scriptPath, '\',$f))}
8 changes: 8 additions & 0 deletions Patent2Net/ScriptList-P7.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
FormateExportAttractivityCartography.py
FormateExportBiblio.py
FormateExportCountryCartography.py
FormateExportDataTableFamilies.py
FormateExportDataTable.py
FormateExportPivotTable.py
P2N-FreePlane.py
Interface2.py

0 comments on commit 7c529a5

Please sign in to comment.