In [1]:
import os
import pandas as pd
from collections import Counter, OrderedDict
import networkx as nx
import nx_altair as nxa
import altair as alt
from networkx.drawing.nx_agraph import graphviz_layout
import matplotlib.pyplot as plt

In [2]:
# load data
filepath = os.path.join(os.getcwd(), 'Datensatz_Erfinder_raw.csv')
data = pd.read_csv(filepath , sep = ";")

In [3]:
# count companies patents
patent_counter = Counter()
for row in data.itertuples():    
    row_df = pd.DataFrame(row).transpose().drop(0, axis=1)
    row_df.columns = data.columns            
    patent_counter[row_df["Anmelder/Inhaber"][0]] += 1
sorted_most_common_companies = sorted(patent_counter, key=patent_counter.get, reverse=True)

# get most common 10 companies
n_most_common_companies = sorted_most_common_companies[0:10]
n_most_common_companies_df = data[data['Anmelder/Inhaber'].isin(n_most_common_companies)]

n_most_common_companies_df

Unnamed: 0,\t\tlfd. Nummer,Veröffentlichungs-Nummer,Anmeldedatum,Erfinder,Anmelder/Inhaber,Titel,Zusammenfassung,Länderkürzel,Land,Prozessstand,Anmelderländerkürzel,Anmelderland
169,170,EP000003582168A1,14.06.2019,"EDWARDS JOSHUA, US; MOSSOBA MICHAEL, US","CAPITAL ONE SERVICES LLC, US",[DE] HALBPRIVATE VIRTUELLE BLOCKKETTENWÄHRUNG...,[EN] A first merchant node on a semi-private...,EP,Europa,A,US,USA
175,176,EP000003570244A1,20.05.2019,"PHILLIPS JEREMY, US; TRAN KURRY, US","CAPITAL ONE SERVICES LLC, US",[DE] SICHERES SYSTEM [EN] A SECURE SYSTEM [...,[EN] A device may receive application questi...,EP,Europa,A,US,USA
188,189,EP000003537363A1,05.03.2019,"MARTIN ROBERT, US; MILLER WALTER AVERY, US; SM...","CAPITAL ONE SERVICES LLC, US",[DE] SYSTEME UND VERFAHREN ZUR VERWENDUNG VON...,[EN] Disclosed is a distributed ledger proce...,EP,Europa,A,US,USA
224,225,EP000003428863A1,06.07.2018,"SILVERSTRE CARLOS, US",AMERICAN EXPRESS TRAVEL RELATED SERVICES CO IN...,[DE] FONDS-ÜBERTRAGUNGSDIENST FÜR MEHRERE VER...,"[EN] Systems, methods, and articles of manuf...",EP,Europa,A,US,USA
288,289,US020210119804A1,18.10.2019,"DESHPANDE AJAY ASHOK, US; HARSHA PAVITHRA, US;...","IBM, US",[EN] FRESHNESS VISIBILITY IN SUPPLY-CHAIN,[EN] An example operation may include one or...,US,USA,A,US,USA
...,...,...,...,...,...,...,...,...,...,...,...,...
3195,3196,WO002016186873A1,06.05.2016,"DAVIS STEVEN CHARLES, US","MASTERCARD INTERNATIONAL INC, US",[EN] METHOD AND SYSTEM FOR PROCESSING BLOCKCH...,[EN] A method for authorizing a blockchain-b...,WO,WIPO,A,US,USA
3196,3197,WO002016186872A1,06.05.2016,"DAVIS STEVEN CHARLES, US","MASTERCARD INTERNATIONAL INC, US",[EN] METHOD AND SYSTEM FOR FRAUD CONTROL OF B...,[EN] A method for Sinking blockchain transac...,WO,WIPO,A,US,USA
3197,3198,WO002016186870A1,06.05.2016,"DAVIS STEVEN CHARLES, US","MASTERCARD INTERNATIONAL INC, US",[EN] METHOD AND SYSTEM FOR LINKAGE OF BLOCKCH...,[EN] A method for managing fractional reserv...,WO,WIPO,A,US,USA
3198,3199,WO002016186869A1,06.05.2016,"DAVIS STEVEN CHARLES, US; TETALI ASHISH RAGHAV...","MASTERCARD INTERNATIONAL INC, US",[EN] METHOD AND SYSTEM FOR INTEGRATION OF MAR...,[EN] A method for authorization of a blockch...,WO,WIPO,A,US,USA


In [50]:

normalized_data = []
# fill patents inventors if they are not filled
n_most_common_companies_df['Erfinder'] = n_most_common_companies_df['Erfinder'].fillna(value='no Inventor')
# loop over most common companies
for row in n_most_common_companies_df.itertuples():
    row_df = pd.DataFrame(row).transpose().drop(0, axis=1)
    row_df.columns = n_most_common_companies_df.columns
    
    # get inventors normalized 
    inventors = row_df["Erfinder"][0].split(';')       
    # create new line for every Inventor and applicant
    for inventor in inventors:
        normalized_row = [row_df["Veröffentlichungs-Nummer"][0],inventor.strip(), row_df["Anmelder/Inhaber"][0]]
        normalized_data.append(normalized_row)

# drop duplicates based on inventor and applicant
normalized_data_df = pd.DataFrame(normalized_data, columns=["Veröffentlichungs-Nummer", "Erfinder", "Anmelder/Inhaber"]).drop_duplicates(subset=["Erfinder", "Anmelder/Inhaber"])
normalized_data_df.to_csv("Datensatz_Erfinder_normalisiert.csv", sep=";")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  n_most_common_companies_df['Erfinder'] = n_most_common_companies_df['Erfinder'].fillna(value='no Inventor')


In [45]:
# create graph from edgelist
inventor_graph = nx.from_pandas_edgelist(normalized_data_df,
source = "Erfinder",
target = "Anmelder/Inhaber")

# add "Erfinder" as node attribute
nx.set_node_attributes(inventor_graph, pd.Series(normalized_data_df.Erfinder).to_dict())

In [46]:
#draw networkx altair chart with "Erfinder" in tooltips
alt.data_transformers.disable_max_rows()
chart = nxa.draw_networkx(
    inventor_graph,
    node_color='Anmelder/Inhaber:N',
    cmap='viridis',
    edge_color='black',
    node_size=100,
    node_tooltip = ['Erfinder:N']
).properties(
    width=600,
    height=600
).interactive()
chart