In [1]:
import os

import pandas as pd
from neo4j import GraphDatabase
from tqdm import tqdm

# Neo4j connection settings. Each value can be overridden through an environment
# variable so the notebook can target another server without being edited.
uri = os.environ.get("NEO4J_URI", "neo4j://localhost:7687")
username = os.environ.get("NEO4J_USERNAME", "neo4j")
# NOTE(review): the literal fallback keeps existing runs working, but it leaves a
# credential in the notebook source -- rotate it and drop the default.
password = os.environ.get("NEO4J_PASSWORD", "912699176")

# Driver object from which the cells below open their sessions.
driver = GraphDatabase.driver(uri, auth=(username, password))


# Relative locations of the company-node CSV and of one CSV per relationship type.
companies_path = '../KnowledgeGraph/hidy.nodes.company.csv'
relationship_compete_path = '../KnowledgeGraph/hidy.relationships.compete.csv'
relationship_cooperate_path = '../KnowledgeGraph/hidy.relationships.cooperate.csv'
relationship_dispute_path = '../KnowledgeGraph/hidy.relationships.dispute.csv'
relationship_invest_path = '../KnowledgeGraph/hidy.relationships.invest.csv'
relationship_same_industry_path = '../KnowledgeGraph/hidy.relationships.same_industry.csv'
relationship_supply_path = '../KnowledgeGraph/hidy.relationships.supply.csv'

# Read every CSV into its own DataFrame, in the same order as the paths above.
(
    companies_df,
    relationship_compete_df,
    relationship_cooperate_df,
    relationship_dispute_df,
    relationship_invest_df,
    relationship_same_industry_df,
    relationship_supply_df,
) = (
    pd.read_csv(csv_path)
    for csv_path in (
        companies_path,
        relationship_compete_path,
        relationship_cooperate_path,
        relationship_dispute_path,
        relationship_invest_path,
        relationship_same_industry_path,
        relationship_supply_path,
    )
)

FileNotFoundError: [Errno 2] No such file or directory: '../KnowledgeGraph/hidy.nodes.company.csv'

In [2]:
# Map a display title to the list of column names of each loaded DataFrame.
columns_info = {
    title: frame.columns.tolist()
    for title, frame in (
        ("Companies Columns", companies_df),
        ("Compete Relationship Columns", relationship_compete_df),
        ("Cooperate Relationship Columns", relationship_cooperate_df),
        ("Dispute Relationship Columns", relationship_dispute_df),
        ("Invest Relationship Columns", relationship_invest_df),
        ("Same Industry Relationship Columns", relationship_same_industry_df),
        ("Supply Relationship Columns", relationship_supply_df),
    )
}

# Bare expression so the notebook displays the mapping.
columns_info

{'Companies Columns': [':ID', 'company_name', 'code', ':LABEL'],
 'Compete Relationship Columns': [':START_ID', ':END_ID', ':TYPE', 'time'],
 'Cooperate Relationship Columns': [':START_ID', ':END_ID', ':TYPE', 'time'],
 'Dispute Relationship Columns': [':START_ID', ':END_ID', ':TYPE', 'time'],
 'Invest Relationship Columns': [':START_ID', ':END_ID', ':TYPE'],
 'Same Industry Relationship Columns': [':START_ID',
  ':END_ID',
  ':TYPE',
  'time'],
 'Supply Relationship Columns': [':START_ID', ':END_ID', ':TYPE']}

In [3]:
def create_company_node(tx, id, name, code, label):
    """Create or update the Company node identified by ``id``.

    The node is matched on ``id`` alone and the remaining properties are then
    written with ``SET``. Merging on the full property map would create a
    second node for the same ``id`` whenever any other property differed.

    Args:
        tx: Object exposing ``run(query, **parameters)``; it is passed in by
            ``session.execute_write`` in the calling cell.
        id: Value stored in the node's ``id`` property and used as merge key.
        name: Value stored in the ``name`` property.
        code: Value stored in the ``code`` property.
        label: Value stored in the ``label`` property (a plain property, not
            a Neo4j node label).
    """
    query = (
        "MERGE (c:Company {id: $id}) "
        "SET c.name = $name, c.code = $code, c.label = $label"
    )
    tx.run(query, id=id, name=name, code=code, label=label)

# Write the company rows to the graph, one write transaction per DataFrame row.
with driver.session() as session:
    company_rows = tqdm(
        companies_df.iterrows(),
        total=len(companies_df),
        desc="Company Nodes",
    )
    for _, company in company_rows:
        session.execute_write(
            create_company_node,
            company[':ID'],
            company['company_name'],
            company['code'],
            company[':LABEL'],
        )

Company Nodes: 100%|██████████| 3974/3974 [00:27<00:00, 143.85it/s]


In [5]:
def _quote_relation_type(relation_type):
    """Return ``relation_type`` as a backtick-quoted Cypher identifier.

    The relationship type is spliced into the query text rather than passed
    as a parameter, so it is quoted here and any embedded backtick is doubled.
    This keeps types containing spaces or punctuation valid and stops a
    crafted value from altering the query.
    """
    return "`" + str(relation_type).replace("`", "``") + "`"


def create_dynamic_relation(tx, start_id, end_id, relation_type, time):
    """Merge a ``relation_type`` relationship carrying a ``time`` property.

    Args:
        tx: Object exposing ``run(query, **parameters)``.
        start_id: ``id`` property of the source Company node.
        end_id: ``id`` property of the target Company node.
        relation_type: Relationship type name; quoted before being spliced
            into the query text.
        time: Value stored in the relationship's ``time`` property; it is
            part of the merge pattern.
    """
    query = (
        "MATCH (a:Company {id: $start_id}), (b:Company {id: $end_id}) "
        f"MERGE (a)-[:{_quote_relation_type(relation_type)} {{time: $time}}]->(b)"
    )
    tx.run(query, start_id=start_id, end_id=end_id, time=time)


def create_dynamic_relation_without_time(tx, start_id, end_id, relation_type):
    """Merge a ``relation_type`` relationship that has no properties.

    Args:
        tx: Object exposing ``run(query, **parameters)``.
        start_id: ``id`` property of the source Company node.
        end_id: ``id`` property of the target Company node.
        relation_type: Relationship type name; quoted before being spliced
            into the query text.
    """
    query = (
        "MATCH (a:Company {id: $start_id}), (b:Company {id: $end_id}) "
        f"MERGE (a)-[:{_quote_relation_type(relation_type)}]->(b)"
    )
    tx.run(query, start_id=start_id, end_id=end_id)

The compete, cooperate, dispute and same_industry relationship files include a `time` column, so those relationships are created with the `create_dynamic_relation` function.

The invest and supply relationship files have no `time` column, so those relationships are created with the `create_dynamic_relation_without_time` function instead.

In [6]:
# Load the four relationship tables that carry a 'time' column. They are
# processed in order (compete, cooperate, dispute, same_industry), with one
# write transaction per row.
with driver.session() as session:
    for edges_df, bar_label in (
        (relationship_compete_df, "Compete Relationships"),
        (relationship_cooperate_df, "Cooperate Relationships"),
        (relationship_dispute_df, "Dispute Relationships"),
        (relationship_same_industry_df, "Same Industry Relationships"),
    ):
        for _, edge in tqdm(edges_df.iterrows(), total=len(edges_df), desc=bar_label):
            session.execute_write(
                create_dynamic_relation,
                edge[':START_ID'],
                edge[':END_ID'],
                edge[':TYPE'],
                edge['time'],
            )


Compete Relationships: 100%|██████████| 25/25 [00:00<00:00, 80.27it/s]
Cooperate Relationships: 100%|██████████| 3603/3603 [00:27<00:00, 129.02it/s]
Dispute Relationships: 100%|██████████| 439/439 [00:03<00:00, 110.92it/s]
Same Industry Relationships: 100%|██████████| 5596/5596 [00:52<00:00, 107.16it/s]


In [7]:
# Load the two relationship tables without a 'time' column (invest, then
# supply), with one write transaction per row.
with driver.session() as session:
    for edges_df, bar_label in (
        (relationship_invest_df, "Invest Relationships"),
        (relationship_supply_df, "Supply Relationships"),
    ):
        for _, edge in tqdm(edges_df.iterrows(), total=len(edges_df), desc=bar_label):
            session.execute_write(
                create_dynamic_relation_without_time,
                edge[':START_ID'],
                edge[':END_ID'],
                edge[':TYPE'],
            )

Invest Relationships: 100%|██████████| 559/559 [00:05<00:00, 109.06it/s]
Supply Relationships: 100%|██████████| 1444/1444 [00:13<00:00, 104.67it/s]


In [None]:
def fetch_relationships(tx, company_name):
    """List the outgoing relationships of the Company named ``company_name``.

    Args:
        tx: Object exposing ``run(query, **parameters)`` whose result can be
            iterated to yield records indexable by column name.
        company_name: Value matched against the ``name`` property of the
            source Company node.

    Returns:
        A list of ``(relation_type, company_name)`` tuples, one per returned
        record, where the second element is the ``name`` of the node at the
        far end of the relationship.
    """
    cypher = (
        "MATCH (a:Company {name: $company_name})-[r]->(b) "
        "RETURN type(r) as relation_type, b.name as company_name"
    )
    pairs = []
    for rec in tx.run(cypher, company_name=company_name):
        pairs.append((rec["relation_type"], rec["company_name"]))
    return pairs


In [None]:
import pandas as pd
# The input has an .xlsx extension, so it is read with read_excel rather than
# read_csv. NOTE(review): this needs an Excel engine (e.g. openpyxl) installed.
# If the file is really CSV text with a misleading name, rename the file.
df = pd.read_excel("Test.xlsx")

# Relationship types treated as adversarial / friendly when propagating a label.
# NOTE(review): these must match the ':TYPE' values stored in the graph exactly
# (including case) -- confirm against the relationship CSVs.
negative_relations = ['compete', 'dispute']
positive_relations = ['cooperate', 'invest', 'same_industry', 'supply']  # adjust to match the actual data

df['Implicit_Negative_Company'] = None
df['Implicit_Positive_Company'] = None

with driver.session() as session:
    for index, row in df.iterrows():
        # split() with no argument drops the empty strings that split(' ')
        # produces for repeated or trailing spaces, so no lookups are issued
        # for empty company names.
        companies = row['name'].split()
        implicit_negative = []
        implicit_positive = []

        for company in companies:
            # execute_read replaces the deprecated read_transaction and matches
            # the execute_write calls used elsewhere in this notebook.
            relationships = session.execute_read(fetch_relationships, company)

            for relation_type, related_company in relationships:
                if row['label'] == 1:
                    # Label 1: negative relations feed the negative list and
                    # positive relations feed the positive list.
                    if relation_type in negative_relations:
                        implicit_negative.append(related_company)
                    elif relation_type in positive_relations:
                        implicit_positive.append(related_company)
                else:
                    # Any other label: the mapping is inverted.
                    if relation_type in positive_relations:
                        implicit_negative.append(related_company)
                    elif relation_type in negative_relations:
                        implicit_positive.append(related_company)

        # Store the related names space-separated; the literal string 'None'
        # marks rows for which nothing was found.
        df.at[index, 'Implicit_Negative_Company'] = ' '.join(implicit_negative) if implicit_negative else 'None'
        df.at[index, 'Implicit_Positive_Company'] = ' '.join(implicit_positive) if implicit_positive else 'None'
