In [2]:
# Hand-written sample payload: a dict with an "articles" list whose items
# each carry a single "title" string. Later cells read response["articles"].
_sample_titles = (
    "Prime Minister inaugurates the $400 temple in Kanpur",
    "President launches new initiative",
)
response = {"articles": [{"title": headline} for headline in _sample_titles]}

In [3]:
import spacy
import re

# Module-level spaCy pipeline; get_main_verb() below reads this global, so
# this line must run before that function is called.
nlp = spacy.load("en_core_web_sm")

def get_main_verb(sentence):
    """Return the text of the first non-auxiliary verb in ``sentence``.

    The sentence is parsed with the module-level ``nlp`` pipeline and the
    tokens are scanned in order. The first token whose ``pos_`` is "VERB"
    and whose ``dep_`` is not "aux" wins.

    Returns:
        The matching token's ``text``, or ``None`` when no token qualifies.
    """
    verb_texts = (
        tok.text
        for tok in nlp(sentence)
        if tok.pos_ == "VERB" and tok.dep_ != "aux"
    )
    # next() with a default mirrors "first hit or None".
    return next(verb_texts, None)

In [4]:
from DataModelling import DataModel

data_model = DataModel()

# For every article title, split it around its main verb into
# (entity1, relation, entity2) and record the triple in the data model.
for article in response["articles"]:
    title = article["title"]
    main_verb = get_main_verb(title)
    if main_verb is None:
        # get_main_verb returns None when it finds no verb; interpolating
        # that into the pattern would search for the literal word "None".
        print(f"Skipped {title!r}: no main verb found")
        continue

    # re.escape keeps any regex metacharacters in the verb text literal.
    custom_pattern = re.compile(
        fr'(?P<entity1>.+?)\s+(?P<relation>\b(?:{re.escape(main_verb)})\b)\s+(?P<entity2>.+)'
    )
    match = custom_pattern.match(title)
    if match is None:
        # The pattern needs text on both sides of the verb; a title that
        # starts or ends with the verb does not match.
        print(f"Skipped {title!r}: could not split around {main_verb!r}")
        continue

    entity1 = match.group("entity1").strip()
    entity2 = match.group("entity2").strip()
    relation = match.group("relation").strip()
    data_model.add_entity(entity1)
    data_model.add_entity(entity2)
    data_model.add_relation(relation, [
        entity1,
        entity2
    ])
    # Saved after every article, as in the original cell.
    data_model.save_to_csv()
    print(f"Added {entity1} {relation} {entity2} to the data model")

Added Prime Minister inaugurates the $400 temple in Kanpur to the data model
Added President launches new initiative to the data model


In [24]:
import pandas as pd
import spacy

nlp = spacy.load('en_core_web_md')
data = pd.read_csv("Relations.csv")

# Normalise the three text columns to lower-case strings.
for column in ('Entity1', 'Entity2', 'Relation'):
    data[column] = data[column].astype(str).str.lower()

# Drop a leading "prefix:" from entity names. Splitting once keeps any text
# after a second colon, and strip() removes the space that follows the colon.
for column in ('Entity1', 'Entity2'):
    data[column] = data[column].str.split(":", n=1).str[-1].str.strip()


def _merge_similar(names, threshold):
    """Map near-duplicate names onto the earliest similar name.

    Args:
        names: sequence of unique strings, in order of first appearance.
        threshold: two names are merged when the similarity of their spaCy
            docs is strictly greater than this value.

    Returns:
        dict mapping each merged-away name to the name that replaces it.
        Names that stay unchanged are not in the dict, and no replacement
        name is itself a key.
    """
    # Parse each name once instead of once per pair.
    docs = [nlp(name) for name in names]
    aliases = {}
    for i, name in enumerate(names):
        # If this name was already merged away, point its matches at the
        # name that replaced it so a stale name is never reintroduced.
        keeper = aliases.get(name, name)
        for j in range(i + 1, len(names)):
            candidate = names[j]
            if candidate in aliases:
                continue
            if docs[i].similarity(docs[j]) > threshold:
                aliases[candidate] = keeper
    return aliases


unique_entities1 = pd.unique(data['Entity1'])
unique_entities2 = pd.unique(data['Entity2'])

# Assign the result back to the column: calling replace(..., inplace=True)
# on data[col] is chained mutation, which does not update the frame when
# pandas Copy-on-Write is active.
entity1_aliases = _merge_similar(unique_entities1, 0.6)
data['Entity1'] = data['Entity1'].map(
    lambda name: entity1_aliases.get(name, name))

entity2_aliases = _merge_similar(unique_entities2, 0.8)
data['Entity2'] = data['Entity2'].map(
    lambda name: entity2_aliases.get(name, name))

data = data.drop_duplicates().reset_index(drop=True)
# Overwrites the input file with the normalised rows.
data.to_csv("Relations.csv", index=False)

In [25]:
import pandas as pd
from pyvis.network import Network
import os

os.makedirs('output_graphs', exist_ok=True)

# Rows with a missing field cannot be drawn as entity -> relation -> entity,
# so drop them up front (empty cells and the text "nan" load as NaN).
data = pd.read_csv("Relations.csv").dropna(
    subset=['Entity1', 'Relation', 'Entity2'])

unique_entities = pd.unique(data['Entity1'])


def _add_labelled_node(net, name, color):
    """Add a node whose id, hover title and label are all ``name``."""
    net.add_node(name, color=color, size=50, title=name,
                 label=name, font={"color": "black", "size": 12})


# One HTML graph per source entity: entity -> relation -> related entity.
for entity in unique_entities:
    net = Network(notebook=True)
    net.force_atlas_2based(spring_length=100)
    entity_data = data[data['Entity1'] == entity]

    # The central node is the same for every edge; add it once.
    _add_labelled_node(net, entity, 'skyblue')

    # sort=False keeps relations in order of first appearance.
    for relation, group in entity_data.groupby('Relation', sort=False):
        for related_entity in group['Entity2'].tolist():
            _add_labelled_node(net, related_entity, 'skyblue')
            _add_labelled_node(net, relation, 'red')
            net.add_edge(entity, relation)
            net.add_edge(relation, related_entity)

    # Entity text goes into the file name; replace path separators so a
    # name containing "/" or "\" does not point into a missing directory.
    safe_name = str(entity).replace("/", "_").replace("\\", "_")
    net.show(f"output_graphs/{safe_name}_relations.html")

output_graphs/pm modi_relations.html
output_graphs/ corporate transparency act ruling_relations.html
output_graphs/navy officials_relations.html
output_graphs/ wwii ‘ghost army’_relations.html


In [13]:
import pandas as pd
import spacy

nlp = spacy.load('en_core_web_md')
data = pd.read_csv("Entities.csv")
print(data)

# Merge entity names whose spaCy doc similarity exceeds 0.6 into the
# earliest similar name.
unique_entities = pd.unique(data['Entity'])
# Parse each name once instead of once per pair.
entity_docs = [nlp(name) for name in unique_entities]
entity_aliases = {}
for i, name in enumerate(unique_entities):
    # If this name was already merged away, point its matches at the name
    # that replaced it so a stale name is never reintroduced.
    keeper = entity_aliases.get(name, name)
    for j in range(i + 1, len(unique_entities)):
        candidate = unique_entities[j]
        if candidate in entity_aliases:
            continue
        if entity_docs[i].similarity(entity_docs[j]) > 0.6:
            entity_aliases[candidate] = keeper

# Assign the result back to the column: calling replace(..., inplace=True)
# on data['Entity'] is chained mutation, which does not update the frame
# when pandas Copy-on-Write is active.
data['Entity'] = data['Entity'].map(
    lambda name: entity_aliases.get(name, name))

data = data.drop_duplicates().reset_index(drop=True)
# Writing back is left disabled, as in the original cell.
# data.to_csv("Entities.csv", index=False)

print(data)

                                               Entity
0                                             PM Modi
1             Rs 8500 crore worth projects in Tripura
2        Projects Worth Over Rs 4,500 Crore In Bengal
3   projects worth Rs 55,600 crore in Northeast, i...
4       Tax Breaks: Corporate Transparency Act Ruling
5                    Attention As Tax Season Rolls On
6                                      Navy Officials
7   a Press Briefing on President Biden's Fiscal 2...
8   125-ft tall statue of Lachit Barphukan at Holl...
9   development projects of over Rs 4500 crore in ...
10                       Prime Minister Narendra Modi
11  125-feet tall statue of medieval-era Ahom gene...
12     Rs 290 Crore Development Projects in the state
13                                                 PM
14     Rs 290 crore development projects in Meghalaya
15                           WATCH: WWII ‘Ghost Army’
16                      with Congressional Gold Medal
17  WATCH: WWII ‘Ghost Army’