In [1]:
import pandas as pd
import numpy as np
import spacy
from transformers import BartForConditionalGeneration, BartTokenizer
import torch
import json
from collections import defaultdict

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Generate summary corpus

# --- Models ---------------------------------------------------------------
# spaCy's small English pipeline; used further down to tokenise the profile
# text and flag stop words / punctuation.
nlp = spacy.load('en_core_web_sm')

# One checkpoint name supplies both the BART summarisation model and its tokenizer.
BART_CHECKPOINT = 'facebook/bart-large-cnn'
bart_model = BartForConditionalGeneration.from_pretrained(BART_CHECKPOINT)
bart_tokenizer = BartTokenizer.from_pretrained(BART_CHECKPOINT)
print("done")

# --- Input data -----------------------------------------------------------
# Only the first MAX_PROFILES rows of the spreadsheet are kept.
MAX_PROFILES = 500
file_path = '/content/drive/MyDrive/BTech_Project/data.xlsx'
data = pd.read_excel(file_path).head(MAX_PROFILES)

# Function to clean and prepare text for summarization
def prepare_text(row):
    """Build the stop-word-free text describing one profile row.

    The row's fields are slotted into a fixed English template, the template
    is run through the module-level spaCy pipeline ``nlp``, and the surviving
    token texts are joined with single spaces.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) indexable by the
            column names 'LinkedIn Name', 'Description', 'Organisation',
            'Location', 'Industry', 'Current Role(s)', 'Tenure at Company'
            and 'About'.

    Returns:
        str: Token texts that are neither stop words, punctuation nor
        whitespace, separated by single spaces.
    """
    def _field(column):
        # A missing cell shows up as None or float NaN. Formatting it directly
        # would inject the literal words "None"/"nan" into the text that is
        # later summarised, so substitute an empty string instead.
        value = row[column]
        if value is None or (isinstance(value, float) and value != value):
            return ""
        return value

    text = f"""
    {_field('LinkedIn Name')} is currently working as {_field('Description')} at {_field('Organisation')}.
    Based in {_field('Location')}, they are part of the {_field('Industry')} industry.
    In their current role as {_field('Current Role(s)')}, they have been with the company for {_field('Tenure at Company')}.
    Their background includes: {_field('About')}.
    """
    # Clean the text using NLP (removing stop words, punctuation, etc.).
    doc = nlp(text)
    # The template's newlines and indentation come back from spaCy as
    # whitespace tokens; they are neither stop words nor punctuation, so they
    # must be filtered explicitly or they leak into the cleaned text.
    kept_tokens = [
        token.text
        for token in doc
        if not (token.is_stop or token.is_punct or token.is_space)
    ]
    return " ".join(kept_tokens)

# Function to generate a detailed and advanced summarized corpus using BART
def generate_bart_summary(text):
    """Summarise ``text`` with the module-level BART model and tokenizer.

    The text is tokenised with truncation at 1024 tokens, a summary is
    generated with 4-beam search and a maximum length of 150, and the first
    generated sequence is decoded with special tokens skipped.

    Args:
        text: The string to summarise.

    Returns:
        The decoded summary.
    """
    encoded = bart_tokenizer(text, max_length=1024, return_tensors="pt", truncation=True)
    generation_options = dict(num_beams=4, max_length=150, early_stopping=True)
    generated = bart_model.generate(encoded["input_ids"], **generation_options)
    best_sequence = generated[0]
    return bart_tokenizer.decode(best_sequence, skip_special_tokens=True)

# Function to create a summarized corpus for each person
def generate_summarized_corpus(row):
    """Return the BART summary for one profile row.

    The row is first turned into cleaned text by ``prepare_text``; that text
    is then handed to ``generate_bart_summary`` and its result returned
    unchanged.
    """
    return generate_bart_summary(prepare_text(row))

# Create a summarized corpus for each person in the dataset.
# `assign` returns a new frame instead of writing a column into `data`, which
# at this point is the result of `head(...)`; writing into such a slice can
# trigger pandas' SettingWithCopyWarning.
data = data.assign(Corpus=data.apply(generate_summarized_corpus, axis=1))

# Save the summarized corpus (name + summary only) to a CSV file
output_file_path = './summarized_corpus.csv'
data[['LinkedIn Name', 'Corpus']].to_csv(output_file_path, index=False)

print(f"Summarized corpus saved to {output_file_path}")

In [2]:
# Generate adjacency list from generated summary corpus

import json
from collections import defaultdict

# Load the pre-summarized corpus
file_path = './summarized_corpus.csv'
data = pd.read_csv(file_path)

# Number of people (nodes)
num_people = len(data)

# Names by row position, so each edge is resolved with a list lookup instead
# of materialising a whole DataFrame row via `data.iloc[...]`.
names = data['LinkedIn Name'].tolist()

# Adjacency list to store connections (person name -> list of connected names).
# Edges are directed: A listing B does not make B list A.
adjacency_list = defaultdict(list)

# Simulate connection counts from a normal distribution, truncated to int and
# clipped to [15, 20]. A person can end up with one fewer connection than
# drawn, because a self-draw is discarded below rather than re-sampled.
np.random.seed(42)
connections_per_person = np.random.normal(loc=17.5, scale=2, size=num_people).astype(int)
connections_per_person = np.clip(connections_per_person, 15, 20)

# Generate adjacency lists
for i, person_name in enumerate(names):
    # Sampling without replacement raises if more items are requested than
    # exist, so cap the request for corpora smaller than the drawn count.
    sample_size = min(int(connections_per_person[i]), num_people)
    connections = np.random.choice(range(num_people), size=sample_size, replace=False)
    for connection in connections:
        if connection != i:  # Avoid self-loops
            adjacency_list[person_name].append(names[connection])

# Display a sample of the adjacency list
sample_adjacency_list = {k: adjacency_list[k] for k in list(adjacency_list.keys())[:3]}

print("Sample Adjacency List:")
for name, connections in sample_adjacency_list.items():
    print(f"{name} -> {', '.join(connections)}")

# Save the adjacency list to a file (if needed)
# For example, saving as JSON for further analysis
with open('./adjacency_list.json', 'w') as f:
    json.dump(adjacency_list, f)


Sample Adjacency List:
Saurabh Gupta -> Rachna Sharma, Prabakaran Pandian, Pawan Sut Sharma, Aayush Jha, Denis CA de Souza, Kartikay Garg, Aayush Garg, Kapil Kumar Narula, Ratnadeep Pawar, Chhaya Bhanti, Urv Patel, ANIL KUMAR SAMINENI, Mahadeva swamy, Karan Vyas, Shadab Ghazaly, Nitesh Singh, Channa Ghosh, chinmay Khanolkar
Jatin Singh -> Jacob Lallawmsang, Suchit Dekivadia, Renuka Nair, Elango Sidhan, Naveen Verma, Vipin Kumar Yadav, Mahadeva swamy, Saiprasad Bhartu, Balaram Puttaiah, Rajpal Navalkar, Wilma Rodrigues, Rajat Parikh, Cherish Tota, Gurjot Singh, Dr.Rathin Sharma, Jayavardhan Shetty, Akash Kumar
Nilesh Bhattad -> Jacob Lallawmsang, Narendra Patel, Ishant Sharma, Amit Saha, Anju Sasikumar, Dr Dnyaneshwar Battalwar, Shekar Prabhakar, Abde Ali Shabbir. ., DEEPAK KUMAR PANI, Suchit Dekivadia, Divyesh Chandera, Aayush Garg, Roopesh Rai, Sekhar C, Jayavardhan Shetty, Param Desai, Krishnan Komandur, Shibabrata Bhattacharjee


In [3]:
# DFS

def dfs_with_path(start, key, visited=None, path=None):
    """Depth-first search for ``key`` in the module-level ``adjacency_list``.

    Neighbours are explored in the order they are stored, descending into
    each unvisited neighbour before moving on to the next one. The traversal
    uses an explicit stack rather than recursion, so long chains of
    connections cannot hit Python's recursion limit.

    Args:
        start: Name of the person to start from.
        key: Name of the person to find.
        visited: Optional set of names to treat as already explored; it is
            updated in place. A fresh set is used when omitted.
        path: Optional list of names leading up to and including ``start``.
            Defaults to ``[start]``.

    Returns:
        list | None: The names from the beginning of ``path`` to ``key``, or
        ``None`` when ``key`` cannot be reached from ``start``.
    """
    if visited is None:
        visited = set()
    if path is None:
        path = [start]

    visited.add(start)

    # The starting person may already be the target.
    if start == key:
        return path

    # Each stack entry pairs a partially consumed neighbour iterator with the
    # path that leads to the node owning those neighbours.
    # adjacency_list values are plain lists of names, so neighbours are
    # iterated directly (no tuple unpacking).
    stack = [(iter(adjacency_list.get(start, [])), path)]
    while stack:
        neighbors, current_path = stack[-1]
        for neighbor in neighbors:
            if neighbor in visited:
                continue
            visited.add(neighbor)
            next_path = current_path + [neighbor]
            if neighbor == key:
                return next_path  # Stop as soon as the target is reached
            # Descend into this neighbour before trying its siblings.
            stack.append((iter(adjacency_list.get(neighbor, [])), next_path))
            break
        else:
            # Every neighbour of this node has been handled: backtrack.
            stack.pop()

    return None  # Target is not reachable from the starting person

In [4]:
from collections import deque

def bfs_with_path(start, target):
    """Breadth-first search from ``start`` to ``target`` over ``adjacency_list``.

    Args:
        start: Name of the person the search begins at.
        target: Name of the person to reach.

    Returns:
        The list of names from ``start`` to ``target`` along the first path
        BFS discovers, or ``None`` if ``target`` is never reached.
    """
    root = object()  # marks "no predecessor" for the starting node
    came_from = {start: root}  # doubles as the visited set
    frontier = deque([start])

    while frontier:
        node = frontier.popleft()

        if node == target:
            # Walk the predecessor links back to the start, then flip.
            trail = []
            step = node
            while step is not root:
                trail.append(step)
                step = came_from[step]
            trail.reverse()
            return trail

        for nxt in adjacency_list.get(node, []):
            if nxt in came_from:
                continue
            came_from[nxt] = node
            frontier.append(nxt)

    return None

In [5]:
# Endpoints of the search
starting_name = "Saurabh Gupta"
target_name = "Roopesh Rai"

# Breadth-first search for a chain of connections between the two people
path = bfs_with_path(starting_name, target_name)

if not path:
    print(f"No person with the name {target_name} found in the network starting from {starting_name}.")
else:
    print(" -> ".join(path))  # Chain of names from start to target

Saurabh Gupta -> Rachna Sharma -> Mohammed Irfan -> Roopesh Rai


In [9]:
# Example usage
starting_name = "Saurabh Gupta"
target = "Roopesh Rai"

# Perform BFS to find a path from starting_name to target
path = bfs_with_path(starting_name, target)

if path:
    print(" -> ".join(path))  # Print the path

    # The last entry of the path is the person that was searched for
    selected_person = path[-1]

    # Load the summarized corpus with a real CSV parser. Splitting raw lines
    # on the first comma mishandles quoted fields (names or summaries that
    # contain commas, quotes or newlines) and fails on lines with no comma.
    # A dedicated name is used so the global `data` frame is not overwritten.
    file_path = './summarized_corpus.csv'  # Path to the summarized corpus file
    corpus_df = pd.read_csv(file_path, encoding='utf-8')

    # Find the first row whose (whitespace-trimmed) name matches
    name_matches = corpus_df['LinkedIn Name'].astype(str).str.strip() == selected_person
    person_data = None
    if name_matches.any():
        corpus_text = corpus_df.loc[name_matches, 'Corpus'].iloc[0]
        if pd.notna(corpus_text):  # an empty summary cell is read back as NaN
            person_data = str(corpus_text).strip()

    if person_data:
        print("\nCorpus data for selected person:")
        print(person_data)
    else:
        print(f"No corpus data found for {selected_person}")
else:
    print(f"{target} not found in the network starting from {starting_name}.")

Saurabh Gupta -> Rachna Sharma -> Mohammed Irfan -> Roopesh Rai

Corpus data for selected person:
Roopesh Rai is the Founder Chief Executive Officer of Bakri Chhap Agro Tourism Natural Products Pvt Ltd. BakriChhap pioneering initiative seeks transform rural India sustainable tourism empowerment local communities. Roopesh sees potential rural India offer real reality hospitality tourism. He understands youth rural areas hold key change problem solution fostering eco entrepreneurship.
