In [1]:
import pandas as pd
from itertools import combinations

In [2]:
# Read the researcher-by-researcher cosine similarity matrix from Excel.
# The path is absolute, so the workbook must exist at exactly this location;
# the first column of the sheet becomes the row index.
similarity_path = '/content/Researcher_Cosine_Similarity.xlsx'
similarity_df = pd.read_excel(similarity_path, index_col=0)

In [3]:
# Normalise the row and column labels together. The pair lookup further down
# uses index labels to address columns (similarity_df.loc[res1, res2]), so
# stripping whitespace from only the column headers could leave the two axes
# out of sync and raise a KeyError. Casting to str first keeps .str.strip()
# from failing on non-string headers (e.g. numeric cells read from Excel).
similarity_df.index = similarity_df.index.astype(str).str.strip()
similarity_df.columns = similarity_df.columns.astype(str).str.strip()

In [4]:
# Build one (researcher_a, researcher_b, similarity) tuple for every unordered
# pair of distinct row labels; combinations() never pairs a label with itself.
pairs = [
    (first, second, similarity_df.loc[first, second])
    for first, second in combinations(similarity_df.index, 2)
]


In [5]:
# Order a copy of the pairs from lowest to highest similarity score, so the
# most thematically distant pairs come first; `pairs` itself is left untouched.
diverse_pairs = list(pairs)
diverse_pairs.sort(key=lambda pair: pair[2])

In [6]:
# The list is ordered least-similar first, so its leading three entries are
# the candidates for interdisciplinary collaboration.
top_k = 3
top_3_diverse_collabs = diverse_pairs[:top_k]

In [8]:
# Report each selected pair as a numbered line, with its score to two decimals.
print(" Top 3 Recommended Interdisciplinary Research Collaborations:\n")
for i, pair in enumerate(top_3_diverse_collabs, 1):
    r1, r2, score = pair
    print(f"{i}. {r1} ↔ {r2} (Cosine Similarity: {score:.2f}) — High thematic diversity")

 Top 3 Recommended Interdisciplinary Research Collaborations:

1. Milad Alshomary ↔ Shuo Wang (Cosine Similarity: 0.03) — High thematic diversity
2. Diane M. Napolitano ↔ Shuo Wang (Cosine Similarity: 0.04) — High thematic diversity
3. Milad Alshomary ↔ Sourav Ghosh (Cosine Similarity: 0.04) — High thematic diversity


In [9]:
# Persist the selected pairs as a three-column spreadsheet, omitting the
# DataFrame's positional index from the file.
collab_columns = ["Researcher_1", "Researcher_2", "Cosine_Similarity"]
output_df = pd.DataFrame(top_3_diverse_collabs, columns=collab_columns)
output_df.to_excel("Top_3_Interdisciplinary_Collaborations.xlsx", index=False)

In [10]:
# Order a copy of the pairs from highest to lowest similarity score, so the
# closest match is first; `pairs` itself is left untouched.
similar_pairs = list(pairs)
similar_pairs.sort(key=lambda pair: pair[2], reverse=True)

In [11]:
# Take the topmost pair: similar_pairs is sorted by descending similarity, so
# index 0 is the highest-scoring (researcher, researcher, score) tuple.
# Raises IndexError if similar_pairs is empty.
most_similar_pair = similar_pairs[0]

In [13]:
# Show the heading, then the pair's two names and its score to two decimals.
print(" Most Similar Researcher Pair (Shared Research Interest):")
summary_line = f"{most_similar_pair[0]} ↔ {most_similar_pair[1]} (Cosine Similarity: {most_similar_pair[2]:.2f})"
print(summary_line)

 Most Similar Researcher Pair (Shared Research Interest):
Rudolf Rosa ↔ John E. Ortega (Cosine Similarity: 0.36)


In [14]:
# Optional export: write the single most-similar pair to its own spreadsheet
# as one row, omitting the DataFrame's positional index from the file.
similar_columns = ["Researcher_1", "Researcher_2", "Cosine_Similarity"]
similar_df = pd.DataFrame([most_similar_pair], columns=similar_columns)
similar_df.to_excel("Most_Similar_Researchers.xlsx", index=False)