In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors

# Attach Google Drive to the Colab runtime so the dataset paths below resolve.
from google.colab import drive
drive.mount('/content/drive')

# Locations of the two input CSV files on the mounted Drive.
interests_file = "/content/drive/My Drive/Interest.csv"
papers_file = "/content/drive/My Drive/papers.csv"

# Load datasets
def load_papers(path=None) -> pd.DataFrame:
    """Load the papers CSV and add a ``combined_text`` search column.

    Args:
        path: CSV file to read. When omitted, the module-level
            ``papers_file`` is used, so existing ``load_papers()`` calls
            behave exactly as before.

    Returns:
        A DataFrame holding only the rows that have no missing value in any
        column, with an extra ``combined_text`` column equal to
        ``title + " " + abstract``. The original row labels are kept.
    """
    source = papers_file if path is None else path
    # dropna() returns a new frame, so nothing is mutated in place.
    df = pd.read_csv(source).dropna()
    # Title and abstract are merged so one vectoriser pass covers both fields.
    return df.assign(combined_text=df["title"] + " " + df["abstract"])

def load_interests(path=None) -> pd.DataFrame:
    """Load the professors' research-interest CSV.

    Args:
        path: CSV file to read. When omitted, the module-level
            ``interests_file`` is used, so existing ``load_interests()``
            calls behave exactly as before.

    Returns:
        The CSV contents as a DataFrame, unmodified.
    """
    source = interests_file if path is None else path
    return pd.read_csv(source)

# Read both datasets once; the search helpers below use these module globals.
papers_df = load_papers()
interests_df = load_interests()

# Paper index: TF-IDF (English stop words removed) over title + abstract,
# searched with cosine distance, five neighbours by default.
vectorizer = TfidfVectorizer(stop_words="english")
tfidf_matrix = vectorizer.fit_transform(papers_df["combined_text"])
knn_papers = NearestNeighbors(metric="cosine", n_neighbors=5).fit(tfidf_matrix)

# Professor index: a separate TF-IDF vocabulary built from the
# "Field of Interest" column, cosine distance, three neighbours by default.
interests_vectorizer = TfidfVectorizer(stop_words="english")
interests_matrix = interests_vectorizer.fit_transform(interests_df["Field of Interest"])
knn_interests = NearestNeighbors(metric="cosine", n_neighbors=3).fit(interests_matrix)

# Function to find similar research papers using KNN
def find_similar_papers_knn(query: str, top_n: int = 5) -> pd.DataFrame:
    """Return the papers whose TF-IDF text is closest to *query*.

    Args:
        query: Free-text description to search for.
        top_n: Maximum number of papers to return. A value larger than the
            number of indexed papers is clamped to that number instead of
            letting ``kneighbors`` raise ``ValueError``.

    Returns:
        The matching rows of ``papers_df`` restricted to the ``year``,
        ``title`` and ``abstract`` columns, in the order returned by
        ``kneighbors``.
    """
    query_vector = vectorizer.transform([query])
    # kneighbors rejects n_neighbors greater than the number of fitted samples.
    n_neighbors = min(top_n, knn_papers.n_samples_fit_)
    # Only the row positions are needed, so the distances are not requested.
    indices = knn_papers.kneighbors(
        query_vector, n_neighbors=n_neighbors, return_distance=False
    )
    return papers_df.iloc[indices[0]][["year", "title", "abstract"]]

# Function to find professors with similar research interests using KNN
def find_similar_professors_knn(query: str, top_n: int = 3) -> pd.DataFrame:
    """Return the professors whose stated interests are closest to *query*.

    Args:
        query: Free-text description to search for.
        top_n: Maximum number of professors to return. A value larger than
            the number of indexed professors is clamped to that number
            instead of letting ``kneighbors`` raise ``ValueError``.

    Returns:
        The matching rows of ``interests_df`` restricted to the ``Name`` and
        ``Field of Interest`` columns, in the order returned by
        ``kneighbors``.
    """
    query_vector = interests_vectorizer.transform([query])
    # kneighbors rejects n_neighbors greater than the number of fitted samples.
    n_neighbors = min(top_n, knn_interests.n_samples_fit_)
    # Only the row positions are needed, so the distances are not requested.
    indices = knn_interests.kneighbors(
        query_vector, n_neighbors=n_neighbors, return_distance=False
    )
    return interests_df.iloc[indices[0]][["Name", "Field of Interest"]]

# Command Line UI for KNN search
def main() -> None:
    """Run the interactive search: prompt once, then print matches.

    Reads one line from standard input. If the answer is empty or contains
    only whitespace, nothing is searched and the function returns silently.
    Otherwise the matching papers (title, year, abstract) and the names of
    the matching professors are printed.
    """
    print("🔍 Research Paper Finder and Professor Matcher")
    print("Enter a short description of your research paper or interest to find relevant papers and professors.")

    # Strip first so that a whitespace-only answer counts as "no query"
    # instead of being passed to the search functions.
    query = input("Describe your research: ").strip()
    if not query:
        return

    print("\n📄 Relevant Research Papers:")
    paper_results = find_similar_papers_knn(query)
    for _, row in paper_results.iterrows():
        print(f"### {row['title']} ({row['year']})")
        print(row['abstract'])
        print("---\n")

    print("\n👩‍🏫 Professors with Similar Research Interests:")
    professor_results = find_similar_professors_knn(query)
    for _, row in professor_results.iterrows():
        print(f"Name: {row['Name']}")
        print("---\n")

if __name__ == "__main__":
    main()


Mounted at /content/drive
🔍 Research Paper Finder and Professor Matcher
Enter a short description of your research paper or interest to find relevant papers and professors.
Describe your research: traffic lights

📄 Relevant Research Papers:
### Optimal Rendezvous L-Algorithms for Asynchronous Mobile Robots with External-Lights (2018)
We study the Rendezvous problem for two autonomous mobile robots in asynchronous settings with persistent memory called light. It is well known that Rendezvous is impossible in a basic model when robots have no lights, even if the system is semi-synchronous. On the other hand, Rendezvous is possible if robots have lights of various types with a constant number of colors. If robots can observe not only their own lights but also other robots' lights, their lights are called full-light. If robots can only observe the state of other robots' lights, the lights are called external-light. This paper focuses on robots with external-lights in asynchronous settings 

In [None]:

# Command Line UI for KNN search
def main() -> None:
    """Run the interactive search: prompt once, then print matches.

    Reads one line from standard input. If the answer is empty or contains
    only whitespace, nothing is searched and the function returns silently.
    Otherwise the matching papers (title, year, abstract) and the matching
    professors (name and field of interest) are printed.
    """
    print("🔍 Research Paper Finder and Professor Matcher")
    print("Enter a short description of your research paper or interest to find relevant papers and professors.")

    # Strip first so that a whitespace-only answer counts as "no query"
    # instead of being passed to the search functions.
    query = input("Describe your research: ").strip()
    if not query:
        return

    print("\n📄 Relevant Research Papers:")
    paper_results = find_similar_papers_knn(query)
    for _, row in paper_results.iterrows():
        print(f"### {row['title']} ({row['year']})")
        print(row['abstract'])
        print("---\n")

    print("\n👩‍🏫 Professors with Similar Research Interests:")
    professor_results = find_similar_professors_knn(query)
    for _, row in professor_results.iterrows():
        print(f"Name: {row['Name']}")
        print(f"Field of Interest: {row['Field of Interest']}")
        print("---\n")

if __name__ == "__main__":
    main()

🔍 Research Paper Finder and Professor Matcher
Enter a short description of your research paper or interest to find relevant papers and professors.


In [None]:
import pandas as pd
import joblib
from sentence_transformers import SentenceTransformer
from sklearn.neighbors import NearestNeighbors
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors

# Attach Google Drive to the Colab runtime so the dataset paths below resolve.
from google.colab import drive
drive.mount('/content/drive')

# Locations of the two input CSV files on the mounted Drive.
interests_file = "/content/drive/My Drive/Interest.csv"
papers_file = "/content/drive/My Drive/papers.csv"

# Running outside Google Colab: skip the Drive import/mount above and point
# the two variables at local copies instead, e.g.
#   papers_file = "papers.csv"
#   interests_file = "Interest.csv"

# Load datasets
def load_papers(path=None) -> pd.DataFrame:
    """Read the papers CSV and build the text column that gets embedded.

    Args:
        path: CSV file to read. Falls back to the module-level
            ``papers_file`` when omitted, so ``load_papers()`` keeps its
            previous behaviour.

    Returns:
        A DataFrame with every row containing a missing value removed and a
        ``combined_text`` column (``title + " " + abstract``) appended. The
        original row labels are kept.
    """
    source = papers_file if path is None else path
    # dropna() hands back a new frame rather than mutating one in place.
    complete_rows = pd.read_csv(source).dropna()
    merged_text = complete_rows["title"] + " " + complete_rows["abstract"]
    return complete_rows.assign(combined_text=merged_text)

def load_interests(path=None) -> pd.DataFrame:
    """Read the professors' research-interest CSV.

    Args:
        path: CSV file to read. Falls back to the module-level
            ``interests_file`` when omitted, so ``load_interests()`` keeps
            its previous behaviour.

    Returns:
        The CSV contents as a DataFrame, unmodified.
    """
    source = interests_file if path is None else path
    return pd.read_csv(source)

# Read both datasets once; the search helpers below use these module globals.
papers_df = load_papers()
interests_df = load_interests()

# Pre-trained sentence-embedding model, shared by the corpus and the queries.
bert_model = SentenceTransformer('all-MiniLM-L6-v2')

# Embed each paper's combined text and each professor's field of interest.
papers_embeddings = bert_model.encode(
    papers_df["combined_text"].tolist(), convert_to_tensor=True
)
interests_embeddings = bert_model.encode(
    interests_df["Field of Interest"].tolist(), convert_to_tensor=True
)

# Cosine-distance neighbour indexes; the tensors are moved to the CPU before
# being handed to scikit-learn.
knn_papers = NearestNeighbors(metric="cosine", n_neighbors=5, n_jobs=-1).fit(
    papers_embeddings.cpu()
)
knn_interests = NearestNeighbors(metric="cosine", n_neighbors=3, n_jobs=-1).fit(
    interests_embeddings.cpu()
)

# Persist the fitted indexes and the model to the working directory.
# NOTE(review): nothing in the visible code loads these files back, so as
# written every run still recomputes the embeddings - confirm the intended use.
joblib.dump(knn_papers, "knn_papers.pkl")
joblib.dump(knn_interests, "knn_interests.pkl")
joblib.dump(bert_model, "bert_model.pkl")

def find_similar_papers_knn(query: str, top_n: int = 5) -> pd.DataFrame:
    """Return the papers whose embedding is closest to that of *query*.

    Args:
        query: Free-text description to search for.
        top_n: Maximum number of papers to return. A value larger than the
            number of indexed papers is clamped to that number instead of
            letting ``kneighbors`` raise ``ValueError``.

    Returns:
        The matching rows of ``papers_df`` restricted to the ``year``,
        ``title`` and ``abstract`` columns, in the order returned by
        ``kneighbors``.
    """
    query_embedding = bert_model.encode([query], convert_to_tensor=True).cpu()
    # kneighbors rejects n_neighbors greater than the number of fitted samples.
    n_neighbors = min(top_n, knn_papers.n_samples_fit_)
    # Only the row positions are needed, so the distances are not requested.
    indices = knn_papers.kneighbors(
        query_embedding, n_neighbors=n_neighbors, return_distance=False
    )
    return papers_df.iloc[indices[0]][["year", "title", "abstract"]]

def find_similar_professors_knn(query: str, top_n: int = 3) -> pd.DataFrame:
    """Return the professors whose interest embedding is closest to *query*.

    Args:
        query: Free-text description to search for.
        top_n: Maximum number of professors to return. A value larger than
            the number of indexed professors is clamped to that number
            instead of letting ``kneighbors`` raise ``ValueError``.

    Returns:
        The matching rows of ``interests_df`` restricted to the ``Name`` and
        ``Field of Interest`` columns, in the order returned by
        ``kneighbors``.
    """
    query_embedding = bert_model.encode([query], convert_to_tensor=True).cpu()
    # kneighbors rejects n_neighbors greater than the number of fitted samples.
    n_neighbors = min(top_n, knn_interests.n_samples_fit_)
    # Only the row positions are needed, so the distances are not requested.
    indices = knn_interests.kneighbors(
        query_embedding, n_neighbors=n_neighbors, return_distance=False
    )
    return interests_df.iloc[indices[0]][["Name", "Field of Interest"]]

def main():
    """Prompt for a research description and print the closest matches.

    The answer is stripped of surrounding whitespace. An empty answer prints
    an error message and nothing else; otherwise the matching papers are
    listed first, followed by the matching professors, each entry closed by
    a 50-dash divider.
    """
    divider = "-" * 50

    print("🔍 Research Paper Finder and Professor Matcher")
    description = input("Describe your research: ").strip()

    if description:
        print("\n📄 Relevant Research Papers:")
        for _, paper in find_similar_papers_knn(description).iterrows():
            print(f"📌 {paper['title']} ({paper['year']})")
            print(paper['abstract'])
            print(divider)

        print("\n👩‍🏫 Professors with Similar Research Interests:")
        for _, professor in find_similar_professors_knn(description).iterrows():
            print(f"Name: {professor['Name']} - Interest: {professor['Field of Interest']}")
            print(divider)
    else:
        print("❌ Please enter a valid research description!")

if __name__ == "__main__":
    main()


Mounted at /content/drive


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

🔍 Research Paper Finder and Professor Matcher
Describe your research: traafic 

📄 Relevant Research Papers:
📌 Organization Committee (2019)
Organization Committee
--------------------------------------------------
📌 RTP Payload for Society of Motion Picture and Television Engineers (SMPTE) ST 291-1 Ancillary Data (2018)
This memo describes a Real-time Transport Protocol (RTP) payload
format for the Society of Motion Picture and Television Engineers
(SMPTE) ancillary space (ANC) data, as defined by SMPTE ST 291-1.
SMPTE ANC data is generally used along with professional video formats
to carry a range of ancillary data types, including time code, Closed
Captioning, and the Active Format Description (AFD).
--------------------------------------------------
📌 IAB, IESG, and IAOC Selection, Confirmation, and Recall Process: IAOC Advisor for the Nominating Committee (2018)
This specification formalizes an ad hoc practice used to provide
advice to the IETF Nominating Committee (NomCom) about 