In [1]:
# LOADING TOOLS AND DATASET

from tensorflow import keras
import tensorflow as tf
from sklearn.model_selection import train_test_split
from ast import literal_eval
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np



In [2]:
arxiv_data = pd.read_csv("arxiv_data_210930-054931.csv")
arxiv_data.head()

Unnamed: 0,terms,titles,abstracts
0,['cs.LG'],Multi-Level Attention Pooling for Graph Neural...,Graph neural networks (GNNs) have been widely ...
1,"['cs.LG', 'cs.AI']",Decision Forests vs. Deep Networks: Conceptual...,Deep networks and decision forests (such as ra...
2,"['cs.LG', 'cs.CR', 'stat.ML']",Power up! Robust Graph Convolutional Network v...,Graph convolutional networks (GCNs) are powerf...
3,"['cs.LG', 'cs.CR']",Releasing Graph Neural Networks with Different...,With the increasing popularity of Graph Neural...
4,['cs.LG'],Recurrence-Aware Long-Term Cognitive Network f...,Machine learning solutions for pattern classif...


In [3]:
title = arxiv_data["titles"]
title 

0        Multi-Level Attention Pooling for Graph Neural...
1        Decision Forests vs. Deep Networks: Conceptual...
2        Power up! Robust Graph Convolutional Network v...
3        Releasing Graph Neural Networks with Different...
4        Recurrence-Aware Long-Term Cognitive Network f...
                               ...                        
56176    Mining Spatio-temporal Data on Industrializati...
56177    Wav2Letter: an End-to-End ConvNet-based Speech...
56178    Deep Reinforcement Learning with Double Q-lear...
56179                          Generalized Low Rank Models
56180    Chi-square Tests Driven Method for Learning th...
Name: titles, Length: 56181, dtype: object

In [4]:
# DATA CLEANING

arxiv_data.drop(columns = ["terms","abstracts"], inplace = True)
arxiv_data

Unnamed: 0,titles
0,Multi-Level Attention Pooling for Graph Neural...
1,Decision Forests vs. Deep Networks: Conceptual...
2,Power up! Robust Graph Convolutional Network v...
3,Releasing Graph Neural Networks with Different...
4,Recurrence-Aware Long-Term Cognitive Network f...
...,...
56176,Mining Spatio-temporal Data on Industrializati...
56177,Wav2Letter: an End-to-End ConvNet-based Speech...
56178,Deep Reinforcement Learning with Double Q-lear...
56179,Generalized Low Rank Models


In [5]:
%pip install -U -q sentence-transformers

In [6]:
# SENTENCE TRANSFORMING


from sentence_transformers import SentenceTransformer, util
model = SentenceTransformer('all-MiniLM-L6-v2')
sentences = arxiv_data['titles']
embeddings = model.encode(sentences)

embeddings

array([[ 0.06643407, -0.04954597,  0.06388086, ...,  0.00106307,
        -0.12156382, -0.06962774],
       [ 0.09212258, -0.0760694 ,  0.06572867, ..., -0.08565165,
        -0.09266549,  0.00725293],
       [-0.08162683,  0.02428932,  0.01888746, ...,  0.00806159,
        -0.0512953 , -0.05873994],
       ...,
       [-0.09695333,  0.00057087,  0.0772649 , ..., -0.0144381 ,
        -0.04748215,  0.06130563],
       [ 0.00768867, -0.1012418 ,  0.08909852, ..., -0.08199866,
        -0.05649742,  0.09007055],
       [ 0.06078518, -0.08312802, -0.00907769, ..., -0.03148185,
         0.05713108,  0.0569689 ]], dtype=float32)

In [7]:
# PRINTING EMBEDDINGS

c = 0
for sentence, embedding in zip(sentences, embeddings):
    print("Sentence:", sentence)
    print("Embedding length:", len(embedding)) 
    print("")
    if c >=5:
        break
    c +=1 

Sentence: Multi-Level Attention Pooling for Graph Neural Networks: Unifying Graph Representations with Multiple Localities
Embedding length: 384

Sentence: Decision Forests vs. Deep Networks: Conceptual Similarities and Empirical Differences at Small Sample Sizes
Embedding length: 384

Sentence: Power up! Robust Graph Convolutional Network via Graph Powering
Embedding length: 384

Sentence: Releasing Graph Neural Networks with Differential Privacy Guarantees
Embedding length: 384

Sentence: Recurrence-Aware Long-Term Cognitive Network for Explainable Pattern Classification
Embedding length: 384

Sentence: Lifelong Graph Learning
Embedding length: 384



In [8]:
# FILE SAVING

import pickle

with open('models/embeddings.pkl', 'wb') as f:
    pickle.dump(embeddings, f)

with open('models/sentences.pkl', 'wb') as f:
    pickle.dump(sentences, f)
    
with open('models/rec_model.pkl', 'wb') as f:
    pickle.dump(model, f)

In [9]:
# RECOMMENDATION FOR PAPERS

import pickle
embeddings = pickle.load(open('models/embeddings.pkl','rb'))
sentences = pickle.load(open('models/sentences.pkl','rb'))
rec_model = pickle.load(open('models/rec_model.pkl','rb'))

In [10]:

import torch

def recommendation(input_paper):
    cosine_scores = util.cos_sim(embeddings, rec_model.encode(input_paper))
    top_similar_papers = torch.topk(cosine_scores, dim=0, k=5, sorted=True)
    papers_list = []
    for i in top_similar_papers.indices:
        papers_list.append(sentences[i.item()])
    
    return papers_list

In [11]:
# EXAMPLE 

input_paper = input("Enter the title of any paper you like")
recommend_papers = recommendation(input_paper)

print("We recommend to read this paper............")
print("=============================================")
for paper in recommend_papers:
    print(paper)


We recommend to read this paper............
Epipolar Transformers
Understanding and Accelerating EM Algorithm's Convergence by Fair Competition Principle and Rate-Verisimilitude Function
Augmenting Light Field to model Wave Optics effects
Image segmentation by adaptive distance based on EM algorithm
Image segmentation by adaptive distance based on EM algorithm
