In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [28]:
# Import necessary libraries
import pandas as pd
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import re  # Import the re module for regular expressions
import matplotlib.pyplot as plt

# Download NLTK data files
nltk.download('punkt')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [29]:
# Load the full dataset
data = pd.read_csv('/content/drive/MyDrive/NLP Task/collection_with_abstracts.csv', sep=',')  # Adjust separator if needed
print("Data loaded successfully.")
data.head()  # Display the first few rows

Data loaded successfully.


Unnamed: 0,PMID,Title,Authors,Citation,First Author,Journal/Book,Publication Year,Create Date,PMCID,NIHMS ID,DOI,Abstract
0,39435445,Editorial: The operationalization of cognitive...,"Winter M, Probst T, Tallon M, Schobel J, Pryss R.",Front Neurosci. 2024 Oct 7;18:1501636. doi: 10...,Winter M,Front Neurosci,2024,2024/10/22,PMC11491427,,10.3389/fnins.2024.1501636,
1,39398866,Characterization of arteriosclerosis based on ...,"Zhou J, Li X, Demeke D, Dinh TA, Yang Y, Janow...",J Med Imaging (Bellingham). 2024 Sep;11(5):057...,Zhou J,J Med Imaging (Bellingham),2024,2024/10/14,PMC11466048,,10.1117/1.JMI.11.5.057501,PURPOSE: Our purpose is to develop a computer ...
2,39390053,Multi-scale input layers and dense decoder agg...,"Lan X, Jin W.",Sci Rep. 2024 Oct 10;14(1):23729. doi: 10.1038...,Lan X,Sci Rep,2024,2024/10/10,PMC11467340,,10.1038/s41598-024-74701-0,Accurate segmentation of COVID-19 lesions from...
3,39367648,An initial game-theoretic assessment of enhanc...,"Fatemi MY, Lu Y, Diallo AB, Srinivasan G, Azhe...",Brief Bioinform. 2024 Sep 23;25(6):bbae476. do...,Fatemi MY,Brief Bioinform,2024,2024/10/05,PMC11452536,,10.1093/bib/bbae476,The application of deep learning to spatial tr...
4,39363262,Truncated M13 phage for smart detection of E. ...,"Yuan J, Zhu H, Li S, Thierry B, Yang CT, Zhang...",J Nanobiotechnology. 2024 Oct 3;22(1):599. doi...,Yuan J,J Nanobiotechnology,2024,2024/10/04,PMC11451008,,10.1186/s12951-024-02881-y,BACKGROUND: The urgent need for affordable and...


In [31]:
# Define a function to check for deep learning keywords
def contains_deep_learning(text):
    deep_learning_keywords = ['deep learning', 'neural network', 'cnn', 'rnn', 'lstm', 'transformer']
    if isinstance(text, str):
        text = text.lower()
        return any(keyword in text for keyword in deep_learning_keywords)
    return False

# Filter papers that mention deep learning methods
data['is_relevant'] = data['Abstract'].apply(contains_deep_learning)
relevant_papers = data.loc[data['is_relevant']].copy()  # Use .copy() to avoid warnings
print(f"Number of relevant papers: {len(relevant_papers)}")

Number of relevant papers: 4333


In [32]:
# Define a function to classify the method used
def classify_method(text):
    text = text.lower()
    if 'text mining' in text and 'computer vision' in text:
        return 'both'
    elif 'text mining' in text:
        return 'text mining'
    elif 'computer vision' in text:
        return 'computer vision'
    else:
        return 'other'

# Apply classification using .loc to avoid SettingWithCopyWarning
relevant_papers.loc[:, 'method_type'] = relevant_papers['Abstract'].apply(classify_method)

In [33]:
# Define a function to extract method names
def extract_method_name(text):
    method_pattern = re.compile(r'\b(?:cnn|rnn|lstm|transformer)\b', re.IGNORECASE)
    methods = method_pattern.findall(text)
    return ', '.join(set(methods))

# Apply method extraction
relevant_papers.loc[:, 'methods'] = relevant_papers['Abstract'].apply(extract_method_name)

# Display the first few rows with extracted methods
relevant_papers.head()

Unnamed: 0,PMID,Title,Authors,Citation,First Author,Journal/Book,Publication Year,Create Date,PMCID,NIHMS ID,DOI,Abstract,is_relevant,method_type,methods
1,39398866,Characterization of arteriosclerosis based on ...,"Zhou J, Li X, Demeke D, Dinh TA, Yang Y, Janow...",J Med Imaging (Bellingham). 2024 Sep;11(5):057...,Zhou J,J Med Imaging (Bellingham),2024,2024/10/14,PMC11466048,,10.1117/1.JMI.11.5.057501,PURPOSE: Our purpose is to develop a computer ...,True,computer vision,
3,39367648,An initial game-theoretic assessment of enhanc...,"Fatemi MY, Lu Y, Diallo AB, Srinivasan G, Azhe...",Brief Bioinform. 2024 Sep 23;25(6):bbae476. do...,Fatemi MY,Brief Bioinform,2024,2024/10/05,PMC11452536,,10.1093/bib/bbae476,The application of deep learning to spatial tr...,True,other,
4,39363262,Truncated M13 phage for smart detection of E. ...,"Yuan J, Zhu H, Li S, Thierry B, Yang CT, Zhang...",J Nanobiotechnology. 2024 Oct 3;22(1):599. doi...,Yuan J,J Nanobiotechnology,2024,2024/10/04,PMC11451008,,10.1186/s12951-024-02881-y,BACKGROUND: The urgent need for affordable and...,True,other,CNN
13,39182615,Artificial intelligence-driven automated lung ...,"Ismail MK, Araki T, Gefter WB, Suzuki Y, Raevs...",Am J Transplant. 2024 Aug 23:S1600-6135(24)005...,Ismail MK,Am J Transplant,2024,2024/08/25,,,10.1016/j.ajt.2024.08.015,Lung size measurements play an important role ...,True,computer vision,
15,39155966,Examining the Role of Passive Design Indicator...,"Ghorbany S, Hu M, Yao S, Wang C, Nguyen QC, Yu...",Build Environ. 2024 Feb 15;250:111126. doi: 10...,Ghorbany S,Build Environ,2024,2024/08/19,PMC11326486,NIHMS1957332,10.1016/j.buildenv.2023.111126,Passive design characteristics (PDC) play a pi...,True,computer vision,


In [34]:
# Save the processed data to a new CSV file
relevant_papers.to_csv('/content/drive/MyDrive/NLP Task/relevant_papers.csv', index=False)
print("Relevant papers saved successfully.")

Relevant papers saved successfully.
