In [42]:
!pip install langchain langchain-cohere cohere biopython semanticscholar transformers torch sentencepiece -q

In [7]:
from langchain_cohere import ChatCohere
import cohere
from google.colab import userdata

cohere_api_key = userdata.get('cohere_API_key')
co = cohere.Client(cohere_api_key)


* 'allow_population_by_field_name' has been renamed to 'populate_by_name'
* 'smart_union' has been removed


In [8]:
query = "Articles about menstrual cycle and females"
response = co.generate(
    model = "c4ai-aya-23-35b",
    prompt = f'''You are a Keyword Generator for a keyword search engine. Your job is only to generate relevant keywords that can be plugged into a keyword search engine given a search query.
    Given this search query: "{query}" generate symantically relevant Keywords.''',
    max_tokens=300,
    temperature=0.01,
    k=1,
    stop_sequences=[],
    return_likelihoods='NONE'
)

print(response.generations[0].text)

Sure! Here are some relevant keywords for the given search query: 

- "Menstrual cycle phases"
- "Female reproductive health"
- "Period tracking"
- "Menstruation education"
- "Hormonal changes during menstruation"
- "Common menstrual issues"
- "Female-specific health concerns"
- "Menstrual products and options"
- "Social impact of menstruation"
- "Taboos and culture around menstruation" 

These keywords should provide a good starting point for a keyword search engine to retrieve relevant articles and information about the menstrual cycle and its relation to female health and experiences.


In [18]:
import re

keywords_list = re.findall(r'-\s"([^"]+)"', response.generations[0].text)
keywords_list

['Menstrual cycle phases',
 'Female reproductive health',
 'Period tracking',
 'Menstruation education',
 'Hormonal changes during menstruation',
 'Common menstrual issues',
 'Female-specific health concerns',
 'Menstrual products and options',
 'Social impact of menstruation',
 'Taboos and culture around menstruation']

In [19]:
from Bio import Entrez, Medline

userdata = {
    'email': '',
    'api_key': ''
}

Entrez.email = userdata.get('email')
Entrez.api_key = userdata.get('api_key')

In [20]:
import pandas as pd

#ask user to select earliest publication date
publication_date = '2024/01/01'
retmax = 250

unique_pmids = set()
keyword_dictionary_pmids = {}

for kw in keywords_list:
    search_term = kw
    handle = Entrez.esearch(db="pubmed", term=search_term, mindate=publication_date, retmax=retmax)
    record = Entrez.read(handle)
    handle.close()

    keyword_dictionary_pmids[kw] = record["IdList"]

    # Filter out PMIDs that have been already collected
    new_pmids = set(record["IdList"]) - unique_pmids

    print(f"{len(new_pmids)} new results found for Search '{kw}'")

    # Update the set of unique PMIDs
    unique_pmids.update(new_pmids)

250 new results found for Search 'Menstrual cycle phases'
246 new results found for Search 'Female reproductive health'
249 new results found for Search 'Period tracking'
235 new results found for Search 'Menstruation education'
224 new results found for Search 'Hormonal changes during menstruation'
241 new results found for Search 'Common menstrual issues'
40 new results found for Search 'Female-specific health concerns'
56 new results found for Search 'Menstrual products and options'
209 new results found for Search 'Social impact of menstruation'
9 new results found for Search 'Taboos and culture around menstruation'


In [21]:
for keyword, pmid_list in keyword_dictionary_pmids.items():
    print(f"Search: {keyword}, \n PMIDs: {len(pmid_list)}")
    print()

Search: Menstrual cycle phases, 
 PMIDs: 250

Search: Female reproductive health, 
 PMIDs: 250

Search: Period tracking, 
 PMIDs: 250

Search: Menstruation education, 
 PMIDs: 250

Search: Hormonal changes during menstruation, 
 PMIDs: 250

Search: Common menstrual issues, 
 PMIDs: 250

Search: Female-specific health concerns, 
 PMIDs: 41

Search: Menstrual products and options, 
 PMIDs: 58

Search: Social impact of menstruation, 
 PMIDs: 250

Search: Taboos and culture around menstruation, 
 PMIDs: 20



In [22]:
print(len(unique_pmids))

1759


In [23]:
import pandas as pd
from Bio import Entrez
from concurrent.futures import ThreadPoolExecutor

# Define a function to fetch article data
def fetch_article_data(pmid):
    try:
        handle = Entrez.efetch(db="pubmed", id=pmid, retmode="xml")
        record = Entrez.read(handle)
        handle.close()

        # Initialize variables with default values
        title = 'Title not available'
        abstract = None
        authors = []
        affiliations = []
        keywords = []
        journal = 'Journal not available'
        pub_year = 'Year not available'

        # Extract data as needed with checks
        if 'PubmedArticle' in record and len(record['PubmedArticle']) > 0:
            citation = record['PubmedArticle'][0]['MedlineCitation']

            if 'Article' in citation:
                article = citation['Article']
                title = article.get('ArticleTitle', 'Title not available')

                # Check if the abstract is available
                if 'Abstract' in article:
                    abstract = article['Abstract']['AbstractText']
                    abstract = ' '.join(abstract) if abstract else None

                # Extract author names & author affiliations
                author_list = article.get('AuthorList', [])
                for author in author_list:
                    if 'LastName' in author and 'Initials' in author:
                        full_name = f"{author['LastName']} {author['Initials']}"
                        authors.append(full_name)
                    if 'AffiliationInfo' in author:
                        affiliation_info = author['AffiliationInfo']
                        if len(affiliation_info) > 0:
                            affiliation_name_first = affiliation_info[0]
                            affiliations.append(affiliation_name_first.get('Affiliation', "Not available"))

                # Extract author keywords
                keyword_list = citation.get('KeywordList', [])
                if len(keyword_list) > 0:
                    for keyword in keyword_list[0]:
                        keywords.append(keyword)

                journal_info = article.get('Journal', {})
                journal = journal_info.get('Title', 'Journal not available')
                issue = journal_info.get('JournalIssue', {})
                pub_date = issue.get('PubDate', {})
                pub_year = pub_date.get('Year', 'Year not available')

        return {
            "PMID": pmid,
            "Title": title,
            "Abstract": abstract,
            "Authors": ['; '.join(authors)],
            "Author Affiliations": ['; '.join(affiliations)],
            "Author Keywords": [';'.join(keywords)],
            "Publication Title": journal,
            "Publication Year": pub_year,
        }

    except Exception as e:
        print(f"Error retrieving data for PMID {pmid}: {str(e)}")
        return None

# Specify the number of concurrent threads
num_threads = 5

# Use ThreadPoolExecutor for concurrent fetching
with ThreadPoolExecutor(max_workers=num_threads) as executor:
    # Fetch article data concurrently
    results = list(executor.map(fetch_article_data, unique_pmids))

# Remove None values (articles with errors)
results = [result for result in results if result is not None]

# Create the DataFrame
df = pd.DataFrame(results)

# Print the resulting DataFrame
print(df.head())

Error retrieving data for PMID 38260168: HTTP Error 429: Too Many Requests
Error retrieving data for PMID 37993900: HTTP Error 429: Too Many Requests
Error retrieving data for PMID 35682310: HTTP Error 429: Too Many Requests
Error retrieving data for PMID 17636607: HTTP Error 429: Too Many Requests
Error retrieving data for PMID 29479656: HTTP Error 429: Too Many Requests
Error retrieving data for PMID 39011939: HTTP Error 429: Too Many Requests
Error retrieving data for PMID 38530762: HTTP Error 429: Too Many Requests
Error retrieving data for PMID 38484430: HTTP Error 429: Too Many Requests
Error retrieving data for PMID 38487254: HTTP Error 429: Too Many Requests
Error retrieving data for PMID 36819572: HTTP Error 429: Too Many Requests
Error retrieving data for PMID 28725187: HTTP Error 429: Too Many Requests
Error retrieving data for PMID 39058707: HTTP Error 429: Too Many Requests
Error retrieving data for PMID 21688870: HTTP Error 429: Too Many Requests
Error retrieving data for

In [24]:
len(df)

1190

In [25]:
df.head()

Unnamed: 0,PMID,Title,Abstract,Authors,Author Affiliations,Author Keywords,Publication Title,Publication Year
0,33154830,Change in Estradiol Levels among Premenopausal...,The combination of luteinizing hormone-releasi...,[Lee YJ; Wu ZY; Kim HJ; Lee JW; Chung IY; Kim ...,"[Division of Breast Surgery, Department of Sur...",[Antineoplastic agents;Breast neoplasms;Gonado...,Journal of breast cancer,2020
1,20207519,Age at menarche and premenstrual syndrome in a...,The issues involving menstruation are the topi...,[Ibralic I; Sinanovic O; Memisevic H],"[Center Vladimir Nazor, Sarajevo, Bosnia and H...",[],Research in developmental disabilities,2010
2,38951612,Archaeological evidence of an ethnographically...,"In societies without writing, ethnographically...",[David B; Mullett R; Wright N; Stephenson B; A...,"[Monash Indigenous Studies Centre, Monash Univ...",[],Nature human behaviour,2024
3,39014128,"Impact of the 2008 <math xmlns=""http://www.w3....",Tectonic plate motions drive the earthquake cy...,[Iaffaldano G; Martin de Blas J; Rui X; Stamps...,"[Department of Chemistry, Life Science and Env...",[],Scientific reports,2024
4,26670027,Characteristics of the menstrual cycle in 13-y...,This study aimed to investigate the characteri...,[Hoppenbrouwers K; Roelants M; Meuleman C; Rij...,"[Centre of Environment and Health, Department ...",[Impact on social life;Menstrual cycle charact...,European journal of pediatrics,2016


In [26]:
df.to_csv('entrez_data.csv')

In [28]:
from semanticscholar import SemanticScholar
sch = SemanticScholar()
results = sch.search_paper("Menstrual Cycle and females")
print(f'{results.total} results.')

11417 results.


In [31]:
import pandas as pd

# Initialize an empty list to store the data
data = []

for i in results:
    try:
        # Access attributes directly
        pmid = getattr(i, 'externalIds', {}).get('PubMed', '')
        url = "https://pubmed.ncbi.nlm.nih.gov/" + str(pmid)
    except:
        url = getattr(i, 'url', '')

    # Collect data into a dictionary
    entry = {
        'title': getattr(i, 'title', ''),
        'abstract': getattr(i, 'abstract', ''),
        'authors': [j.name for j in getattr(i, 'authors', [])],
        'url': url,
        'Publication Year': getattr(i, 'year', ''),
        'Publication Title': getattr(i, 'journal', '')
    }
    # Append the dictionary to the list
    data.append(entry)

# Create the DataFrame from the list of dictionaries
df2 = pd.DataFrame(data)

# Print the resulting DataFrame
print(df2.head())

                                               title  \
0  Urinary steroid profile in females - the impac...   
1  Menstrual Cycle Phases Influence on Cardioresp...   
2  Effects of menstrual cycle and neuroticism on ...   
3  The Effect of the Menstrual Cycle and Oral Con...   
4  Ethanol Elimination in Males and Females: Rela...   

                                            abstract  \
0  Today's doping tests involving longitudinal mo...   
1  The aim of this study was to analyse the impac...   
2                                               None   
3  Most reproductive-aged women are exposed to fl...   
4  Ethanol pharmacokinetics were determined follo...   

                                             authors  \
0  [J. Mullen, J. Thörngren, J. Schulze, M. Erics...   
1  [B. Rael, V. M. Alfaro-Magallanes, N. Romero-P...   
2                [Mengying Wu, R. Zhou, Yamei Huang]   
3  [B. Thompson, Kaitlyn B Drover, Rhiannon Stell...   
4    [A. Marshall, D. Kingstone, M. Boss, M. M

In [32]:
df2.head()

Unnamed: 0,title,abstract,authors,url,Publication Year,Publication Title
0,Urinary steroid profile in females - the impac...,Today's doping tests involving longitudinal mo...,"[J. Mullen, J. Thörngren, J. Schulze, M. Erics...",https://pubmed.ncbi.nlm.nih.gov/27758048,2017.0,"{'name': 'Drug testing and analysis', 'pages':..."
1,Menstrual Cycle Phases Influence on Cardioresp...,The aim of this study was to analyse the impac...,"[B. Rael, V. M. Alfaro-Magallanes, N. Romero-P...",https://pubmed.ncbi.nlm.nih.gov/33498274,2021.0,{'name': 'International Journal of Environment...
2,Effects of menstrual cycle and neuroticism on ...,,"[Mengying Wu, R. Zhou, Yamei Huang]",https://pubmed.ncbi.nlm.nih.gov/25312202,2014.0,{'name': 'International journal of psychophysi...
3,The Effect of the Menstrual Cycle and Oral Con...,Most reproductive-aged women are exposed to fl...,"[B. Thompson, Kaitlyn B Drover, Rhiannon Stell...",https://pubmed.ncbi.nlm.nih.gov/34682310,2021.0,{'name': 'International Journal of Environment...
4,Ethanol Elimination in Males and Females: Rela...,Ethanol pharmacokinetics were determined follo...,"[A. Marshall, D. Kingstone, M. Boss, M. Morgan]",https://pubmed.ncbi.nlm.nih.gov/6618437,2007.0,"{'name': 'Hepatology', 'volume': '3'}"


In [33]:
df1 = pd.read_csv('entrez_data.csv')

In [34]:
len(df1)

1190

In [35]:
df1['url'] = df1['PMID'].apply(lambda x: "https://pubmed.ncbi.nlm.nih.gov/"+str(x))
df1.head()

Unnamed: 0.1,Unnamed: 0,PMID,Title,Abstract,Authors,Author Affiliations,Author Keywords,Publication Title,Publication Year,url
0,0,33154830,Change in Estradiol Levels among Premenopausal...,The combination of luteinizing hormone-releasi...,['Lee YJ; Wu ZY; Kim HJ; Lee JW; Chung IY; Kim...,"['Division of Breast Surgery, Department of Su...",['Antineoplastic agents;Breast neoplasms;Gonad...,Journal of breast cancer,2020,https://pubmed.ncbi.nlm.nih.gov/33154830
1,1,20207519,Age at menarche and premenstrual syndrome in a...,The issues involving menstruation are the topi...,['Ibralic I; Sinanovic O; Memisevic H'],"['Center Vladimir Nazor, Sarajevo, Bosnia and ...",[''],Research in developmental disabilities,2010,https://pubmed.ncbi.nlm.nih.gov/20207519
2,2,38951612,Archaeological evidence of an ethnographically...,"In societies without writing, ethnographically...",['David B; Mullett R; Wright N; Stephenson B; ...,"['Monash Indigenous Studies Centre, Monash Uni...",[''],Nature human behaviour,2024,https://pubmed.ncbi.nlm.nih.gov/38951612
3,3,39014128,"Impact of the 2008 <math xmlns=""http://www.w3....",Tectonic plate motions drive the earthquake cy...,['Iaffaldano G; Martin de Blas J; Rui X; Stamp...,"['Department of Chemistry, Life Science and En...",[''],Scientific reports,2024,https://pubmed.ncbi.nlm.nih.gov/39014128
4,4,26670027,Characteristics of the menstrual cycle in 13-y...,This study aimed to investigate the characteri...,"[""Hoppenbrouwers K; Roelants M; Meuleman C; Ri...","['Centre of Environment and Health, Department...",['Impact on social life;Menstrual cycle charac...,European journal of pediatrics,2016,https://pubmed.ncbi.nlm.nih.gov/26670027


In [49]:
# Inspect columns in both DataFrames
print("df1 columns:", df1.columns)
print("df2 columns:", df2.columns)

# Define columns to select based on the actual columns present in both DataFrames
common_columns = ['PMID', 'Title', 'Abstract', 'Authors', 'Publication Year', 'url']

# Ensure that the selected columns exist in both DataFrames
df1_columns = [col for col in common_columns if col in df1.columns]
df2_columns = [col for col in common_columns if col in df2.columns]

# Combine the DataFrames without duplicates based on PMID
combined_df = pd.concat([
    df1.loc[:, df1_columns],
    df2.loc[:, df2_columns]
], ignore_index=True).drop_duplicates(subset='PMID')

# Print the combined DataFrame
print(combined_df.head())

df1 columns: Index(['Unnamed: 0', 'PMID', 'Title', 'Abstract', 'Authors',
       'Author Affiliations', 'Author Keywords', 'Publication Title',
       'Publication Year', 'url', 'Publication Date'],
      dtype='object')
df2 columns: Index(['title', 'abstract', 'authors', 'url', 'Publication Year',
       'Publication Title', 'Publication Date', 'PMID', 'Title', 'Abstract',
       'Authors'],
      dtype='object')
       PMID                                              Title  \
0  33154830  Change in Estradiol Levels among Premenopausal...   
1  20207519  Age at menarche and premenstrual syndrome in a...   
2  38951612  Archaeological evidence of an ethnographically...   
3  39014128  Impact of the 2008 <math xmlns="http://www.w3....   
4  26670027  Characteristics of the menstrual cycle in 13-y...   

                                            Abstract  \
0  The combination of luteinizing hormone-releasi...   
1  The issues involving menstruation are the topi...   
2  In societies w

In [50]:
combined_df.head()

Unnamed: 0,PMID,Title,Abstract,Authors,Publication Year,url
0,33154830,Change in Estradiol Levels among Premenopausal...,The combination of luteinizing hormone-releasi...,['Lee YJ; Wu ZY; Kim HJ; Lee JW; Chung IY; Kim...,2020,https://pubmed.ncbi.nlm.nih.gov/33154830
1,20207519,Age at menarche and premenstrual syndrome in a...,The issues involving menstruation are the topi...,['Ibralic I; Sinanovic O; Memisevic H'],2010,https://pubmed.ncbi.nlm.nih.gov/20207519
2,38951612,Archaeological evidence of an ethnographically...,"In societies without writing, ethnographically...",['David B; Mullett R; Wright N; Stephenson B; ...,2024,https://pubmed.ncbi.nlm.nih.gov/38951612
3,39014128,"Impact of the 2008 <math xmlns=""http://www.w3....",Tectonic plate motions drive the earthquake cy...,['Iaffaldano G; Martin de Blas J; Rui X; Stamp...,2024,https://pubmed.ncbi.nlm.nih.gov/39014128
4,26670027,Characteristics of the menstrual cycle in 13-y...,This study aimed to investigate the characteri...,"[""Hoppenbrouwers K; Roelants M; Meuleman C; Ri...",2016,https://pubmed.ncbi.nlm.nih.gov/26670027


In [51]:
combined_df['Title + Abstract'] = combined_df['Title'] + ' ' + combined_df['Abstract'].fillna('')
combined_df.head()

Unnamed: 0,PMID,Title,Abstract,Authors,Publication Year,url,Title + Abstract
0,33154830,Change in Estradiol Levels among Premenopausal...,The combination of luteinizing hormone-releasi...,['Lee YJ; Wu ZY; Kim HJ; Lee JW; Chung IY; Kim...,2020,https://pubmed.ncbi.nlm.nih.gov/33154830,Change in Estradiol Levels among Premenopausal...
1,20207519,Age at menarche and premenstrual syndrome in a...,The issues involving menstruation are the topi...,['Ibralic I; Sinanovic O; Memisevic H'],2010,https://pubmed.ncbi.nlm.nih.gov/20207519,Age at menarche and premenstrual syndrome in a...
2,38951612,Archaeological evidence of an ethnographically...,"In societies without writing, ethnographically...",['David B; Mullett R; Wright N; Stephenson B; ...,2024,https://pubmed.ncbi.nlm.nih.gov/38951612,Archaeological evidence of an ethnographically...
3,39014128,"Impact of the 2008 <math xmlns=""http://www.w3....",Tectonic plate motions drive the earthquake cy...,['Iaffaldano G; Martin de Blas J; Rui X; Stamp...,2024,https://pubmed.ncbi.nlm.nih.gov/39014128,"Impact of the 2008 <math xmlns=""http://www.w3...."
4,26670027,Characteristics of the menstrual cycle in 13-y...,This study aimed to investigate the characteri...,"[""Hoppenbrouwers K; Roelants M; Meuleman C; Ri...",2016,https://pubmed.ncbi.nlm.nih.gov/26670027,Characteristics of the menstrual cycle in 13-y...


In [52]:
combined_df.to_csv('pre_final.csv')

In [53]:
df = pd.read_csv('/content/pre_final.csv')

In [54]:
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration, T5Config

model = T5ForConditionalGeneration.from_pretrained('t5-small')
tokenizer = T5Tokenizer.from_pretrained('t5-small')
device = torch.device('cpu')

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [55]:
text = """
Python is an interpreted high-level general-purpose programming language. Its design philosophy emphasizes code readability with its use of significant indentation. Its language constructs as well as its object-oriented approach aim to help programmers write clear, logical code for small and large-scale projects.[30]

Python is dynamically-typed and garbage-collected. It supports multiple programming paradigms, including structured (particularly, procedural), object-oriented and functional programming. It is often described as a "batteries included" language due to its comprehensive standard library.[31]

Guido van Rossum began working on Python in the late 1980s, as a successor to the ABC programming language, and first released it in 1991 as Python 0.9.0.[32] Python 2.0 was released in 2000 and introduced new features, such as list comprehensions and a garbage collection system using reference counting. Python 3.0 was released in 2008 and was a major revision of the language that is not completely backward-compatible. Python 2 was discontinued with version 2.7.18 in 2020.[33]

Python consistently ranks as one of the most popular programming languages.[34][35][36][37]"""

preprocessed_text = text.strip().replace('\n','')

t5_input_text = 'summarize: ' + preprocessed_text

tokenized_text = tokenizer.encode(t5_input_text, return_tensors='pt', max_length=300).to(device)

summary_ids = model.generate(tokenized_text, min_length=30, max_length=300)

summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

summary

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


'Python is a general-purpose programming language. it supports multiple programming paradigms, including structured, object-oriented and functional programming. it is often described as a "batteries included" language due to its comprehensive standard library.'

In [56]:
count = 0
# Define the summarization function
def summarize_text(text):
    global count
    count +=1
    if count == 2:
      print("Its working")
    if count%100 == 0:
      print(f"{count} summarized")

    # Check if the text is NaN (float) or None
    if pd.isna(text) or text is None:
        return ""  # Return an empty string for missing or None values
    preprocessed_text = str(text).strip().replace('\n', '')  # Convert to string and preprocess
    t5_input_text = 'summarize: ' + preprocessed_text
    tokenized_text = tokenizer.encode(
        t5_input_text,
        return_tensors='pt',
        max_length=350,
        truncation=True  # Explicitly enable truncation
    ).to(device)
    summary_ids = model.generate(tokenized_text, min_length=30, max_length=350)
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    return summary

# Apply the summarization function to the "Abstract" column
df['Summarized'] = df['Title + Abstract'].apply(summarize_text)

df.head()

Its working
100 summarized
200 summarized
300 summarized
400 summarized
500 summarized
600 summarized
700 summarized
800 summarized
900 summarized
1000 summarized
1100 summarized


Unnamed: 0.1,Unnamed: 0,PMID,Title,Abstract,Authors,Publication Year,url,Title + Abstract,Summarized
0,0,33154830.0,Change in Estradiol Levels among Premenopausal...,The combination of luteinizing hormone-releasi...,['Lee YJ; Wu ZY; Kim HJ; Lee JW; Chung IY; Kim...,2020,https://pubmed.ncbi.nlm.nih.gov/33154830,Change in Estradiol Levels among Premenopausal...,318 patients were included in the study and an...
1,1,20207519.0,Age at menarche and premenstrual syndrome in a...,The issues involving menstruation are the topi...,['Ibralic I; Sinanovic O; Memisevic H'],2010,https://pubmed.ncbi.nlm.nih.gov/20207519,Age at menarche and premenstrual syndrome in a...,the study was conducted in a study of 31 adole...
2,2,38951612.0,Archaeological evidence of an ethnographically...,"In societies without writing, ethnographically...",['David B; Mullett R; Wright N; Stephenson B; ...,2024,https://pubmed.ncbi.nlm.nih.gov/38951612,Archaeological evidence of an ethnographically...,ethnographicly documented rituals date back to...
3,3,39014128.0,"Impact of the 2008 <math xmlns=""http://www.w3....",Tectonic plate motions drive the earthquake cy...,['Iaffaldano G; Martin de Blas J; Rui X; Stamp...,2024,https://pubmed.ncbi.nlm.nih.gov/39014128,"Impact of the 2008 <math xmlns=""http://www.w3....",tectonic plate motions drive the earthquake cy...
4,4,26670027.0,Characteristics of the menstrual cycle in 13-y...,This study aimed to investigate the characteri...,"[""Hoppenbrouwers K; Roelants M; Meuleman C; Ri...",2016,https://pubmed.ncbi.nlm.nih.gov/26670027,Characteristics of the menstrual cycle in 13-y...,study aims to investigate the characteristics ...


In [57]:
df.to_csv('data.csv')