In [57]:
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [58]:
def scrape_google_scholar(query, timeout=30):
    """Fetch the first Google Scholar result page for ``query`` and parse it.

    Args:
        query: Free-text search string. It is passed through ``params`` so
            that spaces, ``&``, ``#`` and similar characters are URL-encoded
            instead of corrupting the query string.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error. Without it the request can hang indefinitely.

    Returns:
        A list of dicts with the keys ``'Title'``, ``'Abstract'``,
        ``'Authors'`` and ``'Link'``. ``'Abstract'``, ``'Authors'`` and
        ``'Link'`` are ``None`` when the matching element is missing from a
        result. The list is empty when the response status is not OK or no
        result elements are found.

    Raises:
        requests.RequestException: Propagated from ``requests.get`` on
            connection failures and timeouts.
    """
    base_url = "https://scholar.google.com"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
    response = requests.get(
        f"{base_url}/scholar",
        params={"q": query},
        headers=headers,
        timeout=timeout,
    )
    # An error page contains no results; skip parsing it and keep the
    # "empty list" contract that the rest of the notebook relies on.
    if not response.ok:
        return []
    soup = BeautifulSoup(response.text, 'html.parser')

    def stripped_text(node):
        # Missing elements become None instead of raising AttributeError.
        return node.text.strip() if node is not None else None

    articles = []
    for result in soup.find_all('div', class_='gs_ri'):
        heading = result.find('h3', class_='gs_rt')
        if heading is None:
            # Without a title heading there is nothing useful to record.
            continue
        # Take the link from the title heading only: the first <a> anywhere
        # in the result is not guaranteed to be the title link.
        anchor = heading.find('a')
        href = anchor.get('href') if anchor is not None else None
        articles.append({
            'Title': stripped_text(heading),
            'Abstract': stripped_text(result.find('div', class_='gs_rs')),
            'Authors': stripped_text(result.find('div', class_='gs_a')),
            # urljoin leaves absolute URLs untouched and resolves relative ones.
            'Link': urljoin(base_url, href) if href else None,
        })

    return articles

In [59]:
def scrape_sciencedirect(query, timeout=30):
    """Fetch the ScienceDirect search page for ``query`` and parse the hits.

    Args:
        query: Free-text search string. It is sent via ``params`` so that it
            is URL-encoded rather than spliced raw into the URL.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error.

    Returns:
        A list of dicts with the keys ``'Title'``, ``'Authors'``, ``'Date'``
        and ``'Link'``. ``'Authors'``, ``'Date'`` and ``'Link'`` are ``None``
        when the matching element or attribute is missing. ``'Link'`` is an
        absolute URL. The list is empty when the response status is not OK or
        no result links are found.

    Raises:
        requests.RequestException: Propagated from ``requests.get`` on
            connection failures and timeouts.
    """
    base_url = "https://www.sciencedirect.com"
    # "Accept-Encoding" is deliberately not set here: requests advertises the
    # encodings it can actually decode. Forcing "br" without a Brotli decoder
    # installed can leave response.text undecodable, which looks like
    # "no results".
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.9",
        "Referer": "https://www.sciencedirect.com/",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1"
    }
    response = requests.get(
        f"{base_url}/search",
        params={"qs": query},
        headers=headers,
        timeout=timeout,
    )
    # An error page contains no results; return the empty list directly.
    if not response.ok:
        return []
    soup = BeautifulSoup(response.text, 'html.parser')

    def stripped_text(node):
        return node.text.strip() if node is not None else None

    articles = []
    for result in soup.find_all('a', class_='result-list-title-link'):
        href = result.get('href')
        # NOTE(review): find_next scans everything after the title link, so a
        # hit that lacks its own author/date element may pick up the next
        # hit's value - confirm against the live markup.
        authors = result.find_next('ul', class_='author-group')
        date = result.find_next('dd', class_='publication-date')
        articles.append({
            'Title': result.text.strip(),
            'Authors': stripped_text(authors),
            'Date': stripped_text(date),
            # Resolve site-relative hrefs so the stored link is usable as-is.
            'Link': urljoin(base_url, href) if href else None,
        })

    return articles


In [60]:
def scrape_ieee_xplore(query, timeout=30):
    """Fetch the IEEE Xplore search page for ``query`` and parse the hits.

    Args:
        query: Free-text search string. It is sent via ``params`` so that it
            is URL-encoded rather than spliced raw into the URL.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error.

    Returns:
        A list of dicts with the keys ``'Title'``, ``'Authors'``,
        ``'Publication Info'`` and ``'Link'``. ``'Authors'``,
        ``'Publication Info'`` and ``'Link'`` are ``None`` when the matching
        element is missing. ``'Link'`` is an absolute URL. The list is empty
        when the response status is not OK or no result elements are found.

    Raises:
        requests.RequestException: Propagated from ``requests.get`` on
            connection failures and timeouts.
    """
    # NOTE(review): the saved output of this notebook shows this scraper
    # returning nothing; the result list may be rendered client-side, in
    # which case static HTML parsing cannot see it - confirm.
    base_url = "https://ieeexplore.ieee.org"
    # "Accept-Encoding" is deliberately not set here: requests advertises the
    # encodings it can actually decode. Forcing "br" without a Brotli decoder
    # installed can leave response.text undecodable.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.9",
        "Referer": "https://ieeexplore.ieee.org/",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1"
    }
    response = requests.get(
        f"{base_url}/search/searchresult.jsp",
        params={"newsearch": "true", "queryText": query},
        headers=headers,
        timeout=timeout,
    )
    # An error page contains no results; return the empty list directly.
    if not response.ok:
        return []
    soup = BeautifulSoup(response.text, 'html.parser')

    def stripped_text(node):
        # Missing elements become None instead of raising AttributeError.
        return node.text.strip() if node is not None else None

    articles = []
    for result in soup.find_all('div', class_='List-results-items'):
        title_node = result.find('h2', class_='title')
        if title_node is None:
            # Without a title there is nothing useful to record.
            continue
        anchor = result.find('a', class_='document-title')
        href = anchor.get('href') if anchor is not None else None
        articles.append({
            'Title': stripped_text(title_node),
            'Authors': stripped_text(result.find('p', class_='authors')),
            'Publication Info': stripped_text(result.find('p', class_='publisher')),
            # urljoin handles both relative and already-absolute hrefs, which
            # plain string concatenation does not.
            'Link': urljoin(base_url, href) if href else None,
        })

    return articles

In [61]:
# Run the same search term through every scraper, in the same order as before:
# Google Scholar, then ScienceDirect, then IEEE Xplore.
query = "engineering"
google_scholar_results, sciencedirect_results, ieee_xplore_results = [
    scraper(query)
    for scraper in (scrape_google_scholar, scrape_sciencedirect, scrape_ieee_xplore)
]

In [62]:
# Build one DataFrame per source from its list of result dicts.
google_scholar_df, sciencedirect_df, ieee_xplore_df = map(
    pd.DataFrame,
    (google_scholar_results, sciencedirect_results, ieee_xplore_results),
)

In [63]:
def display_results(df: pd.DataFrame):
    """Print ``df``, or a fixed notice when ``df`` has no rows.

    Args:
        df: Frame of scraped results.

    Returns:
        None. The output goes to stdout.
    """
    # An empty frame is reported with the notice text instead of its repr.
    shown = "Got blocked by the website. Try again later." if df.empty else df
    print(shown)

In [64]:
display_results(google_scholar_df)

                                               Title  \
0               Engineering solventogenic clostridia   
1                           Biocommodity engineering   
2           [หนังสือ][B] Oceanographical engineering   
3                [หนังสือ][B] Engineering in history   
4  [หนังสือ][B] Micromanufacturing engineering an...   
5             [หนังสือ][B] Aquacultural engineering.   
6  [หนังสือ][B] Bioseparations science and engine...   
7                [หนังสือ][B] Engineering statistics   
8  Reliability engineering: Old problems and new ...   
9         [หนังสือ][B] System engineering management   

                                            Abstract  \
0  … Pathway engineering efforts have resulted in...   
1  The application of biotechnology to the produc...   
2  As is the case with many modern fields of stud...   
3  … engineering:“It is customary to think of eng...   
4  … , and thin film fabrication Outlines system ...   
5  This book is divided into 2 parts which cove

In [65]:
display_results(sciencedirect_df)

Got blocked by the website. Try again later.


In [66]:
display_results(ieee_xplore_df)

Got blocked by the website. Try again later.
