## Web Scraping Science Facts
Source: https://www.natgeokids.com/uk/category/discover/science/

In [None]:
# Importing necessary libraries 

import re
import requests
import json
import IPython
import random

from bs4 import BeautifulSoup

In [None]:
# We call the main url we want to scrape: the science landing page that
# links to the individual articles

url = 'https://www.natgeokids.com/uk/category/discover/science/'

# A timeout keeps the cell from hanging forever on a stalled connection
response = requests.get(url, timeout=30)
# Fail loudly on a 4xx/5xx answer instead of silently parsing an error page
response.raise_for_status()
# Name the parser explicitly so the result does not depend on which
# optional parsers happen to be installed (and to avoid bs4's warning)
soup = BeautifulSoup(response.content, 'html.parser')

In [None]:
# Collect every <div class="card-content"> element of the landing page;
# the links to the subpages are looked up inside these elements

link = soup.find_all('div', class_='card-content')

In [None]:
# For each card, keep the list of its <a> tags that carry an href attribute.
# The result is a list of lists of tags (one inner list per card).

urls = [card.find_all('a', href=True) for card in link]

In [None]:
# We clean the result to only see the urls: the tags are rendered to text
# and every http(s) address found in that text is extracted.
# The pattern is written as a raw string because sequences such as '\/' and
# '\(' are invalid escapes in a normal literal and trigger a warning on
# recent Python versions; the compiled pattern itself is unchanged.

clean_urls = re.findall(
    r'http[s]?:\/\/(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
    str(urls),
)

clean_urls

In [None]:
# Only addresses containing the word 'facts' are of interest; keep those
# as the final list

facts_urls = [
    address
    for address in clean_urls
    if 'facts' in address
]

facts_urls

In [None]:
# We now take 5 different links from our facts_urls: Mars, Space, Human Body, The Earth, and Stephen Hawking

### Mars Facts

In [None]:
# We call the first url to scrape: facts about Mars

# A timeout keeps the cell from hanging forever on a stalled connection
response = requests.get(
    url='https://www.natgeokids.com/uk/discover/science/space/facts-about-mars/',
    timeout=30,
)
# Fail loudly on a 4xx/5xx answer instead of silently parsing an error page
response.raise_for_status()
# Name the parser explicitly so the result does not depend on which
# optional parsers happen to be installed (and to avoid bs4's warning)
soup = BeautifulSoup(response.content, 'html.parser')

In [None]:
# Mars facts location: all <p> tags inside the first
# <div class="article-sheet"> of the page

mars = soup.find('div', class_='article-sheet').find_all('p')

In [None]:
# Text of every Mars paragraph, with non-breaking spaces ('\xa0') turned
# into ordinary spaces

mars_facts = [paragraph.get_text().replace('\xa0', ' ') for paragraph in mars]

In [None]:
# Keep only the paragraphs that start with a digit followed by at least one
# more character (presumably the numbered facts).
# Raw string: '\d' is an invalid escape sequence in a normal literal.
r = re.compile(r'\d.')

mars_facts_cleaned = [text for text in mars_facts if r.match(text)]

mars_facts_cleaned

### Space Facts

In [None]:
# We call the second url to scrape: facts about space

# A timeout keeps the cell from hanging forever on a stalled connection
response = requests.get(
    url='https://www.natgeokids.com/uk/discover/science/space/ten-facts-about-space/',
    timeout=30,
)
# Fail loudly on a 4xx/5xx answer instead of silently parsing an error page
response.raise_for_status()
# Name the parser explicitly so the result does not depend on which
# optional parsers happen to be installed (and to avoid bs4's warning)
soup = BeautifulSoup(response.content, 'html.parser')

In [None]:
# Space facts location: all <p> tags inside the first
# <div class="article-sheet"> of the page

space = soup.find('div', class_='article-sheet').find_all('p')

In [None]:
# Text of every space paragraph, with non-breaking spaces ('\xa0') turned
# into ordinary spaces

space_facts = [paragraph.get_text().replace('\xa0', ' ') for paragraph in space]

In [None]:
# Keep only the paragraphs that start with a digit followed by at least one
# more character (presumably the numbered facts).
# Raw string: '\d' is an invalid escape sequence in a normal literal.
r = re.compile(r'\d.')

space_facts_cleaned = [text for text in space_facts if r.match(text)]

space_facts_cleaned

### Human Body Facts

In [None]:
# We call the third url to scrape: facts about the human body

# A timeout keeps the cell from hanging forever on a stalled connection
response = requests.get(
    url='https://www.natgeokids.com/uk/discover/science/general-science/15-facts-about-the-human-body/',
    timeout=30,
)
# Fail loudly on a 4xx/5xx answer instead of silently parsing an error page
response.raise_for_status()
# Name the parser explicitly so the result does not depend on which
# optional parsers happen to be installed (and to avoid bs4's warning)
soup = BeautifulSoup(response.content, 'html.parser')

In [None]:
# Human body facts location: all <p> tags inside the first
# <div class="article-sheet"> of the page

human = soup.find('div', class_='article-sheet').find_all('p')

In [None]:
# Text of every human body paragraph. Non-breaking spaces ('\xa0') are
# turned into ordinary spaces, exactly as is done for the other categories,
# so the facts do not carry stray non-breaking spaces into later steps.

human_facts = [paragraph.get_text().replace('\xa0', ' ') for paragraph in human]

In [None]:
# Keep only the paragraphs that start with a digit followed by at least one
# more character (presumably the numbered facts).
# Raw string: '\d' is an invalid escape sequence in a normal literal.
r = re.compile(r'\d.')

first_clean = [text for text in human_facts if r.match(text)]

In [None]:
# Break each retained paragraph into its individual lines; the result is a
# list of lists (one inner list per paragraph)
second_clean = [paragraph.split('\n') for paragraph in first_clean]

In [None]:
# Flatten the per-paragraph lists into a single list of facts.
# split('\n') yields empty strings for leading, trailing or doubled
# newlines, so blank pieces are dropped here; otherwise they would end up
# in the list of facts that a random fact is later drawn from.
human_facts_cleaned = [
    fact
    for innerList in second_clean
    for fact in innerList
    if fact.strip()
]

human_facts_cleaned

### Earth Facts

In [None]:
# We call the fourth url to scrape: facts about the Earth

# A timeout keeps the cell from hanging forever on a stalled connection
response = requests.get(
    url='https://www.natgeokids.com/uk/discover/science/space/facts-about-the-earth/',
    timeout=30,
)
# Fail loudly on a 4xx/5xx answer instead of silently parsing an error page
response.raise_for_status()
# Name the parser explicitly so the result does not depend on which
# optional parsers happen to be installed (and to avoid bs4's warning)
soup = BeautifulSoup(response.content, 'html.parser')

In [None]:
# Earth facts location: every <span> carrying the class below.
# NOTE(review): the class name looks machine-generated; if the page markup
# changes this simply returns an empty list - verify against the live page.

earth = soup.find_all('span', class_='m_157449401609115947gmail-s2')

In [None]:
# List of Earth facts: text of every selected span, with non-breaking
# spaces ('\xa0') turned into ordinary spaces and newlines removed

earth_facts = [
    span.get_text().replace('\xa0', ' ').replace('\n', '')
    for span in earth
]

In [None]:
# Keep only the entries that start with a digit followed by at least one
# more character (presumably the numbered facts).
# Raw string: '\d' is an invalid escape sequence in a normal literal.
r = re.compile(r'\d.')

earth_facts_cleaned = [text for text in earth_facts if r.match(text)]

earth_facts_cleaned

### Stephen Hawking Facts

In [None]:
# We call the fifth url to scrape: facts about Stephen Hawking

# A timeout keeps the cell from hanging forever on a stalled connection
response = requests.get(
    url='https://www.natgeokids.com/uk/discover/science/general-science/stephen-hawking-facts/',
    timeout=30,
)
# Fail loudly on a 4xx/5xx answer instead of silently parsing an error page
response.raise_for_status()
# Name the parser explicitly so the result does not depend on which
# optional parsers happen to be installed (and to avoid bs4's warning)
soup = BeautifulSoup(response.content, 'html.parser')

In [None]:
# Stephen Hawking facts location: all <p> tags inside the first
# <div class="article-sheet"> of the page

stephen = soup.find('div', class_='article-sheet').find_all('p')

In [None]:
# Text of every Stephen Hawking paragraph, with non-breaking spaces
# ('\xa0') turned into ordinary spaces

stephen_facts = [paragraph.get_text().replace('\xa0', ' ') for paragraph in stephen]

In [None]:
# Keep only the paragraphs that start with a digit followed by at least one
# more character (presumably the numbered facts).
# Raw string: '\d' is an invalid escape sequence in a normal literal.
r = re.compile(r'\d.')

stephen_facts_cleaned = [text for text in stephen_facts if r.match(text)]

stephen_facts_cleaned

## Science Facts into Banana Language

In [None]:
# Function to get random facts out of a selected category

# Category name -> list of cleaned facts scraped for that category
cat_dict = {
    'Mars': mars_facts_cleaned,
    'Space': space_facts_cleaned,
    'Human Body': human_facts_cleaned,
    'The Earth': earth_facts_cleaned,
    'Stephen Hawking': stephen_facts_cleaned,
}

def display_category_fact(category):
    """Print a random fact of *category*, translated by the minion API.

    The untranslated fact is stored in the module-level ``fact_english``
    before the translation request is made, so it stays available even
    when the request fails.

    Args:
        category: A key of ``cat_dict``.

    Raises:
        KeyError: If *category* is not a key of ``cat_dict``, or if the
            API answer lacks the expected ``contents``/``translated`` keys.
        IndexError: If the category has no facts to choose from.
        requests.HTTPError: If the translation API answers with an error
            status.
    """
    global fact_english
    # Pick the fact directly rather than picking an index and looking it up
    fact_english = random.choice(cat_dict[category])
    response = requests.get(
        'https://api.funtranslations.com/translate/minion.json',
        params={'text': fact_english},
        timeout=30,  # do not hang forever on a stalled connection
    )
    # Report a failed call as an HTTP error rather than as a lookup failure
    # on the response body
    response.raise_for_status()
    fact_banana = response.json()['contents']['translated']
    print(fact_banana)

In [None]:
# Display of random facts based on category

categories = ['Mars','Space','Human Body','The Earth','Stephen Hawking']

def choose_category():
    """Ask for a category until a known one is entered, then show a fact.

    The answer is compared without regard to case or surrounding
    whitespace, so ``mars`` and `` Mars `` both select ``'Mars'``. The
    question is repeated in a loop (not by recursion), so any number of
    invalid answers can be given without exhausting the call stack.
    """
    # Normalised spelling -> exact name expected by display_category_fact
    known = {name.casefold(): name for name in categories}
    while True:
        answer = input('Time to learn! Choose a category: ')
        chosen = known.get(answer.strip().casefold())
        if chosen is not None:
            display_category_fact(chosen)
            return

choose_category()

In [None]:
# Show the untranslated fact most recently stored in the global
# fact_english by display_category_fact (the name does not exist until
# that function has run at least once)
print(fact_english)