In [1]:
#data scraping from google search
import urllib
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [2]:
class GoogleSearchScraping(object):
    """Fetch Google search pages and return them as parsed HTML.

    Typical flow: ``getData(getQuery("some phrase"))``. ``getData`` also
    accepts a relative href taken from a results page (anything that can be
    appended to ``https://google.com``).
    """

    # Desktop Firefox user agent sent with every request.
    USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:65.0) Gecko/20100101 Firefox/65.0"
    # Seconds to wait for a response; without a timeout requests.get can block forever.
    REQUEST_TIMEOUT = 10

    #Get search query
    def getQuery(self, query):
        """Return the relative search path for a free-text search phrase.

        The phrase is URL-encoded (spaces become ``+``; characters such as
        ``&`` or ``#`` are percent-escaped) so it cannot break out of the
        ``q`` parameter.
        """
        # Imported here so the method does not rely on another module having
        # loaded the urllib.parse submodule as a side effect.
        from urllib.parse import quote_plus
        return f"/search?q={quote_plus(query)}"

    #Get search URL
    def getURL(self, query):
        """Return the absolute Google URL for a relative path such as ``/search?q=...``.

        Any literal spaces left in the path are replaced with ``+``.
        """
        query = query.replace(' ', '+')
        return f"https://google.com{query}"

    #fetch data from google search
    def getData(self, query):
        """Download ``getURL(query)`` and return it as a ``BeautifulSoup`` tree.

        Args:
            query: relative path (as produced by ``getQuery`` or an href
                scraped from a results page).

        Returns:
            BeautifulSoup: the response body parsed with ``html.parser``.

        Raises:
            requests.HTTPError: if the response status is not 200.
            requests.RequestException: on connection errors or timeouts
                (raised by ``requests.get``).
        """
        headers = {"user-agent" : self.USER_AGENT}
        response = requests.get(
            self.getURL(query), headers=headers, timeout=self.REQUEST_TIMEOUT
        )
        # Fail with an explicit error instead of falling through to a return
        # of a variable that was never assigned.
        if response.status_code != 200:
            raise requests.HTTPError(
                f"Google returned HTTP {response.status_code} for {response.url}",
                response=response,
            )
        return BeautifulSoup(response.content, "html.parser")

In [3]:
#scraping google search  
class GooglePeopleBio(GoogleSearchScraping):
    """Scrape name, birth info and Twitter link for people listed on a Google results page.

    Constructing an instance creates (or overwrites) ``<fileName>.csv`` with a
    header row; every ``getPeopleDetails`` call appends its rows to that file.
    """

    def __init__(self, fileName):
        """Create the output CSV named ``fileName + ".csv"``."""
        super(GooglePeopleBio, self).__init__()
        #generate CSV file by class loading
        self.generateCSVFile(fileName)

    #scraping people Bio bar list
    def getPeopleList(self, query):
        """Return the hrefs of all links inside the ``appbar`` element of the results page.

        Returns an empty list when the page has no ``appbar`` element.
        """
        soup = self.getData(self.getQuery(query))
        bar = soup.find(id='appbar')
        if bar is None:
            # No people bar on this results page: nothing to follow.
            return []
        return [a['href'] for a in bar.find_all('a', href=True)]

    #scraping bio container
    def getPeopleDetails(self, query):
        """Scrape every person linked from the results page for ``query``.

        Each link from ``getPeopleList`` is fetched; pages that have a
        ``wp-tabs-container`` element are parsed with ``dataFormat``. The rows
        are appended (without header) to ``self.fileName``.

        Returns:
            pandas.DataFrame: the rows scraped by this call, with
            ``self.columns`` as columns.
        """
        results = []
        for people in self.getPeopleList(query):
            detailsSoup = self.getData(people)
            details = detailsSoup.find(id='wp-tabs-container')
            if details is None:
                continue
            try:
                results.append(self.dataFormat(details))
            except AttributeError:
                # A required element (name or birth info) was not found, so
                # a lookup returned None; skip this entry and keep going.
                continue
        df = pd.DataFrame(results, columns=self.columns)
        df.to_csv(self.fileName, mode='a', index=False, header=False)
        return df

    # create new file
    def generateCSVFile(self, fileName):
        """Set ``self.fileName``/``self.columns`` and write a header-only CSV.

        The file is opened in write mode, so an existing file with the same
        name is replaced.
        """
        self.fileName = fileName+".csv"
        self.columns = "name birthdate twitter".split()
        df = pd.DataFrame([], columns=self.columns)
        df.to_csv(self.fileName, mode="w", index=False, header=True)

    #format scraping result
    def dataFormat(self, details):
        """Extract ``(name, birthdate, twitter_url)`` from a details container.

        ``twitter_url`` is the first link in the social-media block whose href
        contains ``"twitter"``, or ``''`` when there is no such link or no
        social-media block at all.

        Raises:
            AttributeError: if the name or birth-info element is missing.
        """
        name = details.find('h2', attrs = {'class' : 'qrShPb'}).find('span').text
        birthdate = details.find('div', attrs = {'data-attrid' : 'kc:/people/person:born'}).text
        socialMedia = details.find('div', attrs = {'data-attrid' : 'kc:/common/topic:social media presence'})
        # A missing social-media block is not an error: keep the person with an empty link.
        if socialMedia is not None:
            for a in socialMedia.find_all('a', href=True):
                href = a['href']
                if "twitter" in href:
                    return (name, birthdate, href)
        return (name, birthdate, '')
    

In [4]:
# The search phrase doubles as the CSV base name: GooglePeopleBio(query) writes a
# header-only "Celebrities bio.csv", replacing any existing file of that name.
query = 'Celebrities bio'
celebrities = GooglePeopleBio(query)
# Scrape the first batch; the rows are appended to the CSV and returned as a DataFrame.
celebrities.getPeopleDetails(query)

Unnamed: 0,name,birthdate,twitter
0,Kim Kardashian,"Born: October 21, 1980 (age 39Â years), Los An...",https://twitter.com/KimKardashian
1,Jennifer Lopez,"Born: July 24, 1969 (age 51 years), Castle Hil...",https://twitter.com/JLo
2,Donald Trump,"Born: June 14, 1946 (age 74 years), Jamaica Ho...",https://twitter.com/realDonaldTrump
3,Rihanna,"Born: February 20, 1988 (age 32 years), Saint ...",https://twitter.com/rihanna
4,Elton John,"Born: March 25, 1947 (age 73Â years), Pinner, ...",https://twitter.com/eltonofficial
5,Nicole Richie,"Born: September 21, 1981 (age 39Â years), Berk...",https://twitter.com/nicolerichie
6,Oprah Winfrey,"Born: January 29, 1954 (age 66 years), Koscius...",https://twitter.com/Oprah
7,Justin Bieber,"Born: March 1, 1994 (age 26Â years), St. Josep...",https://twitter.com/justinbieber
8,Ellen DeGeneres,"Born: January 26, 1958 (age 62 years), Metairi...",https://twitter.com/TheEllenShow
9,Beyoncé,"Born: September 4, 1981 (age 39 years), Housto...",https://twitter.com/Beyonce


In [5]:
# Search 'Actors'; the scraped rows are appended to the same CSV and returned.
celebrities.getPeopleDetails('Actors')

Unnamed: 0,name,birthdate,twitter
0,Tom Hanks,"Born: July 9, 1956 (age 64 years), Concord, CA",https://twitter.com/tomhanks
1,Will Smith,"Born: September 25, 1968 (age 52 years), Phila...",
2,Jamie Foxx,"Born: December 13, 1967 (age 52 years), Terrel...",https://twitter.com/iamjamiefoxx
3,Leonardo DiCaprio,"Born: November 11, 1974 (age 45 years), Los An...",https://twitter.com/LeoDiCaprio
4,Chris Evans,"Born: June 13, 1981 (age 39 years), Boston, MA",https://twitter.com/ChrisEvans
5,Nicole Kidman,"Born: June 20, 1967 (age 53 years), Honolulu, HI",
6,Hugh Jackman,"Born: October 12, 1968 (age 52 years), Sydney,...",https://twitter.com/RealHughJackman
7,Margot Robbie,"Born: July 2, 1990 (age 30 years), Dalby, Aust...",https://twitter.com/MargotRobbie
8,Samuel L. Jackson,"Born: December 21, 1948 (age 71 years), Washin...",https://twitter.com/SamuelLJackson
9,Timothée Chalamet,"Born: December 27, 1995 (age 24 years), Manhat...",https://twitter.com/RealChalamet


In [6]:
# Search 'Professional Athletes'; the scraped rows are appended to the same CSV and returned.
celebrities.getPeopleDetails('Professional Athletes')

Unnamed: 0,name,birthdate,twitter
0,LeBron James,"Born: December 30, 1984 (age 35 years), Akron, OH",https://twitter.com/KingJames
1,Tiger Woods,"Born: December 30, 1975 (age 44 years), Cypres...",https://twitter.com/tigerwoods
2,Stephen Curry,"Born: March 14, 1988 (age 32 years), Akron, OH",https://twitter.com/StephenCurry30
3,Cristiano Ronaldo,"Born: February 5, 1985 (age 35 years), Hospita...",https://twitter.com/Cristiano
4,Floyd Mayweather,"Born: February 24, 1977 (age 43 years), Grand ...",https://twitter.com/FloydMayweather
5,Serena Williams,"Born: September 26, 1981 (age 39 years), Sagin...",https://twitter.com/serenawilliams
6,Lionel Messi,"Born: June 24, 1987 (age 33 years), Rosario, A...",
7,David Beckham,"Born: May 2, 1975 (age 45 years), Whipps Cross...",
8,Magic Johnson,"Born: August 14, 1959 (age 61 years), Lansing, MI",https://twitter.com/MagicJohnson
9,Alex Rodriguez,"Born: July 27, 1975 (age 45 years), Washington...",https://twitter.com/AROD


In [7]:
# Search 'Business People Biography'; the scraped rows are appended to the same CSV and returned.
celebrities.getPeopleDetails('Business People Biography')

Unnamed: 0,name,birthdate,twitter
0,Warren Buffett,"Born: August 30, 1930 (age 90 years), Omaha, NE",https://twitter.com/WarrenBuffett
1,Donald Trump,"Born: June 14, 1946 (age 74 years), Jamaica Ho...",https://twitter.com/realDonaldTrump
2,Richard Branson,"Born: July 18, 1950 (age 70 years), Blackheath...",https://twitter.com/richardbranson
3,Mark Zuckerberg,"Born: May 14, 1984 (age 36 years), White Plain...",
4,Robert Iger,"Born: February 10, 1951 (age 69 years), New Yo...",https://twitter.com/robertiger
5,Kenneth Langone,"Born: September 16, 1935 (age 85 years), Rosly...",https://twitter.com/kenlangone
6,Mike Bloomberg,"Born: February 14, 1942 (age 78 years), Bright...",https://twitter.com/MikeBloomberg
7,Arianna Huffington,"Born: July 15, 1950 (age 70 years), Athens, Gr...",https://twitter.com/ariannahuff
8,Eric Schmidt,"Born: April 27, 1955 (age 65 years), Falls Chu...",https://twitter.com/ericschmidt
9,Ted Turner,"Born: November 19, 1938 (age 81 years), Cincin...",https://twitter.com/tedturneriii


In [8]:
# Search 'Famous people'; the scraped rows are appended to the same CSV and returned.
# Rows are appended as-is, so people already scraped by an earlier cell are written again.
celebrities.getPeopleDetails('Famous people')

Unnamed: 0,name,birthdate,twitter
0,Donald Trump,"Born: June 14, 1946 (age 74 years), Jamaica Ho...",https://twitter.com/realDonaldTrump
1,Barack Obama,"Born: August 4, 1961 (age 59 years), Kapiʻolan...",https://twitter.com/BarackObama
2,Oprah Winfrey,"Born: January 29, 1954 (age 66 years), Koscius...",https://twitter.com/Oprah
3,Tom Hanks,"Born: July 9, 1956 (age 64 years), Concord, CA",https://twitter.com/tomhanks
4,Justin Bieber,"Born: March 1, 1994 (age 26 years), St. Joseph...",https://twitter.com/justinbieber
5,Joe Biden,"Born: November 20, 1942 (age 77 years), Scrant...",https://twitter.com/JoeBiden
6,Taylor Swift,"Born: December 13, 1989 (age 30 years), West R...",https://twitter.com/taylorswift13
7,Will Smith,"Born: September 25, 1968 (age 52 years), Phila...",
8,Jay-Z,"Born: December 4, 1969 (age 50 years), Brookly...",https://twitter.com/S_C_
9,Kim Kardashian,"Born: October 21, 1980 (age 39Â years), Los An...",https://twitter.com/KimKardashian


In [9]:
# Search 'american football players biography'; the scraped rows are appended to the same CSV and returned.
celebrities.getPeopleDetails('american football players biography')

Unnamed: 0,name,birthdate,twitter
0,Tom Brady,"Born: August 3, 1977 (age 43 years), San Mateo...",https://twitter.com/TomBrady
1,Jerry Rice,"Born: October 13, 1962 (age 58 years), Starkvi...",https://twitter.com/jerryrice
2,Steve Young,"Born: October 11, 1961 (age 59 years), Salt La...",https://twitter.com/steveyoungqb
3,Tony Dungy,"Born: October 6, 1955 (age 65 years), Jackson, MI",https://twitter.com/tonydungy
4,Brett Favre,"Born: October 10, 1969 (age 51 years), Gulfpor...",https://twitter.com/brettfavre
5,Urban Meyer,"Born: July 10, 1964 (age 56 years), Toledo, OH",https://twitter.com/coachurbanmeyer
6,Tim Tebow,"Born: August 14, 1987 (age 33 years), Makati, ...",https://twitter.com/TimTebow
7,Emmitt Smith,"Born: May 15, 1969 (age 51 years), Pensacola, FL",https://twitter.com/emmittsmith22
8,Ray Lewis,"Born: May 15, 1975 (age 45 years), Bartow, FL",https://twitter.com/raylewis
9,Deion Sanders,"Born: August 9, 1967 (age 53 years), Fort Myer...",http://www.twitter.com/deionsanders


In [10]:
# Search 'basketball players'; the scraped rows are appended to the same CSV and returned.
celebrities.getPeopleDetails('basketball players')

Unnamed: 0,name,birthdate,twitter
0,LeBron James,"Born: December 30, 1984 (age 35 years), Akron, OH",https://twitter.com/KingJames
1,Shaquille O'Neal,"Born: March 6, 1972 (age 48 years), Newark, NJ",https://twitter.com/SHAQ
2,Kareem Abdul-Jabbar,"Born: April 16, 1947 (age 73 years), New York, NY",https://twitter.com/kaj33
3,Kevin Durant,"Born: September 29, 1988 (age 32 years), Washi...",https://twitter.com/KDTrey5
4,Magic Johnson,"Born: August 14, 1959 (age 61 years), Lansing, MI",https://twitter.com/MagicJohnson
5,Bill Russell,"Born: February 12, 1934 (age 86 years), West M...",https://twitter.com/realbillrussell
6,Stephen Curry,"Born: March 14, 1988 (age 32 years), Akron, OH",https://twitter.com/StephenCurry30
7,Chris Paul,"Born: May 6, 1985 (age 35 years), Winston-Sale...",https://twitter.com/CP3
8,Russell Westbrook,"Born: November 12, 1988 (age 31 years), Long B...",https://twitter.com/russwest44
9,Dwyane Wade,"Born: January 17, 1982 (age 38 years), Chicago...",https://twitter.com/DwyaneWade


In [11]:
# Search 'Models biography'; the scraped rows are appended to the same CSV and returned.
celebrities.getPeopleDetails('Models biography')

Unnamed: 0,name,birthdate,twitter
0,Naomi Campbell,"Born: May 22, 1970 (age 50 years), Streatham, ...",https://twitter.com/NaomiCampbell
1,Heidi Klum,"Born: June 1, 1973 (age 47 years), Bergisch Gl...",https://twitter.com/heidiklum
2,Gisele Bündchen,"Born: July 20, 1980 (age 40 years), Horizontin...",https://twitter.com/giseleofficial
3,Cindy Crawford,"Born: February 20, 1966 (age 54 years), DeKalb...",https://twitter.com/CindyCrawford
4,Brooke Shields,"Born: May 31, 1965 (age 55 years), Manhattan, ...",https://twitter.com/BrookeShields
5,Grace Jones,"Born: May 19, 1948 (age 72 years), Spanish Tow...",https://twitter.com/GraceJones
6,Pamela Anderson,"Born: July 1, 1967 (age 53 years), Ladysmith, ...",https://twitter.com/pamfoundation
7,Katie Price,"Born: May 22, 1978 (age 42Â years), Brighton, ...",https://twitter.com/KatiePrice
8,Victoria Beckham,"Born: April 17, 1974 (age 46 years), Harlow, U...",https://twitter.com/victoriabeckham
9,Kim Kardashian,"Born: October 21, 1980 (age 39Â years), Los An...",https://twitter.com/KimKardashian


In [12]:
# Load everything the scraping cells appended to the CSV.
people_raw = pd.read_csv("Celebrities bio.csv")
#Clean up the duplicate record
# Deduplicate on name + twitter rather than twitter alone: drop_duplicates treats
# missing values as equal, so keying on 'twitter' only would collapse every person
# without a Twitter link into a single row.
data = people_raw.drop_duplicates(subset=['name', 'twitter'])
data

Unnamed: 0,name,birthdate,twitter
0,Kim Kardashian,"Born: October 21, 1980 (age 39Â years), Los An...",https://twitter.com/KimKardashian
1,Jennifer Lopez,"Born: July 24, 1969 (age 51 years), Castle Hil...",https://twitter.com/JLo
2,Donald Trump,"Born: June 14, 1946 (age 74 years), Jamaica Ho...",https://twitter.com/realDonaldTrump
3,Rihanna,"Born: February 20, 1988 (age 32 years), Saint ...",https://twitter.com/rihanna
4,Elton John,"Born: March 25, 1947 (age 73Â years), Pinner, ...",https://twitter.com/eltonofficial
...,...,...,...
231,Veronica Webb,"Born: February 25, 1965 (age 55 years), Detroi...",https://twitter.com/veronicawebb
233,Inès de La Fressange,"Born: August 11, 1957 (age 63 years), Gassin, ...",https://twitter.com/lafressange
234,Ashley Graham,"Born: October 30, 1987 (age 32 years), Lincoln...",https://twitter.com/ashleygraham
235,Beverly Johnson,"Born: October 13, 1952 (age 68 years), Buffalo...",https://twitter.com/beverlyjohnson1
