In [1]:
#Importing all Python libraries
import requests #requests allows you to send HTTP requests to other websites using Python
from bs4 import BeautifulSoup #BeautifulSoup is used for scraping webpage material
import numpy as np #Numpy is used for calculations
import pandas as pd #Pandas is used for cleaning and manipulating dataset

In [2]:
#sends HTTP request to page on Business Insider listing Top 50 albums of all times
pg = requests.get('https://www.businessinsider.com/50-best-selling-albums-all-time-2016-9#21-led-zeppelin-physical-graffiti-30')

In [3]:
pg.status_code #Status code of 200 indicates that it was able to gain access to website(request was successful)

200

In [4]:
pg.content #shows content of webpage



In [5]:
soup = BeautifulSoup(pg.content, 'html.parser') #parses html tags of website

In [6]:
soup.title #shows the title of the webpage

<title>    50 best-selling albums of all time - Business Insider
</title>

In [7]:
h2 = soup.find_all('h2') #finds all of the text with 'h2' HTML tags

In [8]:
type(h2) #shows the data type of h2

bs4.element.ResultSet

In [9]:
len(h2) #shows the length of h2, or the ResultSet 

50

In [10]:
h2 #shows the ResultSet which consists of the artists and their respective albums

[<h2 class="slide-title-text">50. Phil Collins — "No Jacket Required"</h2>,
 <h2 class="slide-title-text">49. Matchbox Twenty — "Yourself or Someone Like You"</h2>,
 <h2 class="slide-title-text">48. Led Zeppelin — "Led Zeppelin II"</h2>,
 <h2 class="slide-title-text">47. Kenny Rogers — "Kenny Rogers' Greatest Hits"</h2>,
 <h2 class="slide-title-text">46. Kenny G — "Breathless"</h2>,
 <h2 class="slide-title-text">45. Jewel — "Pieces of You"</h2>,
 <h2 class="slide-title-text">44. Dixie Chicks — "Wide Open Spaces"</h2>,
 <h2 class="slide-title-text">43. Def Leppard — "Hysteria"</h2>,
 <h2 class="slide-title-text">42. Boyz II Men — "II"</h2>,
 <h2 class="slide-title-text">41. Bon Jovi — "Slippery When Wet"</h2>,
 <h2 class="slide-title-text">40. Whitney Houston — "Whitney Houston"</h2>,
 <h2 class="slide-title-text">39. Prince &amp; The Revolution — "Purple Rain"</h2>,
 <h2 class="slide-title-text">38. Pearl Jam — "Ten"</h2>,
 <h2 class="slide-title-text">37. Bruce Springsteen — "Bruce Sp

In [12]:
type(h2[36]) #shows the datatype of element 36 in h2 ResultSet

bs4.element.Tag

In [13]:
#The goal here is to replace the en dash with an em dash and then to split the elements at the em dash
artists = [] #creates empty list called artists 
for i in h2: #loops through h2 resultset 
    info = i.text #parses text of element
    if '–' in info: #checks to see if any of the elements has an en dash so that it can replace it with the em dash
        info = info.replace('–','—')
    artists.append(info.split('—')) #splits each of the elements at the em dash and then adds it onto empty artists list
print(artists) #prints updates list of artists with their albumns

[['50. Phil Collins ', ' "No Jacket Required"'], ['49. Matchbox Twenty ', ' "Yourself or Someone Like You"'], ['48. Led Zeppelin ', '\xa0"Led Zeppelin II"'], ['47. Kenny Rogers ', ' "Kenny Rogers\' Greatest Hits"'], ['46. Kenny G ', ' "Breathless"'], ['45. Jewel ', ' "Pieces of You"'], ['44. Dixie Chicks ', ' "Wide Open Spaces"'], ['43. Def Leppard ', ' "Hysteria"'], ['42. Boyz II Men ', '\xa0"II"'], ['41. Bon Jovi ', ' "Slippery When Wet"'], ['40. Whitney Houston ', ' "Whitney Houston"'], ['39. Prince & The Revolution ', ' "Purple Rain"'], ['38. Pearl Jam ', ' "Ten"'], ['37. Bruce Springsteen ', ' "Bruce Springsteen & E Street Band Live 1975-\'85"'], ['36. Backstreet Boys ', ' "Millennium"'], ['35. Steve Miller Band ', '\xa0"Greatest Hits 1974-1978"'], ['34. Simon & Garfunkel ', '\xa0"Simon & Garfunkel\'s Greatest Hits"'], ['33. Meat Loaf ', ' "Bat Out of Hell"'], ['32. Garth Brooks ', ' "Ropin\' The Wind"'], ['31. Britney Spears ', ' "...Baby One More Time"'], ['30. Backstreet Boys '

In [14]:
artists #shows all of the elements of the artists list 

[['50. Phil Collins ', ' "No Jacket Required"'],
 ['49. Matchbox Twenty ', ' "Yourself or Someone Like You"'],
 ['48. Led Zeppelin ', '\xa0"Led Zeppelin II"'],
 ['47. Kenny Rogers ', ' "Kenny Rogers\' Greatest Hits"'],
 ['46. Kenny G ', ' "Breathless"'],
 ['45. Jewel ', ' "Pieces of You"'],
 ['44. Dixie Chicks ', ' "Wide Open Spaces"'],
 ['43. Def Leppard ', ' "Hysteria"'],
 ['42. Boyz II Men ', '\xa0"II"'],
 ['41. Bon Jovi ', ' "Slippery When Wet"'],
 ['40. Whitney Houston ', ' "Whitney Houston"'],
 ['39. Prince & The Revolution ', ' "Purple Rain"'],
 ['38. Pearl Jam ', ' "Ten"'],
 ['37. Bruce Springsteen ',
  ' "Bruce Springsteen & E Street Band Live 1975-\'85"'],
 ['36. Backstreet Boys ', ' "Millennium"'],
 ['35. Steve Miller Band ', '\xa0"Greatest Hits 1974-1978"'],
 ['34. Simon & Garfunkel ', '\xa0"Simon & Garfunkel\'s Greatest Hits"'],
 ['33. Meat Loaf ', ' "Bat Out of Hell"'],
 ['32. Garth Brooks ', ' "Ropin\' The Wind"'],
 ['31. Britney Spears ', ' "...Baby One More Time"'],
 [

In [15]:
artists[40] #shows the 40th element of the artists list 

['10. Fleetwood Mac ', ' "Rumours"']

In [16]:
#adding all of the albums onto a separate list called albums
albums = [] #creates empty list called albums
for i in artists: #loops through artists list so that the second element, the albums can be added onto separate list
    albums.append(i[1]) #adds each album into albums
print(albums) #prints updated list

[' "No Jacket Required"', ' "Yourself or Someone Like You"', '\xa0"Led Zeppelin II"', ' "Kenny Rogers\' Greatest Hits"', ' "Breathless"', ' "Pieces of You"', ' "Wide Open Spaces"', ' "Hysteria"', '\xa0"II"', ' "Slippery When Wet"', ' "Whitney Houston"', ' "Purple Rain"', ' "Ten"', ' "Bruce Springsteen & E Street Band Live 1975-\'85"', ' "Millennium"', '\xa0"Greatest Hits 1974-1978"', '\xa0"Simon & Garfunkel\'s Greatest Hits"', ' "Bat Out of Hell"', ' "Ropin\' The Wind"', ' "...Baby One More Time"', ' "Backstreet Boys"', ' "21"', '\xa0"The Beatles 1962-1966"', ' "Supernatural"', ' "Dark Side of the Moon"', ' "Greatest Hits"', '\xa0"Born In The U.S.A."', ' "Legend"', ' "Metallica"', ' "Physical Graffiti"', ' "Saturday Night Fever" (Soundtrack)', ' "Jagged Little Pill"', ' "The Beatles 1967-1970"', ' "No Fences"', ' "Greatest Hits"', ' "Boston"', ' "The Bodyguard" (Soundtrack)', ' "Appetite for Destruction"', ' "The Beatles" ("The White Album")', ' "Come On Over"', ' "Rumours"', ' "Cracke

In [17]:
#adding all of the artists and ranks onto a separate list called art 
art = [] #creates empty list called art
for i in artists: #loops through artists list so that first element, the artist its rank, can be added onto separate list 
    art.append(i[0]) #adds each artist into art 
    
print(art) #prints updated list 

['50. Phil Collins ', '49. Matchbox Twenty ', '48. Led Zeppelin ', '47. Kenny Rogers ', '46. Kenny G ', '45. Jewel ', '44. Dixie Chicks ', '43. Def Leppard ', '42. Boyz II Men ', '41. Bon Jovi ', '40. Whitney Houston ', '39. Prince & The Revolution ', '38. Pearl Jam ', '37. Bruce Springsteen ', '36. Backstreet Boys ', '35. Steve Miller Band ', '34. Simon & Garfunkel ', '33. Meat Loaf ', '32. Garth Brooks ', '31. Britney Spears ', '30. Backstreet Boys ', '29. Adele ', '28. The Beatles ', '27. Santana ', '26. Pink Floyd ', '25. Journey ', '24. Bruce Springsteen ', '23. Bob Marley & The Wailers ', '22. Metallica ', '21. Led Zeppelin ', '20. Bee Gees ', '19. Alanis Morisette ', '18. The Beatles ', '17. Garth Brooks ', '16. Elton John ', '15. Boston ', '14. Whitney Houston ', "13. Guns N' Roses ", '12. The Beatles ', '11. Shania Twain ', '10. Fleetwood Mac ', '9. Hootie & The Blowfish ', '8. Garth Brooks ', '7. AC/DC ', '6. Pink Floyd ', '5. Led Zeppelin ', '4. Billy Joel ', '3. Eagles ', '

In [18]:
#splits the artists and ranks into two separate elements and loads it into new list called stuff
stuff = [] #creates empty list called stuff
for i in art: #loops through 'art' list 
    stuff.append(i.split('.')) #splits each element by period

print(stuff) #prints updated list with artists and ranks as separate elements within a list 
    

[['50', ' Phil Collins '], ['49', ' Matchbox Twenty '], ['48', ' Led Zeppelin '], ['47', ' Kenny Rogers '], ['46', ' Kenny G '], ['45', ' Jewel '], ['44', ' Dixie Chicks '], ['43', ' Def Leppard '], ['42', ' Boyz II Men '], ['41', ' Bon Jovi '], ['40', ' Whitney Houston '], ['39', ' Prince & The Revolution '], ['38', ' Pearl Jam '], ['37', ' Bruce Springsteen '], ['36', ' Backstreet Boys '], ['35', ' Steve Miller Band '], ['34', ' Simon & Garfunkel '], ['33', ' Meat Loaf '], ['32', ' Garth Brooks '], ['31', ' Britney Spears '], ['30', ' Backstreet Boys '], ['29', ' Adele '], ['28', ' The Beatles '], ['27', ' Santana '], ['26', ' Pink Floyd '], ['25', ' Journey '], ['24', ' Bruce Springsteen '], ['23', ' Bob Marley & The Wailers '], ['22', ' Metallica '], ['21', ' Led Zeppelin '], ['20', ' Bee Gees '], ['19', ' Alanis Morisette '], ['18', ' The Beatles '], ['17', ' Garth Brooks '], ['16', ' Elton John '], ['15', ' Boston '], ['14', ' Whitney Houston '], ['13', " Guns N' Roses "], ['12',

In [19]:
#The goal is to load the rank of each artist's album onto a separate list 
rank = []
for i in stuff:
    rank.append(i[0])
print(rank)

['50', '49', '48', '47', '46', '45', '44', '43', '42', '41', '40', '39', '38', '37', '36', '35', '34', '33', '32', '31', '30', '29', '28', '27', '26', '25', '24', '23', '22', '21', '20', '19', '18', '17', '16', '15', '14', '13', '12', '11', '10', '9', '8', '7', '6', '5', '4', '3', '2', '1']


In [20]:
#The goal is to load the artist onto a separate list
singer = []
for i in stuff:
    singer.append(i[1])
print(singer)

[' Phil Collins ', ' Matchbox Twenty ', ' Led Zeppelin ', ' Kenny Rogers ', ' Kenny G ', ' Jewel ', ' Dixie Chicks ', ' Def Leppard ', ' Boyz II Men ', ' Bon Jovi ', ' Whitney Houston ', ' Prince & The Revolution ', ' Pearl Jam ', ' Bruce Springsteen ', ' Backstreet Boys ', ' Steve Miller Band ', ' Simon & Garfunkel ', ' Meat Loaf ', ' Garth Brooks ', ' Britney Spears ', ' Backstreet Boys ', ' Adele ', ' The Beatles ', ' Santana ', ' Pink Floyd ', ' Journey ', ' Bruce Springsteen ', ' Bob Marley & The Wailers ', ' Metallica ', ' Led Zeppelin ', ' Bee Gees ', ' Alanis Morisette ', ' The Beatles ', ' Garth Brooks ', ' Elton John ', ' Boston ', ' Whitney Houston ', " Guns N' Roses ", ' The Beatles ', ' Shania Twain ', ' Fleetwood Mac ', ' Hootie & The Blowfish ', ' Garth Brooks ', ' AC/DC ', ' Pink Floyd ', ' Led Zeppelin ', ' Billy Joel ', ' Eagles ', ' Michael Jackson ', ' Eagles ']


In [21]:
rank #shows elements of rank list 

['50',
 '49',
 '48',
 '47',
 '46',
 '45',
 '44',
 '43',
 '42',
 '41',
 '40',
 '39',
 '38',
 '37',
 '36',
 '35',
 '34',
 '33',
 '32',
 '31',
 '30',
 '29',
 '28',
 '27',
 '26',
 '25',
 '24',
 '23',
 '22',
 '21',
 '20',
 '19',
 '18',
 '17',
 '16',
 '15',
 '14',
 '13',
 '12',
 '11',
 '10',
 '9',
 '8',
 '7',
 '6',
 '5',
 '4',
 '3',
 '2',
 '1']

In [22]:
singer #shows elements of singer list 

[' Phil Collins ',
 ' Matchbox Twenty ',
 ' Led Zeppelin ',
 ' Kenny Rogers ',
 ' Kenny G ',
 ' Jewel ',
 ' Dixie Chicks ',
 ' Def Leppard ',
 ' Boyz II Men ',
 ' Bon Jovi ',
 ' Whitney Houston ',
 ' Prince & The Revolution ',
 ' Pearl Jam ',
 ' Bruce Springsteen ',
 ' Backstreet Boys ',
 ' Steve Miller Band ',
 ' Simon & Garfunkel ',
 ' Meat Loaf ',
 ' Garth Brooks ',
 ' Britney Spears ',
 ' Backstreet Boys ',
 ' Adele ',
 ' The Beatles ',
 ' Santana ',
 ' Pink Floyd ',
 ' Journey ',
 ' Bruce Springsteen ',
 ' Bob Marley & The Wailers ',
 ' Metallica ',
 ' Led Zeppelin ',
 ' Bee Gees ',
 ' Alanis Morisette ',
 ' The Beatles ',
 ' Garth Brooks ',
 ' Elton John ',
 ' Boston ',
 ' Whitney Houston ',
 " Guns N' Roses ",
 ' The Beatles ',
 ' Shania Twain ',
 ' Fleetwood Mac ',
 ' Hootie & The Blowfish ',
 ' Garth Brooks ',
 ' AC/DC ',
 ' Pink Floyd ',
 ' Led Zeppelin ',
 ' Billy Joel ',
 ' Eagles ',
 ' Michael Jackson ',
 ' Eagles ']

In [23]:
albums #shows elements of albums list 

[' "No Jacket Required"',
 ' "Yourself or Someone Like You"',
 '\xa0"Led Zeppelin II"',
 ' "Kenny Rogers\' Greatest Hits"',
 ' "Breathless"',
 ' "Pieces of You"',
 ' "Wide Open Spaces"',
 ' "Hysteria"',
 '\xa0"II"',
 ' "Slippery When Wet"',
 ' "Whitney Houston"',
 ' "Purple Rain"',
 ' "Ten"',
 ' "Bruce Springsteen & E Street Band Live 1975-\'85"',
 ' "Millennium"',
 '\xa0"Greatest Hits 1974-1978"',
 '\xa0"Simon & Garfunkel\'s Greatest Hits"',
 ' "Bat Out of Hell"',
 ' "Ropin\' The Wind"',
 ' "...Baby One More Time"',
 ' "Backstreet Boys"',
 ' "21"',
 '\xa0"The Beatles 1962-1966"',
 ' "Supernatural"',
 ' "Dark Side of the Moon"',
 ' "Greatest Hits"',
 '\xa0"Born In The U.S.A."',
 ' "Legend"',
 ' "Metallica"',
 ' "Physical Graffiti"',
 ' "Saturday Night Fever" (Soundtrack)',
 ' "Jagged Little Pill"',
 ' "The Beatles 1967-1970"',
 ' "No Fences"',
 ' "Greatest Hits"',
 ' "Boston"',
 ' "The Bodyguard" (Soundtrack)',
 ' "Appetite for Destruction"',
 ' "The Beatles" ("The White Album")',
 ' "

In [24]:
album = [] #creates empty list called album
for j in albums: #loops through album list 
    if '\xa0' in j: #checks to see if characters are present in element and if so it removes those characters 
        album.append(j.replace('\xa0', ''))
    else: #otherwise it simply adds the element onto new list album 
        album.append(j)

In [25]:
album #prints updated list with Top 50 albums 

[' "No Jacket Required"',
 ' "Yourself or Someone Like You"',
 '"Led Zeppelin II"',
 ' "Kenny Rogers\' Greatest Hits"',
 ' "Breathless"',
 ' "Pieces of You"',
 ' "Wide Open Spaces"',
 ' "Hysteria"',
 '"II"',
 ' "Slippery When Wet"',
 ' "Whitney Houston"',
 ' "Purple Rain"',
 ' "Ten"',
 ' "Bruce Springsteen & E Street Band Live 1975-\'85"',
 ' "Millennium"',
 '"Greatest Hits 1974-1978"',
 '"Simon & Garfunkel\'s Greatest Hits"',
 ' "Bat Out of Hell"',
 ' "Ropin\' The Wind"',
 ' "...Baby One More Time"',
 ' "Backstreet Boys"',
 ' "21"',
 '"The Beatles 1962-1966"',
 ' "Supernatural"',
 ' "Dark Side of the Moon"',
 ' "Greatest Hits"',
 '"Born In The U.S.A."',
 ' "Legend"',
 ' "Metallica"',
 ' "Physical Graffiti"',
 ' "Saturday Night Fever" (Soundtrack)',
 ' "Jagged Little Pill"',
 ' "The Beatles 1967-1970"',
 ' "No Fences"',
 ' "Greatest Hits"',
 ' "Boston"',
 ' "The Bodyguard" (Soundtrack)',
 ' "Appetite for Destruction"',
 ' "The Beatles" ("The White Album")',
 ' "Come On Over"',
 ' "Rumo

In [26]:
alb = []
for i in album:
    alb.append(i.replace('"', '')) #removes quotation marks from those elements that have them 
print(alb)

[' No Jacket Required', ' Yourself or Someone Like You', 'Led Zeppelin II', " Kenny Rogers' Greatest Hits", ' Breathless', ' Pieces of You', ' Wide Open Spaces', ' Hysteria', 'II', ' Slippery When Wet', ' Whitney Houston', ' Purple Rain', ' Ten', " Bruce Springsteen & E Street Band Live 1975-'85", ' Millennium', 'Greatest Hits 1974-1978', "Simon & Garfunkel's Greatest Hits", ' Bat Out of Hell', " Ropin' The Wind", ' ...Baby One More Time', ' Backstreet Boys', ' 21', 'The Beatles 1962-1966', ' Supernatural', ' Dark Side of the Moon', ' Greatest Hits', 'Born In The U.S.A.', ' Legend', ' Metallica', ' Physical Graffiti', ' Saturday Night Fever (Soundtrack)', ' Jagged Little Pill', ' The Beatles 1967-1970', ' No Fences', ' Greatest Hits', ' Boston', ' The Bodyguard (Soundtrack)', ' Appetite for Destruction', ' The Beatles (The White Album)', ' Come On Over', ' Rumours', ' Cracked Rear View', ' Double Live', ' Back In Black', ' The Wall', ' Led Zeppelin IV', ' Greatest Hits Volume 1 & Volum

In [27]:
soup.find_all('p') #finds all text within webpage with the 'p' HTML tag 

[<p class=""><figure class="figure image-figure-image slideshow lead-image postload" data-e2e-name="image-figure-image" data-media-container="image" data-type="img" id="img-429803" style=""><img (-webkit-min-device-pixel-ratio:="" (max-width:="" (min-width:="" 100vw="" 1259px)="" 1260px)="" 2)="" 50vw,="" 640px,="" 960px)="" 960px,="" alt="Michael Jackson" and="" onload="createPerformanceMark('first image displayed')" sizes="(min-width: 960px) and (max-width: 1259px) 640px, (min-width: 1260px) 960px, (-webkit-min-device-pixel-ratio: 2) 50vw, 100vw" src="https://amp.businessinsider.com/images/57e41856b0ef9766008b788e-750-562.jpg" srcset="https://amp.businessinsider.com/images/57e41856b0ef9766008b788e-160-120.jpg 160w, https://amp.businessinsider.com/images/57e41856b0ef9766008b788e-320-240.jpg 320w, https://amp.businessinsider.com/images/57e41856b0ef9766008b788e-480-360.jpg 480w, https://amp.businessinsider.com/images/57e41856b0ef9766008b788e-640-480.jpg 640w, https://amp.businessinsider

In [28]:
p = soup.find_all('p') #stores list of all text with p tags into variable 'p'

In [29]:
units = [] #creates empty units list 
for i in p: #loops through all text with p tag
    if 'million' in i.text: #checks to see if the word million is found in text with p tag 
        units.append(i.text) #If million is found in text with p tag, it is added into units list 
    else:
        continue #continues through text with p tags to see if next text with p tag has the word million

In [30]:
units

['Certified units: 12 million ',
 'Certified units: 12 million ',
 'Certified units: 12 million ',
 'Certified units: 12 million ',
 'Certified units: 12 million ',
 'Certified units: 12 million ',
 'Certified units: 12 million ',
 'Certified units: 12 million ',
 'Certified units: 12 million ',
 'Certified units: 12 million ',
 'Certified units: 13 million ',
 'Certified units: 13 million ',
 'Certified units: 13 million ',
 'Certified units: 13 million ',
 'Certified units: 13 million ',
 'Certified units: 14 million ',
 'Certified units: 14 million ',
 'Certified units: 14 million ',
 'Certified units: 14 million ',
 'Certified units: 14 million ',
 'Certified units: 14 million ',
 'Certified units: 14 million ',
 'Certified units: 15 million ',
 'Certified units: 15 million ',
 'Certified units: 15 million ',
 'Certified units: 15 million ',
 'Certified units: 15 million ',
 'Certified units: 15 million ',
 'Certified units: 16 million ',
 'Certified units: 16 million ',
 'Certifie

In [31]:
len(units) #Returns the length of the updated units list 

50

In [32]:
#Removing certified units from each element in units list and adding updated elements into 'r_units' list so that only the number sold will be shown
r_units = [] #creates empty list 
for i in units: #Loops through units list 
    r_units.append(i.replace('Certified units:', ' ')) #adds updated elements into r_units list 

In [33]:
r_units #Returns updated list 

['  12 million ',
 '  12 million ',
 '  12 million ',
 '  12 million ',
 '  12 million ',
 '  12 million ',
 '  12 million ',
 '  12 million ',
 '  12 million ',
 '  12 million ',
 '  13 million ',
 '  13 million ',
 '  13 million ',
 '  13 million ',
 '  13 million ',
 '  14 million ',
 '  14 million ',
 '  14 million ',
 '  14 million ',
 '  14 million ',
 '  14 million ',
 '  14 million ',
 '  15 million ',
 '  15 million ',
 '  15 million ',
 '  15 million ',
 '  15 million ',
 '  15 million ',
 '  16 million ',
 '  16 million ',
 '  15 million ',
 '  16 million ',
 '  17 million ',
 '  17 million ',
 '  17 million ',
 '  17 million ',
 '  18 million ',
 '  18 million ',
 '  19 million ',
 '  20 million ',
 '  20 million ',
 '  21 million ',
 '  21 million ',
 '  22 million ',
 '  23 million ',
 '  23 million ',
 '  23 million ',
 '  26 million ',
 '  33 million ',
 '  36 million ']

In [34]:
len(r_units) #Returns length of updated list 

50

In [39]:
rank_df = pd.DataFrame(rank, columns=['rank']) #converts list of ranks into DataFrame

In [40]:
singer_df = pd.DataFrame(singer, columns=['singer']) #converts list of artists into DataFrame

In [41]:
album_df = pd.DataFrame(alb, columns=['album']) #converts list of albums into DataFrame

In [42]:
r_units_df = pd.DataFrame(r_units, columns=['number of units']) #converts list of number of units sold into DataFrame

In [43]:
top_albums = pd.concat([rank_df, singer_df, album_df, r_units_df], axis=1) #Combines all of the above DataFrames together 

In [44]:
top_albums.head() #Shows first 5 rows of new DataFrame 

Unnamed: 0,rank,singer,album,number of units
0,50,Phil Collins,No Jacket Required,12 million
1,49,Matchbox Twenty,Yourself or Someone Like You,12 million
2,48,Led Zeppelin,Led Zeppelin II,12 million
3,47,Kenny Rogers,Kenny Rogers' Greatest Hits,12 million
4,46,Kenny G,Breathless,12 million


In [45]:
top_albums.to_csv(r'C:\Users\The_vinchenzo\Documents\albums.csv', index=False, encoding='utf-8') #Converts DataFrame into csv file