In [1]:
import pandas as pd

In [2]:
# Read both source CSV files.
# The ISBN columns are identifiers, not numbers: they can start with zeros and
# can end in the letter X. Read them as text rather than relying on dtype
# inference, which may parse all-digit values as integers (losing leading zeros
# and breaking the .str accessor used on books_2["isbn"] afterwards).

books = pd.read_csv("books.csv", dtype={"isbn10": str})
books_2 = pd.read_csv("books_2.csv", dtype={"isbn": str})

In [3]:
# Left-pad every books_2 ISBN with zeros up to a width of 10 characters
# (this csv file stores the values without their leading zeros).

books_2 = books_2.assign(isbn=lambda frame: frame["isbn"].str.zfill(10))

In [4]:
# Cast the ISBN column of each frame to str so the values are handled as text
# (an ISBN can contain the letter X, so it is not a plain number).

books = books.astype({"isbn10": str})
books_2 = books_2.astype({"isbn": str})

In [5]:
# Give the ISBN column in books the same name that books_2 uses ("isbn"),
# so both frames share one column name to join on.

books = books.rename({"isbn10": "isbn"}, axis="columns")


In [6]:
# Inner join on isbn: only ISBNs present in both frames are kept.
# books_2 is the left frame, so its overlapping column names receive the "_x"
# suffix and the ones coming from books receive "_y".

merged = books_2.merge(books, how="inner", on="isbn")

In [7]:
# Preview the joined frame. The notebook renders a truncated view (first and
# last rows, middle columns elided). Column names that exist in both inputs
# carry the _x (books_2) / _y (books) suffixes added by the merge.

merged

Unnamed: 0,bookID,title_x,authors_x,average_rating_x,isbn,isbn13_x,language_code,num_pages,ratings_count_x,text_reviews_count,...,title_y,subtitle,authors_y,categories,thumbnail,description,published_year,average_rating_y,num_pages.1,ratings_count_y
0,1,Harry Potter and the Half-Blood Prince (Harry ...,J.K. Rowling/Mary GrandPré,4.57,0439785960,9.78044E+12,eng,652,2095690,27591,...,Harry Potter and the Half-Blood Prince (Book 6),,"Rowling, J.K.",Juvenile Fiction,http://books.google.com/books/content?id=QzI0B...,When Harry Potter and the Half-Blood Prince op...,2015.0,4.56,652.0,1944099.0
1,2,Harry Potter and the Order of the Phoenix (Har...,J.K. Rowling/Mary GrandPré,4.49,0439358078,9.78044E+12,eng,870,2153167,29221,...,Harry Potter and the Order of the Phoenix (Boo...,,"Rowling, J.K.",Juvenile Fiction,http://books.google.com/books/content?id=OIJ5B...,"In Harry Potter and the Order of the Phoenix, ...",2015.0,4.49,870.0,1996446.0
2,4,Harry Potter and the Chamber of Secrets (Harry...,J.K. Rowling,4.42,0439554896,9.78044E+12,eng,352,6333,244,...,Harry Potter and the Chamber of Secrets,,J. K. Rowling;Mary GrandPre,Juvenile Fiction,http://books.google.com/books/content?id=h2Y-P...,When the Chamber of Secrets is opened again at...,2003.0,4.41,352.0,6267.0
3,5,Harry Potter and the Prisoner of Azkaban (Harr...,J.K. Rowling/Mary GrandPré,4.56,043965548X,9.78044E+12,eng,435,2339585,36325,...,Harry Potter and the Prisoner of Azkaban (Book 3),,"Rowling, J.K.",Juvenile Fiction,http://books.google.com/books/content?id=IZN5B...,"For twelve long years, the dread fortress of A...",2015.0,4.55,435.0,2149872.0
4,8,Harry Potter Boxed Set Books 1-5 (Harry Potte...,J.K. Rowling/Mary GrandPré,4.78,0439682584,9.78044E+12,eng,2690,41428,164,...,Harry Potter,"5 Years of Magic, Adventure, and Mystery at Ho...",J. K. Rowling,Juvenile Fiction,http://books.google.com/books/content?id=DAAAA...,The first five years of Harry Potter magic are...,2004.0,4.78,2690.0,38872.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5681,45546,Undaunted Courage: The Pioneering First Missio...,Stephen E. Ambrose,4.21,074347788X,9.78074E+12,eng,592,41236,1830,...,Undaunted Courage,The Pioneering First Mission to Explore Americ...,Stephen E. Ambrose,Explorers,http://books.google.com/books/content?id=1egIO...,'This was much more than a bunch of guys out o...,2003.0,4.22,592.0,38555.0
5682,45564,Jonny Reb & Billy Yank,Alexander Hunter,4.11,1568520808,9.78157E+12,eng,635,14,2,...,Johnny Reb and Billy Yank,,Alexander Hunter,United States,http://books.google.com/books/content?id=z4LeI...,,1904.0,4.11,635.0,13.0
5683,45630,Whores for Gloria,William T. Vollmann,3.69,0140231579,9.78014E+12,en-US,160,932,111,...,Whores for Gloria,,William T. Vollmann,Fiction,http://books.google.com/books/content?id=mQA-P...,From the acclaimed author of The Rainbow Stori...,1994.0,3.69,160.0,896.0
5684,45631,Expelled from Eden: A William T. Vollmann Reader,William T. Vollmann/Larry McCaffery/Michael He...,4.06,1560254416,9.78156E+12,eng,512,156,20,...,Expelled from Eden,A William T. Vollmann Reader,Perseus,Fiction,http://books.google.com/books/content?id=Q_XQs...,"No stranger to living and writing on the edge,...",2004.0,4.05,512.0,156.0


In [8]:
# List every column name of the merged frame, including the ones elided from
# the preview. Note that '  num_pages' (with leading spaces) and 'num_pages'
# are two distinct columns.

merged.columns

Index(['bookID', 'title_x', 'authors_x', 'average_rating_x', 'isbn',
       'isbn13_x', 'language_code', '  num_pages', 'ratings_count_x',
       'text_reviews_count', 'publication_date', 'publisher', 'Unnamed: 12',
       'isbn13_y', 'title_y', 'subtitle', 'authors_y', 'categories',
       'thumbnail', 'description', 'published_year', 'average_rating_y',
       'num_pages', 'ratings_count_y'],
      dtype='object')

In [9]:
# Remove columns that duplicate information or are not needed.
# For most pairs that exist in both sources the "_y" copy is removed; for the
# page count, the space-prefixed "  num_pages" column is the one removed.

cleaned = merged.drop(
    columns=[
        "title_y",
        "subtitle",
        "authors_y",
        "isbn13_y",
        "average_rating_y",
        "  num_pages",
        "ratings_count_y",
        "Unnamed: 12",
    ]
)

In [10]:
# Strip the "_x" merge suffix from the columns that were kept.

cleaned = cleaned.rename(
    {
        "title_x": "title",
        "authors_x": "authors",
        "average_rating_x": "average_rating",
        "isbn13_x": "isbn13",
        "ratings_count_x": "ratings_count",
    },
    axis="columns",
)

In [11]:
# Only keep rows whose language code is one of the English variants.
# Codes are compared exactly (case-sensitive); rows with a missing code are dropped.
# "en-CA" follows the hyphenated form used by "en-US" and "en-GB". The
# underscore spelling "en_CA" that this filter used before is kept as well, so
# no row that matched previously is lost.

cleaned = cleaned[
    cleaned["language_code"].isin(
        ["eng", "en-US", "en-GB", "en-CA", "en_CA", "enm"]
    )
]

In [12]:
# Renumber the rows 0..n-1, discarding the gapped index left by the row filter.

cleaned = cleaned.set_axis(pd.RangeIndex(len(cleaned)), axis="index")

In [13]:
# Preview the cleaned frame: duplicate/unneeded columns dropped, "_x" suffixes
# removed, rows limited to the English language codes, and the index renumbered
# from 0.

cleaned

Unnamed: 0,bookID,title,authors,average_rating,isbn,isbn13,language_code,ratings_count,text_reviews_count,publication_date,publisher,categories,thumbnail,description,published_year,num_pages
0,1,Harry Potter and the Half-Blood Prince (Harry ...,J.K. Rowling/Mary GrandPré,4.57,0439785960,9.78044E+12,eng,2095690,27591,9/16/2006,Scholastic Inc.,Juvenile Fiction,http://books.google.com/books/content?id=QzI0B...,When Harry Potter and the Half-Blood Prince op...,2015.0,652.0
1,2,Harry Potter and the Order of the Phoenix (Har...,J.K. Rowling/Mary GrandPré,4.49,0439358078,9.78044E+12,eng,2153167,29221,9/1/2004,Scholastic Inc.,Juvenile Fiction,http://books.google.com/books/content?id=OIJ5B...,"In Harry Potter and the Order of the Phoenix, ...",2015.0,870.0
2,4,Harry Potter and the Chamber of Secrets (Harry...,J.K. Rowling,4.42,0439554896,9.78044E+12,eng,6333,244,11/1/2003,Scholastic,Juvenile Fiction,http://books.google.com/books/content?id=h2Y-P...,When the Chamber of Secrets is opened again at...,2003.0,352.0
3,5,Harry Potter and the Prisoner of Azkaban (Harr...,J.K. Rowling/Mary GrandPré,4.56,043965548X,9.78044E+12,eng,2339585,36325,5/1/2004,Scholastic Inc.,Juvenile Fiction,http://books.google.com/books/content?id=IZN5B...,"For twelve long years, the dread fortress of A...",2015.0,435.0
4,8,Harry Potter Boxed Set Books 1-5 (Harry Potte...,J.K. Rowling/Mary GrandPré,4.78,0439682584,9.78044E+12,eng,41428,164,9/13/2004,Scholastic,Juvenile Fiction,http://books.google.com/books/content?id=DAAAA...,The first five years of Harry Potter magic are...,2004.0,2690.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5675,45546,Undaunted Courage: The Pioneering First Missio...,Stephen E. Ambrose,4.21,074347788X,9.78074E+12,eng,41236,1830,10/6/2003,Simon & Schuster,Explorers,http://books.google.com/books/content?id=1egIO...,'This was much more than a bunch of guys out o...,2003.0,592.0
5676,45564,Jonny Reb & Billy Yank,Alexander Hunter,4.11,1568520808,9.78157E+12,eng,14,2,5/31/1998,Not Avail,United States,http://books.google.com/books/content?id=z4LeI...,,1904.0,635.0
5677,45630,Whores for Gloria,William T. Vollmann,3.69,0140231579,9.78014E+12,en-US,932,111,2/1/1994,Penguin Books,Fiction,http://books.google.com/books/content?id=mQA-P...,From the acclaimed author of The Rainbow Stori...,1994.0,160.0
5678,45631,Expelled from Eden: A William T. Vollmann Reader,William T. Vollmann/Larry McCaffery/Michael He...,4.06,1560254416,9.78156E+12,eng,156,20,12/21/2004,Da Capo Press,Fiction,http://books.google.com/books/content?id=Q_XQs...,"No stranger to living and writing on the edge,...",2004.0,512.0


In [14]:
# Write the cleaned frame to cleaned_books.csv (path relative to the working directory).
# index=True is the pandas default, spelled out here: the row index is written
# as an extra, unnamed first column of the file.

cleaned.to_csv("cleaned_books.csv", index=True)