# This notebook extracts and transforms the data from a CSV source file

In [1]:
# CSV from Chhavi (KAGGLE) https://www.kaggle.com/datasets/chhavidhankhar11/amazon-books-dataset?resource=download
import pandas as pd

# Read the raw export; index_col=0 makes the CSV's first column the row index
# instead of loading it as a regular data column.
raw_csv_path = '../data/raw/Amazon_Books.csv'
books_df = pd.read_csv(raw_csv_path, index_col=0)
books_df



Unnamed: 0,Title,Author,Main Genre,Sub Genre,Type,Price,Rating,No. of People rated,URLs
0,The Complete Novel of Sherlock Holmes,Arthur Conan Doyle,"Arts, Film & Photography",Cinema & Broadcast,Paperback,₹169.00,4.4,19923.0,https://www.amazon.in/Complete-Novels-Sherlock...
1,Black Holes (L) : The Reith Lectures [Paperbac...,Stephen Hawking,"Arts, Film & Photography",Cinema & Broadcast,Paperback,₹99.00,4.5,7686.0,https://www.amazon.in/Black-Holes-Lectures-Ste...
2,The Kite Runner,Khaled Hosseini,"Arts, Film & Photography",Cinema & Broadcast,Kindle Edition,₹175.75,4.6,50016.0,https://www.amazon.in/Kite-Runner-Khaled-Hosse...
3,Greenlights: Raucous stories and outlaw wisdom...,Matthew McConaughey,"Arts, Film & Photography",Cinema & Broadcast,Paperback,₹389.00,4.6,32040.0,https://www.amazon.in/Greenlights-Raucous-stor...
4,The Science of Storytelling: Why Stories Make ...,Will Storr,"Arts, Film & Photography",Cinema & Broadcast,Paperback,₹348.16,4.5,1707.0,https://www.amazon.in/Science-Storytelling-Wil...
...,...,...,...,...,...,...,...,...,...
7923,Insight Guides Poland (Travel Guide with Free ...,Insight Travel Guide,Travel,Travel & Holiday Guides,Paperback,"₹1,326.00",4.7,16.0,https://www.amazon.in/Insight-Guides-Poland-Tr...
7924,Lonely Planet India 19 (Travel Guide),Anirban Mahapatra,Travel,Travel & Holiday Guides,Paperback,₹850.00,4.4,187.0,https://www.amazon.in/Lonely-Planet-India-Trav...
7925,Eyewitness Travel Phrase Book French (EW Trave...,DK,Travel,Travel & Holiday Guides,Paperback,₹307.00,4.5,168.0,https://www.amazon.in/Eyewitness-Travel-Phrase...
7926,Lonely Planet Australia (Travel Guide),Andrew Bain,Travel,Travel & Holiday Guides,Kindle Edition,"₹1,814.50",4.7,267.0,https://www.amazon.in/Lonely-Planet-Australia-...


## Transform data


In [2]:
# Remove exact duplicate rows first, then every row that still has a missing
# value in at least one column. The trailing copy() yields an independent
# frame, so later column assignments act on books_df itself.
books_df = (
    books_df
    .drop_duplicates()
    .dropna()
    .copy()
)

books_df

Unnamed: 0,Title,Author,Main Genre,Sub Genre,Type,Price,Rating,No. of People rated,URLs
0,The Complete Novel of Sherlock Holmes,Arthur Conan Doyle,"Arts, Film & Photography",Cinema & Broadcast,Paperback,₹169.00,4.4,19923.0,https://www.amazon.in/Complete-Novels-Sherlock...
1,Black Holes (L) : The Reith Lectures [Paperbac...,Stephen Hawking,"Arts, Film & Photography",Cinema & Broadcast,Paperback,₹99.00,4.5,7686.0,https://www.amazon.in/Black-Holes-Lectures-Ste...
2,The Kite Runner,Khaled Hosseini,"Arts, Film & Photography",Cinema & Broadcast,Kindle Edition,₹175.75,4.6,50016.0,https://www.amazon.in/Kite-Runner-Khaled-Hosse...
3,Greenlights: Raucous stories and outlaw wisdom...,Matthew McConaughey,"Arts, Film & Photography",Cinema & Broadcast,Paperback,₹389.00,4.6,32040.0,https://www.amazon.in/Greenlights-Raucous-stor...
4,The Science of Storytelling: Why Stories Make ...,Will Storr,"Arts, Film & Photography",Cinema & Broadcast,Paperback,₹348.16,4.5,1707.0,https://www.amazon.in/Science-Storytelling-Wil...
...,...,...,...,...,...,...,...,...,...
7923,Insight Guides Poland (Travel Guide with Free ...,Insight Travel Guide,Travel,Travel & Holiday Guides,Paperback,"₹1,326.00",4.7,16.0,https://www.amazon.in/Insight-Guides-Poland-Tr...
7924,Lonely Planet India 19 (Travel Guide),Anirban Mahapatra,Travel,Travel & Holiday Guides,Paperback,₹850.00,4.4,187.0,https://www.amazon.in/Lonely-Planet-India-Trav...
7925,Eyewitness Travel Phrase Book French (EW Trave...,DK,Travel,Travel & Holiday Guides,Paperback,₹307.00,4.5,168.0,https://www.amazon.in/Eyewitness-Travel-Phrase...
7926,Lonely Planet Australia (Travel Guide),Andrew Bain,Travel,Travel & Holiday Guides,Kindle Edition,"₹1,814.50",4.7,267.0,https://www.amazon.in/Lonely-Planet-Australia-...


In [3]:
# Inspect the pandas dtype of every column before any numeric work is done.
column_dtypes = books_df.dtypes
print(column_dtypes)


# An 'object' dtype can hide mixed Python types, so count the concrete Python
# type of each individual Price value as well.
price_python_types = books_df['Price'].apply(type)
price_python_types.value_counts()

Title                   object
Author                  object
Main Genre              object
Sub Genre               object
Type                    object
Price                   object
Rating                 float64
No. of People rated    float64
URLs                    object
dtype: object


Price
<class 'str'>    7907
Name: count, dtype: int64

In [4]:
def clean_price(
    price_value: object
):
    """Strip the rupee sign and thousands separators from a price string.

    Args:
        price_value: A raw price such as ``'₹1,326.00'``. Values that are not
            ``str`` instances are passed through untouched.

    Returns:
        The input text with every ``'₹'`` and ``','`` removed. The result is
        still a ``str``; no numeric conversion happens here. Non-string input
        is returned as-is.
    """
    # Guard clause: anything that is not text is handed back unchanged.
    if not isinstance(price_value, str):
        return price_value

    without_symbol = price_value.replace('₹', '')
    return without_symbol.replace(',', '')

In [5]:
# Two steps: strip the currency formatting from each price, then cast the
# cleaned values to float so they can be used in arithmetic.
cleaned_prices = books_df['Price'].apply(clean_price)
books_df['Price'] = cleaned_prices.astype('float')
books_df


Unnamed: 0,Title,Author,Main Genre,Sub Genre,Type,Price,Rating,No. of People rated,URLs
0,The Complete Novel of Sherlock Holmes,Arthur Conan Doyle,"Arts, Film & Photography",Cinema & Broadcast,Paperback,169.00,4.4,19923.0,https://www.amazon.in/Complete-Novels-Sherlock...
1,Black Holes (L) : The Reith Lectures [Paperbac...,Stephen Hawking,"Arts, Film & Photography",Cinema & Broadcast,Paperback,99.00,4.5,7686.0,https://www.amazon.in/Black-Holes-Lectures-Ste...
2,The Kite Runner,Khaled Hosseini,"Arts, Film & Photography",Cinema & Broadcast,Kindle Edition,175.75,4.6,50016.0,https://www.amazon.in/Kite-Runner-Khaled-Hosse...
3,Greenlights: Raucous stories and outlaw wisdom...,Matthew McConaughey,"Arts, Film & Photography",Cinema & Broadcast,Paperback,389.00,4.6,32040.0,https://www.amazon.in/Greenlights-Raucous-stor...
4,The Science of Storytelling: Why Stories Make ...,Will Storr,"Arts, Film & Photography",Cinema & Broadcast,Paperback,348.16,4.5,1707.0,https://www.amazon.in/Science-Storytelling-Wil...
...,...,...,...,...,...,...,...,...,...
7923,Insight Guides Poland (Travel Guide with Free ...,Insight Travel Guide,Travel,Travel & Holiday Guides,Paperback,1326.00,4.7,16.0,https://www.amazon.in/Insight-Guides-Poland-Tr...
7924,Lonely Planet India 19 (Travel Guide),Anirban Mahapatra,Travel,Travel & Holiday Guides,Paperback,850.00,4.4,187.0,https://www.amazon.in/Lonely-Planet-India-Trav...
7925,Eyewitness Travel Phrase Book French (EW Trave...,DK,Travel,Travel & Holiday Guides,Paperback,307.00,4.5,168.0,https://www.amazon.in/Eyewitness-Travel-Phrase...
7926,Lonely Planet Australia (Travel Guide),Andrew Bain,Travel,Travel & Holiday Guides,Kindle Edition,1814.50,4.7,267.0,https://www.amazon.in/Lonely-Planet-Australia-...


In [6]:
# Bucket each rating into a labelled category using right-closed bins:
# [0, 3] -> 'Bad', (3, 5] -> 'Good', (5, inf] -> 'Excellent'.
# include_lowest=True closes the first bin on the left, so a rating of exactly
# 0 is labelled 'Bad' instead of being left as NaN.
# NOTE(review): if ratings are on a 0-5 scale, nothing can fall into the
# (5, inf] bin, so 'Excellent' is never assigned -- confirm the intended cut points.
rating_bins = [0, 3, 5, float('inf')]
rating_labels = ['Bad', 'Good', 'Excellent']
books_df['Rating category'] = pd.cut(
    books_df['Rating'],
    bins=rating_bins,
    labels=rating_labels,
    include_lowest=True,
)
books_df

Unnamed: 0,Title,Author,Main Genre,Sub Genre,Type,Price,Rating,No. of People rated,URLs,Rating category
0,The Complete Novel of Sherlock Holmes,Arthur Conan Doyle,"Arts, Film & Photography",Cinema & Broadcast,Paperback,169.00,4.4,19923.0,https://www.amazon.in/Complete-Novels-Sherlock...,Good
1,Black Holes (L) : The Reith Lectures [Paperbac...,Stephen Hawking,"Arts, Film & Photography",Cinema & Broadcast,Paperback,99.00,4.5,7686.0,https://www.amazon.in/Black-Holes-Lectures-Ste...,Good
2,The Kite Runner,Khaled Hosseini,"Arts, Film & Photography",Cinema & Broadcast,Kindle Edition,175.75,4.6,50016.0,https://www.amazon.in/Kite-Runner-Khaled-Hosse...,Good
3,Greenlights: Raucous stories and outlaw wisdom...,Matthew McConaughey,"Arts, Film & Photography",Cinema & Broadcast,Paperback,389.00,4.6,32040.0,https://www.amazon.in/Greenlights-Raucous-stor...,Good
4,The Science of Storytelling: Why Stories Make ...,Will Storr,"Arts, Film & Photography",Cinema & Broadcast,Paperback,348.16,4.5,1707.0,https://www.amazon.in/Science-Storytelling-Wil...,Good
...,...,...,...,...,...,...,...,...,...,...
7923,Insight Guides Poland (Travel Guide with Free ...,Insight Travel Guide,Travel,Travel & Holiday Guides,Paperback,1326.00,4.7,16.0,https://www.amazon.in/Insight-Guides-Poland-Tr...,Good
7924,Lonely Planet India 19 (Travel Guide),Anirban Mahapatra,Travel,Travel & Holiday Guides,Paperback,850.00,4.4,187.0,https://www.amazon.in/Lonely-Planet-India-Trav...,Good
7925,Eyewitness Travel Phrase Book French (EW Trave...,DK,Travel,Travel & Holiday Guides,Paperback,307.00,4.5,168.0,https://www.amazon.in/Eyewitness-Travel-Phrase...,Good
7926,Lonely Planet Australia (Travel Guide),Andrew Bain,Travel,Travel & Holiday Guides,Kindle Edition,1814.50,4.7,267.0,https://www.amazon.in/Lonely-Planet-Australia-...,Good


In [8]:
# Give the rating-count column a name without spaces or punctuation.
column_renames = {"No. of People rated": "Number_people_rated"}
books_df = books_df.rename(columns=column_renames)
books_df

Unnamed: 0,Title,Author,Main Genre,Sub Genre,Type,Price,Rating,Number_people_rated,URLs,Rating category
0,The Complete Novel of Sherlock Holmes,Arthur Conan Doyle,"Arts, Film & Photography",Cinema & Broadcast,Paperback,169.00,4.4,19923.0,https://www.amazon.in/Complete-Novels-Sherlock...,Good
1,Black Holes (L) : The Reith Lectures [Paperbac...,Stephen Hawking,"Arts, Film & Photography",Cinema & Broadcast,Paperback,99.00,4.5,7686.0,https://www.amazon.in/Black-Holes-Lectures-Ste...,Good
2,The Kite Runner,Khaled Hosseini,"Arts, Film & Photography",Cinema & Broadcast,Kindle Edition,175.75,4.6,50016.0,https://www.amazon.in/Kite-Runner-Khaled-Hosse...,Good
3,Greenlights: Raucous stories and outlaw wisdom...,Matthew McConaughey,"Arts, Film & Photography",Cinema & Broadcast,Paperback,389.00,4.6,32040.0,https://www.amazon.in/Greenlights-Raucous-stor...,Good
4,The Science of Storytelling: Why Stories Make ...,Will Storr,"Arts, Film & Photography",Cinema & Broadcast,Paperback,348.16,4.5,1707.0,https://www.amazon.in/Science-Storytelling-Wil...,Good
...,...,...,...,...,...,...,...,...,...,...
7923,Insight Guides Poland (Travel Guide with Free ...,Insight Travel Guide,Travel,Travel & Holiday Guides,Paperback,1326.00,4.7,16.0,https://www.amazon.in/Insight-Guides-Poland-Tr...,Good
7924,Lonely Planet India 19 (Travel Guide),Anirban Mahapatra,Travel,Travel & Holiday Guides,Paperback,850.00,4.4,187.0,https://www.amazon.in/Lonely-Planet-India-Trav...,Good
7925,Eyewitness Travel Phrase Book French (EW Trave...,DK,Travel,Travel & Holiday Guides,Paperback,307.00,4.5,168.0,https://www.amazon.in/Eyewitness-Travel-Phrase...,Good
7926,Lonely Planet Australia (Travel Guide),Andrew Bain,Travel,Travel & Holiday Guides,Kindle Edition,1814.50,4.7,267.0,https://www.amazon.in/Lonely-Planet-Australia-...,Good


## Store cleaned CSV file

In [9]:
# Persist the transformed frame; index=True writes the row index as the first
# CSV column.
cleaned_csv_path = "../data/cleaned/Amazon_Books_Cleaned.csv"
books_df.to_csv(cleaned_csv_path, index=True)