## Importing libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import warnings

warnings.filterwarnings('ignore')

sns.set()

## Data preprocessing

In [2]:
books = pd.read_csv('data_buku_indo2.csv')

ratings = pd.read_csv('ratings.csv')

In [3]:
books.columns

Index(['Unnamed: 0', 'Judul', 'Penulis', 'No. ISBN', 'Penerbit',
       'Tanggal terbit', 'Kategori', 'Description', 'img_small_link',
       'img_large_link'],
      dtype='object')

In [4]:
columns = ['Judul','No. ISBN', 'Penulis', 'Penerbit','Tanggal terbit', 'Kategori', 'Description', 'img_small_link', 'img_large_link']

books_new = books[columns]

books_new.head()

Unnamed: 0,Judul,No. ISBN,Penulis,Penerbit,Tanggal terbit,Kategori,Description,img_small_link,img_large_link
0,PROPERTI KO.MO.DO ( PIPO Hargiyanto ),9786235824130,PIPO HARGIYANTO,Bukukita.com,November - 2022,Motivasi,Properti KO.MO.DO yang ditulis langsung oleh P...,Properti KO.MO.DO yang ditulis langsung oleh P...,https://www.bukukita.com/babacms/displaybuku/1...
1,Adabud Dunya Wad Din,978627163398,Imam al-Mawardi,Alifia Books,September - 2020,Islam,"Etika (akhlak, adab) adalah mahkota dan perhia...","Etika (akhlak, adab) adalah mahkota dan perhia...",https://www.bukukita.com/babacms/displaybuku/1...
2,AL-QUR'AN & MAKNANYA,9786237713272,M. Quraish Shihab,Lentera Hati,September - 2021,Al-Quran,Dilengkapi :>> Asbabun Nuzul>> Makna dan Tujua...,Dilengkapi :>> Asbabun Nuzul>> Makna dan Tujua...,https://www.bukukita.com/babacms/displaybuku/1...
3,Baca Buku Ini Saat Engkau Ingin Berubah,9786232441118,Rahma Kusharjanti,Psikologi Corner,Juli - 2020,Pengembangan Diri,"Apakah kamu suka ""motor""?Apakah kamu langganan...","Apakah kamu suka ""motor""?Apakah kamu langganan...",https://www.bukukita.com/babacms/displaybuku/1...
4,Banjir Besar Zaman Nabi Nuh ( Riwayat dalam Al...,9786232201439,MANSUR ABDUL HAKIM,Alvabet,Februari - 2023,Sejarah Agama,Kisah Air bah besar pertama pada zaman kuno te...,Kisah Air bah besar pertama pada zaman kuno te...,https://www.bukukita.com/babacms/displaybuku/1...


In [10]:
columns = ['ISBN', 'Book-Author', 'Year-Of-Publication', 'Book-Title', 'Image-URL-S']

books_new = books[columns]

books_new.head()

Unnamed: 0,ISBN,Book-Author,Year-Of-Publication,Book-Title,Image-URL-S
0,195153448,Mark P. O. Morford,2002,Classical Mythology,http://images.amazon.com/images/P/0195153448.0...
1,2005018,Richard Bruce Wright,2001,Clara Callan,http://images.amazon.com/images/P/0002005018.0...
2,60973129,Carlo D'Este,1991,Decision in Normandy,http://images.amazon.com/images/P/0060973129.0...
3,374157065,Gina Bari Kolata,1999,Flu: The Story of the Great Influenza Pandemic...,http://images.amazon.com/images/P/0374157065.0...
4,393045218,E. J. W. Barber,1999,The Mummies of Urumchi,http://images.amazon.com/images/P/0393045218.0...


In [5]:
books_new.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2311 entries, 0 to 2310
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Judul           2311 non-null   object
 1   No. ISBN        2311 non-null   object
 2   Penulis         2311 non-null   object
 3   Penerbit        2311 non-null   object
 4   Tanggal terbit  2311 non-null   object
 5   Kategori        2311 non-null   object
 6   Description     2270 non-null   object
 7   img_small_link  2270 non-null   object
 8   img_large_link  2311 non-null   object
dtypes: object(9)
memory usage: 162.6+ KB


### Checking null values

#### For books

In [6]:
books_new.isna().sum()

Judul              0
No. ISBN           0
Penulis            0
Penerbit           0
Tanggal terbit     0
Kategori           0
Description       41
img_small_link    41
img_large_link     0
dtype: int64

In [7]:
books_new = books_new.fillna('NA')
books_new.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2345 entries, 0 to 2344
Data columns (total 7 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Judul           2345 non-null   object
 1   No. ISBN        2345 non-null   object
 2   Penulis         2345 non-null   object
 3   Penerbit        2345 non-null   object
 4   Tanggal terbit  2345 non-null   object
 5   Kategori        2345 non-null   object
 6   Description     2345 non-null   object
dtypes: object(7)
memory usage: 128.4+ KB


This dataset will be used for recommending the books to customers.

#### For ratings

In [8]:
ratings.isna().sum()

User-ID        0
ISBN           0
Book-Rating    0
dtype: int64

There is no null value here.

In [15]:
books_new.to_csv('books_cleaned_fix.csv')

In [16]:
ratings


Unnamed: 0,User-ID,ISBN,Book-Rating
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6
...,...,...,...
1149775,276704,1563526298,9
1149776,276706,0679447156,0
1149777,276709,0515107662,10
1149778,276721,0590442449,10


In [39]:
buka = ratings['Book-Rating']
buka

0           0
1           5
2           0
3           3
4           6
           ..
1149775     9
1149776     0
1149777    10
1149778    10
1149779     8
Name: Book-Rating, Length: 1149780, dtype: int64

In [20]:
ratings.columns

Index(['User-ID', 'ISBN', 'Book-Rating'], dtype='object')

In [53]:
column = ['ISBN']
jumlah_buka = ratings[column]
jumlah_buka = jumlah_buka.assign(User_ID=ratings['User-ID'],Jumlah_Buku_Dibuka=ratings['Book-Rating'])

In [54]:
jumlah_buka.to_csv('Jumlah_buku_dibuka.csv')