In [87]:
import pandas as pd

import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from sklearn.preprocessing import OrdinalEncoder
from sklearn.model_selection import train_test_split

## Membaca dataset


In [88]:
df = pd.read_csv('googleplaystore.csv')
df.head()

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25M,"50,000,000+",Free,0,Teen,Art & Design,"June 8, 2018",Varies with device,4.2 and up
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up


## Mengecek nilai null

In [89]:
df.isnull().sum()

App                  0
Category             0
Rating            1474
Reviews              0
Size                 0
Installs             0
Type                 1
Price                0
Content Rating       1
Genres               0
Last Updated         0
Current Ver          8
Android Ver          3
dtype: int64

## Mengecek tipe data

In [90]:
df.dtypes

App                object
Category           object
Rating            float64
Reviews            object
Size               object
Installs           object
Type               object
Price              object
Content Rating     object
Genres             object
Last Updated       object
Current Ver        object
Android Ver        object
dtype: object

## Membersihkan dataset dengan menggunakan modus

In [91]:
from sklearn.impute import SimpleImputer

In [92]:
simpleImputerObjek = SimpleImputer(strategy = 'most_frequent')

In [93]:
df['Rating'] = simpleImputerObjek.fit_transform(df[['Rating']])
df['Type'] = simpleImputerObjek.fit_transform(df[['Type']])
df['Content Rating'] = simpleImputerObjek.fit_transform(df[['Content Rating']])
df['Current Ver'] = simpleImputerObjek.fit_transform(df[['Current Ver']])
df['Android Ver'] = simpleImputerObjek.fit_transform(df[['Android Ver']])

In [94]:
df.isna().sum()

App               0
Category          0
Rating            0
Reviews           0
Size              0
Installs          0
Type              0
Price             0
Content Rating    0
Genres            0
Last Updated      0
Current Ver       0
Android Ver       0
dtype: int64

## Encoding data objek menggunakan ordinal encoder

In [95]:
encoder = OrdinalEncoder()
df["Category"] = encoder.fit_transform(df[["Category"]])
df["Size"] = encoder.fit_transform(df[["Size"]])
df["Genres"] = encoder.fit_transform(df[["Genres"]])
df["Android Ver"] = encoder.fit_transform(df[["Android Ver"]])

## Data split

In [96]:
X = df[['Category', 'Size', 'Genres', 'Android Ver']]
Y = df['Rating']

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3)

# Menampilkan Dimensi
print("Dimensi x_Train: ", X_train.shape)
print("Dimensi x_Test: ", X_test.shape)
print("Dimensi y_Train: ", Y_train.shape)
print("Dimensi y_Test: ", Y_test.shape)

Dimensi x_Train:  (7588, 4)
Dimensi x_Test:  (3253, 4)
Dimensi y_Train:  (7588,)
Dimensi y_Test:  (3253,)


## Mengecek nilai unique pada atribut country

In [97]:
df['Rating'].unique()

array([ 4.1,  3.9,  4.7,  4.5,  4.3,  4.4,  3.8,  4.2,  4.6,  3.2,  4. ,
        4.8,  4.9,  3.6,  3.7,  3.3,  3.4,  3.5,  3.1,  5. ,  2.6,  3. ,
        1.9,  2.5,  2.8,  2.7,  1. ,  2.9,  2.3,  2.2,  1.7,  2. ,  1.8,
        2.4,  1.6,  2.1,  1.4,  1.5,  1.2, 19. ])

## Encoding label


In [98]:
num_class = 44

y_train = tf.keras.utils.to_categorical(Y_train, num_class)
y_test = tf.keras.utils.to_categorical(Y_test, num_class)

## Membuat DNN

In [99]:
model = tf.keras.Sequential(
    [
        tf.keras.layers.Dense(128, activation='relu', input_dim = 4),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(44, activation='softmax')
    ]
)
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 128)               640       
                                                                 
 dense_4 (Dense)             (None, 64)                8256      
                                                                 
 dense_5 (Dense)             (None, 44)                2860      
                                                                 
Total params: 11,756
Trainable params: 11,756
Non-trainable params: 0
_________________________________________________________________


## Optimasi Model

In [100]:
model.compile(optimizer='sgd', 
              loss='binary_crossentropy', 
              metrics=['accuracy'])

## Training model

In [101]:
model.fit(X_train, y_train, epochs = 10, validation_data = (X_test, y_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x22bfc613160>

## Evaluasi model

In [102]:
score_testing = model.evaluate(X_test, y_test)
score_training = model.evaluate(X_train, y_train)

print("""----------------------------------------
            SCORE TESTING                
----------------------------------------""")
print(" TEST LOSS        : ", score_testing[0])
print(" TEST ACCURACY    : ", score_testing[1])
print("""----------------------------------------
            SCORE TRAINING                
----------------------------------------""")
print(" TEST LOSS        : ", score_training[0])
print(" TEST ACCURACY    : ", score_training[1])
print("----------------------------------------")

----------------------------------------
            SCORE TESTING                
----------------------------------------
 TEST LOSS        :  0.03379112482070923
 TEST ACCURACY    :  0.7878881096839905
----------------------------------------
            SCORE TRAINING                
----------------------------------------
 TEST LOSS        :  0.03297630697488785
 TEST ACCURACY    :  0.7911175489425659
----------------------------------------


## Save model

In [103]:
model.save('Posttest8')

INFO:tensorflow:Assets written to: Posttest8\assets
