# Import Library

In [1]:
import pandas as pd

# Membaca data dari file CSV ke dalam DataFrame

In [2]:
# Load the games dataset from the CSV file (path relative to the notebook's
# working directory) into a DataFrame.
csv_path = "games-data.csv"
df = pd.read_csv(csv_path)

# Membagi dataset menjadi training set dan testing set dengan proporsi 70:30

In [3]:
from sklearn.model_selection import train_test_split

# Separate the predictors from the target column 'score'.
X = df.drop(columns='score')  # features
y = df['score']  # target

# Hold out 30% of the rows for testing (70:30 split). A fixed random_state
# pins the shuffle, so the same rows land in each partition on every run and
# the notebook stays reproducible after a kernel restart.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Print the shape of each partition as a sanity check on the split.
print("Ukuran Training Set:", X_train.shape)
print("Ukuran Testing Set:", X_test.shape)


Ukuran Training Set: (12560, 9)
Ukuran Testing Set: (5384, 9)


# Copy Dataset

In [4]:
# Work on an independent deep copy so the scaling steps below do not touch df.
df_copy = df.copy(deep=True)

# Melakukan normalisasi data pada salah satu atribut menggunakan MinMaxScaler

In [5]:
from sklearn.preprocessing import MinMaxScaler

# Name of the column that gets min-max normalised.
atribut_normalisasi = 'users'

# Fit the scaler on that single column (double brackets keep the selection
# 2-D, as the scaler expects) and write the scaled values back to df_copy.
mm = MinMaxScaler()
kolom_terskala = mm.fit_transform(df_copy[[atribut_normalisasi]])
df_copy[atribut_normalisasi] = kolom_terskala

# Show the resulting minimum, maximum and the full scaled column.
hasil_normalisasi = df_copy[atribut_normalisasi]
print('Nilai Min : ', hasil_normalisasi.min())
print('Nilai Max : ', hasil_normalisasi.max())
print('seluruh Nilai atribut : ')
print(hasil_normalisasi)

Nilai Min :  0.0
Nilai Max :  1.0
seluruh Nilai atribut : 
0        0.039306
1        0.004424
2        0.026022
3        0.002215
4        0.023000
           ...   
17939    0.000718
17940    0.000308
17941    0.000882
17942    0.003972
17943    0.001032
Name: users, Length: 17944, dtype: float64


# Melakukan standardisasi pada dataset

In [6]:
from sklearn.preprocessing import StandardScaler

# Collect the labels of every float64 / int64 column in the working copy.
numeric_columns = df_copy.select_dtypes(include=['float64', 'int64']).columns

# Fit a StandardScaler on those columns and overwrite them with the
# standardised values.
ss = StandardScaler()
nilai_standar = ss.fit_transform(df_copy[numeric_columns])
df_copy[numeric_columns] = nilai_standar

# Print a caption, then display the standardised columns as the cell output.
print('Hasil Standarisasi :')
df_copy[numeric_columns]

Hasil Standarisasi :


Unnamed: 0,score,critics,users
0,2.305569,-0.067034,4.355536
1,2.224992,-0.241935,0.370396
2,2.224992,2.381588,2.837871
3,2.224992,0.049567,0.118103
4,2.224992,3.664199,2.492627
...,...,...,...
17939,-4.301719,-0.475138,-0.052957
17940,-4.301719,-0.708340,-0.099822
17941,-4.301719,-0.941542,-0.034210
17942,-4.382295,-0.824941,0.318844


# Melakukan Data cleaning pada data dengan nilai null

**Sebelum**

In [7]:
# Count the missing values in each column (isna is the same check as isnull).
df.isna().sum()

name           0
platform       0
r-date         0
score          0
user score     0
developer      0
genre          0
players       22
critics        0
users          0
dtype: int64

Terdapat 22 nilai null pada atribut `players`.

**Sesudah**

In [8]:
from sklearn.impute import SimpleImputer

# Imputer that replaces missing entries with the most frequent value (mode).
imputer = SimpleImputer(strategy='most_frequent')

# Fit on the 'players' column (double brackets keep the selection 2-D) and
# store the filled values back into df.
players_terisi = imputer.fit_transform(df[["players"]])
df["players"] = players_terisi

# Re-count the nulls per column to confirm the imputation worked.
print("Jumlah null setelah menggunakan SimpleImputer: ")
null_per_kolom = df.isna().sum()
print(null_per_kolom)

Jumlah null setelah menggunakan SimpleImputer: 
name          0
platform      0
r-date        0
score         0
user score    0
developer     0
genre         0
players       0
critics       0
users         0
dtype: int64


# Melakukan Data cleaning pada data dengan nilai duplikat

**Sebelum**

In [9]:
# Count the rows that are exact duplicates of an earlier row.
jumlah_duplikat = df.duplicated().sum()
print("Jumlah nilai Duplikat:", jumlah_duplikat)

Jumlah nilai Duplikat: 53


**Sesudah**

In [10]:
# Menghapus baris-baris yang memiliki nilai duplikat
df.drop_duplicates(inplace=True)

print("Setelah menggunakan fungsi drop_duplicates()")
print("Jumlah nilai Duplikat:", df.duplicated().sum())

Setelah menggunakan fungsi drop_duplicates()
Jumlah nilai Duplikat: 0


# Mengganti tipe data salah satu atribut numerik

**Sebelum**

In [11]:
# Preview the first five rows before the dtype change.
df.head(n=5)

Unnamed: 0,name,platform,r-date,score,user score,developer,genre,players,critics,users
0,The Legend of Zelda: Ocarina of Time,Nintendo64,"November 23, 1998",99,9.1,Nintendo,"Action Adventure,Fantasy",1 Player,22,5749
1,Tony Hawk's Pro Skater 2,PlayStation,"September 20, 2000",98,7.4,NeversoftEntertainment,"Sports,Alternative,Skateboarding",1-2,19,647
2,Grand Theft Auto IV,PlayStation3,"April 29, 2008",98,7.6,RockstarNorth,"Action Adventure,Modern,Modern,Open-World",1 Player,64,3806
3,SoulCalibur,Dreamcast,"September 8, 1999",98,8.5,Namco,"Action,Fighting,3D",1-2,24,324
4,Grand Theft Auto IV,Xbox360,"April 29, 2008",98,7.9,RockstarNorth,"Action Adventure,Modern,Modern,Open-World",1 Player,86,3364


Atribut `score` masih bertipe integer.

**Sesudah**

In [12]:
# Mengubah tipe data atribut score menjadi float
df['score'] = df['score'].astype(float)
df.head()

Unnamed: 0,name,platform,r-date,score,user score,developer,genre,players,critics,users
0,The Legend of Zelda: Ocarina of Time,Nintendo64,"November 23, 1998",99.0,9.1,Nintendo,"Action Adventure,Fantasy",1 Player,22,5749
1,Tony Hawk's Pro Skater 2,PlayStation,"September 20, 2000",98.0,7.4,NeversoftEntertainment,"Sports,Alternative,Skateboarding",1-2,19,647
2,Grand Theft Auto IV,PlayStation3,"April 29, 2008",98.0,7.6,RockstarNorth,"Action Adventure,Modern,Modern,Open-World",1 Player,64,3806
3,SoulCalibur,Dreamcast,"September 8, 1999",98.0,8.5,Namco,"Action,Fighting,3D",1-2,24,324
4,Grand Theft Auto IV,Xbox360,"April 29, 2008",98.0,7.9,RockstarNorth,"Action Adventure,Modern,Modern,Open-World",1 Player,86,3364


Nilai atribut `score` sudah berubah menjadi float (misalnya 99 menjadi 99.0).

# Melakukan one hot encoding pada dataset

In [13]:
from sklearn.preprocessing import OneHotEncoder

# Display settings: show every column and keep wide frames on one line
# instead of wrapping them.
opsi_tampilan = {
    'display.max_columns': None,
    'display.expand_frame_repr': False,
}
for nama_opsi, nilai_opsi in opsi_tampilan.items():
    pd.set_option(nama_opsi, nilai_opsi)

# One-hot encode the 'platform' column with pandas: each platform value
# becomes its own boolean platform_<value> column.
data_encoded = pd.get_dummies(df, columns=['platform'], dtype=bool)

# Display the encoded dataset as the cell output.
data_encoded

Unnamed: 0,name,r-date,score,user score,developer,genre,players,critics,users,platform_3DS,platform_DS,platform_Dreamcast,platform_GameBoyAdvance,platform_GameCube,platform_Nintendo64,platform_PC,platform_PSP,platform_PlayStation,platform_PlayStation2,platform_PlayStation3,platform_PlayStation4,platform_PlayStation5,platform_PlayStationVita,platform_Stadia,platform_Switch,platform_Wii,platform_WiiU,platform_Xbox,platform_Xbox360,platform_XboxOne,platform_XboxSeriesX
0,The Legend of Zelda: Ocarina of Time,"November 23, 1998",99.0,9.1,Nintendo,"Action Adventure,Fantasy",1 Player,22,5749,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1,Tony Hawk's Pro Skater 2,"September 20, 2000",98.0,7.4,NeversoftEntertainment,"Sports,Alternative,Skateboarding",1-2,19,647,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False
2,Grand Theft Auto IV,"April 29, 2008",98.0,7.6,RockstarNorth,"Action Adventure,Modern,Modern,Open-World",1 Player,64,3806,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False
3,SoulCalibur,"September 8, 1999",98.0,8.5,Namco,"Action,Fighting,3D",1-2,24,324,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
4,Grand Theft Auto IV,"April 29, 2008",98.0,7.9,RockstarNorth,"Action Adventure,Modern,Modern,Open-World",1 Player,86,3364,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17939,Vroom in the Night Sky,"April 5, 2017",17.0,3.1,Poisoft,"Sports,Individual,Biking",No Online Multiplayer,15,105,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False
17940,Leisure Suit Larry: Box Office Bust,"May 5, 2009",17.0,1.9,Team17,"Action Adventure,Adventure,Third-Person,Open-W...",No Online Multiplayer,11,45,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False
17941,Yaris,"October 10, 2007",17.0,4.3,BackboneEntertainment,"Driving,Racing,Arcade,Arcade,Automobile",2 Online,7,129,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False
17942,Ride to Hell: Retribution,"June 24, 2013",16.0,1.3,"Eutechnyx,DeepSilver","Driving,Modern,Racing,Motorcycle,Motocross,Mod...",No info,9,581,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
