In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
dataset = pd.read_csv('Data.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

In [3]:
print(X)

[['Rendy' 60.0 140.0]
 ['Martin' nan 162.0]
 ['Iqball' 56.0 171.0]
 ['Rayanza' 45.0 nan]
 ['Saputra' 75.0 149.0]
 ['Rafatar' 44.0 173.0]
 ['Wonka' 66.0 172.0]
 ['Semesta' nan 177.0]
 ['Langit' 55.0 170.0]
 ['Mary' 35.0 150.0]]


In [4]:
print(y)

['No' 'Yes' 'No' 'No' 'Yes' 'yes' 'No' 'Yes' 'No' 'Yes']


In [5]:
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
imputer.fit(X[:, 1:3])
X[:, 1:3] = imputer.transform(X[:, 1:3])

In [6]:
print(X)

[['Rendy' 60.0 140.0]
 ['Martin' 54.5 162.0]
 ['Iqball' 56.0 171.0]
 ['Rayanza' 45.0 162.66666666666666]
 ['Saputra' 75.0 149.0]
 ['Rafatar' 44.0 173.0]
 ['Wonka' 66.0 172.0]
 ['Semesta' 54.5 177.0]
 ['Langit' 55.0 170.0]
 ['Mary' 35.0 150.0]]


In [7]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [0])], remainder='passthrough')
X = ct.fit_transform(X)

In [8]:
print(X)

  (0, 6)	1.0
  (0, 10)	60.0
  (0, 11)	140.0
  (1, 2)	1.0
  (1, 10)	54.5
  (1, 11)	162.0
  (2, 0)	1.0
  (2, 10)	56.0
  (2, 11)	171.0
  (3, 5)	1.0
  (3, 10)	45.0
  (3, 11)	162.66666666666666
  (4, 7)	1.0
  (4, 10)	75.0
  (4, 11)	149.0
  (5, 4)	1.0
  (5, 10)	44.0
  (5, 11)	173.0
  (6, 9)	1.0
  (6, 10)	66.0
  (6, 11)	172.0
  (7, 8)	1.0
  (7, 10)	54.5
  (7, 11)	177.0
  (8, 1)	1.0
  (8, 10)	55.0
  (8, 11)	170.0
  (9, 3)	1.0
  (9, 10)	35.0
  (9, 11)	150.0


In [9]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y = le.fit_transform(y)

In [10]:
print(y)

[0 1 0 0 1 2 0 1 0 1]


In [11]:
from sklearn.model_selection import train_test_split
X = X.toarray()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

In [12]:
print(X_train)

[[  0.           0.           0.           0.           0.
    0.           0.           0.           0.           1.
   66.         172.        ]
 [  0.           0.           0.           0.           0.
    0.           0.           1.           0.           0.
   75.         149.        ]
 [  0.           0.           0.           0.           0.
    0.           1.           0.           0.           0.
   60.         140.        ]
 [  0.           0.           0.           0.           0.
    1.           0.           0.           0.           0.
   45.         162.66666667]
 [  0.           0.           1.           0.           0.
    0.           0.           0.           0.           0.
   54.5        162.        ]
 [  0.           0.           0.           0.           0.
    0.           0.           0.           1.           0.
   54.5        177.        ]
 [  0.           1.           0.           0.           0.
    0.           0.           0.           0.           0.


In [13]:
print(X_test)

[[  1.   0.   0.   0.   0.   0.   0.   0.   0.   0.  56. 171.]
 [  0.   0.   0.   1.   0.   0.   0.   0.   0.   0.  35. 150.]]


In [14]:
print(y_train)

[0 1 0 0 1 1 0 2]


In [15]:
print(X_test)

[[  1.   0.   0.   0.   0.   0.   0.   0.   0.   0.  56. 171.]
 [  0.   0.   0.   1.   0.   0.   0.   0.   0.   0.  35. 150.]]


In [17]:
print(y_test)

[0 1]


In [18]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train[:, 3:] = sc.fit_transform(X_train[:, 3:])
X_test[:, 3:] = sc.transform(X_test[:, 3:])

In [19]:
print(X_train)

[[ 0.          0.          0.          0.         -0.37796447 -0.37796447
  -0.37796447 -0.37796447 -0.37796447  2.64575131  0.95982558  0.73271398]
 [ 0.          0.          0.          0.         -0.37796447 -0.37796447
  -0.37796447  2.64575131 -0.37796447 -0.37796447  1.89370993 -1.18414913]
 [ 0.          0.          0.          0.         -0.37796447 -0.37796447
   2.64575131 -0.37796447 -0.37796447 -0.37796447  0.33723601 -1.93422601]
 [ 0.          0.          0.          0.         -0.37796447  2.64575131
  -0.37796447 -0.37796447 -0.37796447 -0.37796447 -1.2192379  -0.04514352]
 [ 0.          0.          1.          0.         -0.37796447 -0.37796447
  -0.37796447 -0.37796447 -0.37796447 -0.37796447 -0.23347109 -0.10070477]
 [ 0.          0.          0.          0.         -0.37796447 -0.37796447
  -0.37796447 -0.37796447  2.64575131 -0.37796447 -0.23347109  1.14942335]
 [ 0.          1.          0.          0.         -0.37796447 -0.37796447
  -0.37796447 -0.37796447 -0.377

In [21]:
print(X_test)

[[ 1.          0.          0.          0.         -0.37796447 -0.37796447
  -0.37796447 -0.37796447 -0.37796447 -0.37796447 -0.0778237   0.64937211]
 [ 0.          0.          0.          1.         -0.37796447 -0.37796447
  -0.37796447 -0.37796447 -0.37796447 -0.37796447 -2.25688717 -1.10080726]]
