In [1]:
!pip install tensorflow
!pip install tensorflow-gpu
!pip install keras

Collecting tensorflow-gpu
[?25l  Downloading https://files.pythonhosted.org/packages/31/bf/c28971266ca854a64f4b26f07c4112ddd61f30b4d1f18108b954a746f8ea/tensorflow_gpu-2.2.0-cp36-cp36m-manylinux2010_x86_64.whl (516.2MB)
[K     |████████████████████████████████| 516.2MB 28kB/s 
Installing collected packages: tensorflow-gpu
Successfully installed tensorflow-gpu-2.2.0


In [2]:
import tensorflow as tf
import keras
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

Using TensorFlow backend.


In [3]:
print(tf.__version__)
print(keras.__version__)
print(np.__version__)
print(pd.__version__)

2.2.0
2.3.1
1.18.5
1.0.5


In [4]:
!wget -cq https://raw.githubusercontent.com/D-Bhatta/Data-Cleaning-Beginner/master/anne-bonner-tutorial/my_data.csv

In [5]:
device_name = tf.test.gpu_device_name()
# if device_name != '/device:GPU:0':
    # raise SystemError("GPU device not found")
# print(f'Found GPU at: {device_name}')

Load data

In [6]:
dataset = pd.read_csv('my_data.csv')
dataset

Unnamed: 0,Animal,Age,Worth,Friendly
0,Cat,4.0,72000.0,No
1,Dog,17.0,48000.0,Yes
2,Moose,6.0,54000.0,No
3,Dog,8.0,61000.0,No
4,Moose,4.0,,Yes
5,Cat,15.0,58000.0,Yes
6,Dog,,52000.0,No
7,Cat,12.0,79000.0,Yes
8,Moose,5.0,83000.0,No
9,Cat,7.0,67000.0,Yes


In [7]:
x = dataset.iloc[:,:-1].values
x

array([['Cat', 4.0, 72000.0],
       ['Dog', 17.0, 48000.0],
       ['Moose', 6.0, 54000.0],
       ['Dog', 8.0, 61000.0],
       ['Moose', 4.0, nan],
       ['Cat', 15.0, 58000.0],
       ['Dog', nan, 52000.0],
       ['Cat', 12.0, 79000.0],
       ['Moose', 5.0, 83000.0],
       ['Cat', 7.0, 67000.0]], dtype=object)

In [8]:
y = dataset.iloc[:,-1].values
y

array(['No', 'Yes', 'No', 'No', 'Yes', 'Yes', 'No', 'Yes', 'No', 'Yes'],
      dtype=object)

In [9]:
imputer = SimpleImputer(missing_values=np.nan, strategy='most_frequent')
imputer = imputer.fit(x[:,1:3])

In [10]:
x[:,1:3] = imputer.transform(x[:,1:3])
x

array([['Cat', 4.0, 72000.0],
       ['Dog', 17.0, 48000.0],
       ['Moose', 6.0, 54000.0],
       ['Dog', 8.0, 61000.0],
       ['Moose', 4.0, 48000.0],
       ['Cat', 15.0, 58000.0],
       ['Dog', 4.0, 52000.0],
       ['Cat', 12.0, 79000.0],
       ['Moose', 5.0, 83000.0],
       ['Cat', 7.0, 67000.0]], dtype=object)

In [11]:
labelencoder_x = LabelEncoder()
x[:,0] = labelencoder_x.fit_transform(x[:,0])
x

array([[0, 4.0, 72000.0],
       [1, 17.0, 48000.0],
       [2, 6.0, 54000.0],
       [1, 8.0, 61000.0],
       [2, 4.0, 48000.0],
       [0, 15.0, 58000.0],
       [1, 4.0, 52000.0],
       [0, 12.0, 79000.0],
       [2, 5.0, 83000.0],
       [0, 7.0, 67000.0]], dtype=object)

In [12]:
x_labeled = x[:,0]
x_labeled

array([0, 1, 2, 1, 2, 0, 1, 0, 2, 0], dtype=object)

In [22]:
onehotencoder_x = OneHotEncoder(handle_unknown='ignore')
x_labeled = x_labeled.reshape(-1,1)
x_encoded = onehotencoder_x.fit_transform(x_labeled)
x_encoded = x_encoded.toarray()
x_encoded.shape

(10, 3)

In [28]:
x = np.concatenate([x_encoded,x[:,1:]],axis=1)
x

array([[1.0, 0.0, 0.0, 0.0, 0.0, 4.0, 72000.0],
       [0.0, 1.0, 0.0, 1.0, 0.0, 17.0, 48000.0],
       [0.0, 0.0, 1.0, 0.0, 1.0, 6.0, 54000.0],
       [0.0, 1.0, 0.0, 1.0, 0.0, 8.0, 61000.0],
       [0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 48000.0],
       [1.0, 0.0, 0.0, 0.0, 0.0, 15.0, 58000.0],
       [0.0, 1.0, 0.0, 1.0, 0.0, 4.0, 52000.0],
       [1.0, 0.0, 0.0, 0.0, 0.0, 12.0, 79000.0],
       [0.0, 0.0, 1.0, 0.0, 1.0, 5.0, 83000.0],
       [1.0, 0.0, 0.0, 0.0, 0.0, 7.0, 67000.0]], dtype=object)