# Getting Started with TensorFlow 2.0 in 7 Days
## 1.4 Getting Data into TensorFlow

In [1]:
# install tensorflow
!pip install tf-nightly-2.0-preview



In [0]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd

In [0]:
# Fetch the Iris data file and keep the local path that Keras returns for it.
file_path = keras.utils.get_file(
    fname="iris.data",
    origin="https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data",
)

In [0]:
# The column names are supplied here instead of being read from the file,
# so every line of the file is parsed as a data row.
column_names = [
    "sepal_length",
    "sepal_width",
    "petal_length",
    "petal_width",
    "class",
]
df = pd.read_csv(file_path, header=None, names=column_names)

In [5]:
# Print a summary of the frame: index range, per-column non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
sepal_length    150 non-null float64
sepal_width     150 non-null float64
petal_length    150 non-null float64
petal_width     150 non-null float64
class           150 non-null object
dtypes: float64(4), object(1)
memory usage: 5.9+ KB


We are interested in the `class` column, which has an `object` type. Let's take a look at some rows.

In [6]:
# Preview the first five rows to see what the `class` values look like.
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


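We can't work with the `class` column as is, and last time we simply dropped it. However, we can't just discard data. What we need to do is apply __one-hot encoding__: each distinct class label becomes its own column holding 1 where the row belongs to that class and 0 otherwise.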

In [8]:
# One-hot encode `class`: each distinct label becomes its own indicator column
# named `class_<label>`; the numeric feature columns are passed through unchanged.
# The dtype is pinned to uint8 so the indicators are numeric 0/1 on every pandas
# version (pandas >= 2.0 would otherwise produce booleans), which keeps the whole
# frame numeric.
df_one_hot = pd.get_dummies(df, columns=['class'], dtype='uint8')
df_one_hot.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class_Iris-setosa,class_Iris-versicolor,class_Iris-virginica
0,5.1,3.5,1.4,0.2,1,0,0
1,4.9,3.0,1.4,0.2,1,0,0
2,4.7,3.2,1.3,0.2,1,0,0
3,4.6,3.1,1.5,0.2,1,0,0
4,5.0,3.6,1.4,0.2,1,0,0


In [9]:
# One-hot encode `class` again, this time with drop_first=True: the first label
# (Iris-setosa) gets no column of its own and is represented by both remaining
# indicators being 0, so three classes are encoded in two columns.
# The dtype is pinned to uint8 so the indicators are numeric 0/1 on every pandas
# version (pandas >= 2.0 would otherwise produce booleans).
df_one_hot = pd.get_dummies(df, columns=['class'], drop_first=True, dtype='uint8')
df_one_hot.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class_Iris-versicolor,class_Iris-virginica
0,5.1,3.5,1.4,0.2,0,0
1,4.9,3.0,1.4,0.2,0,0
2,4.7,3.2,1.3,0.2,0,0
3,4.6,3.1,1.5,0.2,0,0
4,5.0,3.6,1.4,0.2,0,0


In [10]:
# Look at the last five rows as well, where the indicator columns are not all zero.
df_one_hot.tail()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class_Iris-versicolor,class_Iris-virginica
145,6.7,3.0,5.2,2.3,0,1
146,6.3,2.5,5.0,1.9,0,1
147,6.5,3.0,5.2,2.0,0,1
148,6.2,3.4,5.4,2.3,0,1
149,5.9,3.0,5.1,1.8,0,1
