# How To Load Machine Learning Data

### 1. Load CSV Files with the Python Standard Library

In [16]:
import csv
import numpy as np

# Path to the CSV file; its first line is a header row.
filename = 'dataset/diabetes.csv'

# Parse with the standard-library csv module. QUOTE_NONE tells the reader
# to give quote characters no special treatment.
with open(filename, mode='r', newline='') as raw_data:
    reader = csv.reader(raw_data, quoting=csv.QUOTE_NONE, delimiter=',')
    next(reader)  # drop the header row before collecting the records
    x = [row for row in reader]

# The rows are lists of strings: build a string array, then cast to float.
data = np.array(x).astype(np.float64)
print(data)

[[  6.    148.     72.    ...   0.627  50.      1.   ]
 [  1.     85.     66.    ...   0.351  31.      0.   ]
 [  8.    183.     64.    ...   0.672  32.      1.   ]
 ...
 [  5.    121.     72.    ...   0.245  30.      0.   ]
 [  1.    126.     60.    ...   0.349  47.      1.   ]
 [  1.     93.     70.    ...   0.315  23.      0.   ]]


### 2. Load CSV Files with NumPy

In [22]:
# Load csv using Numpy
from numpy import loadtxt

filename = 'dataset/diabetes.csv'

# 'rb' = read binary ('rt' would be read text; neither mode writes).
# The with-block guarantees the file handle is closed after loading,
# even if loadtxt raises on a malformed row.
with open(filename, 'rb') as raw_data:
    next(raw_data)  # skip the header line so loadtxt only sees numeric rows
    data = loadtxt(raw_data, delimiter=',')

print(data)
print(data.shape)

[[  6.    148.     72.    ...   0.627  50.      1.   ]
 [  1.     85.     66.    ...   0.351  31.      0.   ]
 [  8.    183.     64.    ...   0.672  32.      1.   ]
 ...
 [  5.    121.     72.    ...   0.245  30.      0.   ]
 [  1.    126.     60.    ...   0.349  47.      1.   ]
 [  1.     93.     70.    ...   0.315  23.      0.   ]]
(768, 9)


### 3. Load CSV Files with pandas

In [24]:
# Load csv using Pandas
import pandas as pd

filename = 'dataset/diabetes.csv'

# By default read_csv takes the column names from the first line of the file.
# For a file without a header row, supply the names explicitly instead:
#   names = [.. headers ..]
#   data = pd.read_csv(filename, names=names)
data = pd.read_csv(filepath_or_buffer=filename)

# (rows, columns) of the loaded DataFrame
print(data.shape)

(768, 9)
