In [1]:
import numpy as np
from io import StringIO
from pprint import pprint
from tabulate import tabulate

# Importing Data

### - .genfromtxt
### - .loadtxt

`genfromtxt` is slower than `loadtxt` but more flexible, because `genfromtxt` takes missing data into account.

In [2]:
# A string delimiter splits every line on that character.
data = "1, 2, 3\n4, 5, 6"
q = np.genfromtxt(StringIO(data), delimiter=",")
print(tabulate(q, tablefmt="grid") + "\n")

# An integer delimiter reads fixed-width fields: here every column is 3 characters wide.
data = "  1  2  3\n  4  5 67\n890123  4"
w = np.genfromtxt(StringIO(data), delimiter=3)
print(tabulate(w, tablefmt="grid") + "\n")

# A sequence of integers gives one width per column:
# first column = 4 characters, second = 3, third = 2.
column_widths = (4, 3, 2)
data = "123456789\n   4  7 9\n   4567 9"
e = np.genfromtxt(StringIO(data), delimiter=column_widths)
print(tabulate(e, tablefmt="grid") + "\n")

# Same widths, but the last row has one leading space fewer, so its digits
# land in different columns (45 | 67 | 9 instead of 4 | 567 | 9).
data = "123456789\n   4  7 9\n  4567 9"
r = np.genfromtxt(StringIO(data), delimiter=column_widths)
print(tabulate(r, tablefmt="grid") + "\n")

+---+---+---+
| 1 | 2 | 3 |
+---+---+---+
| 4 | 5 | 6 |
+---+---+---+

+-----+-----+----+
|   1 |   2 |  3 |
+-----+-----+----+
|   4 |   5 | 67 |
+-----+-----+----+
| 890 | 123 |  4 |
+-----+-----+----+

+------+-----+----+
| 1234 | 567 | 89 |
+------+-----+----+
|    4 |   7 |  9 |
+------+-----+----+
|    4 | 567 |  9 |
+------+-----+----+

+------+-----+----+
| 1234 | 567 | 89 |
+------+-----+----+
|    4 |   7 |  9 |
+------+-----+----+
|   45 |  67 |  9 |
+------+-----+----+



In [3]:
data = "1, abc , 2\n 3, xxx, 4"

# By default the whitespace around each field is kept in the parsed strings.
q = np.genfromtxt(StringIO(data), dtype="|U5", delimiter=",")
print(tabulate(q, tablefmt="grid") + "\n")

# autostrip=True trims leading/trailing whitespace from every field.
w = np.genfromtxt(StringIO(data), dtype="|U5", delimiter=",", autostrip=True)
print(tabulate(w, tablefmt="grid") + "\n")

+---+-----+----+
| 1 | abc |  2 |
+---+-----+----+
| 3 | xxx |  4 |
+---+-----+----+

+---+-----+---+
| 1 | abc | 2 |
+---+-----+---+
| 3 | xxx | 4 |
+---+-----+---+



In [4]:
# Lines starting with the `comments` marker are skipped, so only the two
# numeric rows end up in the array.
data = "# this is a comment\n1, 2, 3, 4\n# this is a comment\n5, 6, 7, 8"
q = np.genfromtxt(StringIO(data), delimiter=",", comments="#")
print(tabulate(q, tablefmt="grid") + "\n")

+---+---+---+---+
| 1 | 2 | 3 | 4 |
+---+---+---+---+
| 5 | 6 | 7 | 8 |
+---+---+---+---+



# Skipping Lines & Choosing Columns
### - skip_header argument
### - skip_footer argument
### - usecols argument

In [5]:
data = "1, 2, 3\n4, 5, 6"

# Baseline: every row and every column.
q = np.genfromtxt(StringIO(data), delimiter=",")
print(tabulate(q, tablefmt="grid") + "\n")

# skip_header=1 drops the first line, leaving only the second row.
w = np.genfromtxt(StringIO(data), skip_header=1, delimiter=",")
print("w = ", w, end="\n" * 2)

# skip_footer=1 drops the last line, leaving only the first row.
e = np.genfromtxt(StringIO(data), skip_footer=1, delimiter=",")
print("e = ", e, end="\n" * 2)

# usecols picks columns by zero-based index: here the second and third.
r = np.genfromtxt(StringIO(data), usecols=(1, 2), delimiter=",")
print(tabulate(r, tablefmt="grid") + "\n")

+---+---+---+
| 1 | 2 | 3 |
+---+---+---+
| 4 | 5 | 6 |
+---+---+---+

w =  [4. 5. 6.]

e =  [1. 2. 3.]

+---+---+
| 2 | 3 |
+---+---+
| 5 | 6 |
+---+---+



In [6]:
# When columns have names, usecols can select them by name instead of by index.

# Case 1: the names are supplied explicitly through `names`.
data = "1, 2, 3, 4\n5, 6, 7, 8\n9, 10, 11, 12"

q = np.genfromtxt(StringIO(data), delimiter=",", names="a, b, c, d", usecols=("a", "d"))
print(tabulate(q, tablefmt="grid") + "\n")

w = np.genfromtxt(StringIO(data), delimiter=",", names="a, b, c, d", usecols=("b", "c"))
print(tabulate(w, tablefmt="grid") + "\n")

# Case 2: names=True reads the names from the first line of the data.
data = "A, B, C, D\n1, 2, 3, 4\n5, 6, 7, 8\n9, 10, 11, 12"

e = np.genfromtxt(StringIO(data), delimiter=",", names=True, usecols=("A", "D"))
print(tabulate(e, tablefmt="grid") + "\n")

r = np.genfromtxt(StringIO(data), delimiter=",", names=True, usecols=("B", "C"))
print(tabulate(r, tablefmt="grid") + "\n")

+---+----+
| 1 |  4 |
+---+----+
| 5 |  8 |
+---+----+
| 9 | 12 |
+---+----+

+----+----+
|  2 |  3 |
+----+----+
|  6 |  7 |
+----+----+
| 10 | 11 |
+----+----+

+---+----+
| 1 |  4 |
+---+----+
| 5 |  8 |
+---+----+
| 9 | 12 |
+---+----+

+----+----+
|  2 |  3 |
+----+----+
|  6 |  7 |
+----+----+
| 10 | 11 |
+----+----+



# Conversion & Lambda Function
### - converters argument

In [7]:
data = "1, 2.3%, 45.\n6, 78.9%, 0"

# Without a converter the percent fields cannot be parsed as floats and come back as nan.
q = np.genfromtxt(StringIO(data), delimiter=',')
print(tabulate(q, tablefmt="grid"), end="\n\n")


def lamda_convert_function(x):
    """Convert a percentage field such as ``" 2.3%"`` into a fraction (``0.023``).

    genfromtxt passes ``bytes`` to converters when ``encoding='bytes'`` (the
    default before NumPy 2.0) and ``str`` otherwise, so both are accepted.
    The original name is kept so existing references keep working.
    """
    if isinstance(x, bytes):
        x = x.decode()
    # Drop surrounding whitespace first, then the percent sign(s).
    return float(x.strip().strip('%')) / 100.


# converters maps a zero-based column index to the callable applied to each field of that column.
w = np.genfromtxt(StringIO(data), delimiter=',', converters={1: lamda_convert_function})
print(tabulate(w, tablefmt="grid"), end="\n\n")

+---+-----+----+
| 1 | nan | 45 |
+---+-----+----+
| 6 | nan |  0 |
+---+-----+----+

+---+-------+----+
| 1 | 0.023 | 45 |
+---+-------+----+
| 6 | 0.789 |  0 |
+---+-------+----+



# Missing & Filling Values
### - ??? missing_values argument
### - filling_values argument

In [8]:
data = "N/A, 2, 3\n4, ,???\n???, 10, N/A"

# No filling values: empty or unparseable fields come back as nan.
q = np.genfromtxt(StringIO(data), delimiter=",")
print(tabulate(q, tablefmt="grid") + "\n")

# filling_values maps a zero-based column index to its replacement value.
# Only columns 0 and 1 are filled here; column 2 keeps its nan entries.
w = np.genfromtxt(StringIO(data), filling_values={0: 111, 1: 222}, delimiter=",")
print(tabulate(w, tablefmt="grid") + "\n")

# Only column 2 is filled; columns 0 and 1 keep their nan entries.
e = np.genfromtxt(StringIO(data), filling_values={2: 333}, delimiter=",")
print(tabulate(e, tablefmt="grid") + "\n")

# Every column has a filling value, so no nan remains.
r = np.genfromtxt(StringIO(data), filling_values={0: 111, 1: 222, 2: 333}, delimiter=",")
print(tabulate(r, tablefmt="grid") + "\n")

+-----+-----+-----+
| nan |   2 |   3 |
+-----+-----+-----+
|   4 | nan | nan |
+-----+-----+-----+
| nan |  10 | nan |
+-----+-----+-----+

+-----+-----+-----+
| 111 |   2 |   3 |
+-----+-----+-----+
|   4 | 222 | nan |
+-----+-----+-----+
| 111 |  10 | nan |
+-----+-----+-----+

+-----+-----+-----+
| nan |   2 |   3 |
+-----+-----+-----+
|   4 | nan | 333 |
+-----+-----+-----+
| nan |  10 | 333 |
+-----+-----+-----+

+-----+-----+-----+
| 111 |   2 |   3 |
+-----+-----+-----+
|   4 | 222 | 333 |
+-----+-----+-----+
| 111 |  10 | 333 |
+-----+-----+-----+

