# In [4]:
import pandas as pd
import numpy as np

# The first line of the file supplies the column labels; pandas adds a
# default integer row index on its own.
df = pd.read_csv(
    '/home/vk/Python_Source/pydata-book/examples/ex1.csv',
)
df
"""
 	a 	b 	c 	d 	message
0 	1 	2 	3 	4 	hello
1 	5 	6 	7 	8 	world
2 	9 	10 	11 	12 	foo"""

# Header-less file, variant 1: header=None tells pandas there is no header
# row, so the columns are numbered automatically.
pd.read_csv(
    '/home/vk/Python_Source/pydata-book/examples/ex2.csv',
    header=None,
)
"""
 	0 	1 	2 	3 	4
0 	1 	2 	3 	4 	hello
1 	5 	6 	7 	8 	world
2 	9 	10 	11 	12 	foo"""

# Header-less file, variant 2: supply the column labels explicitly.
pd.read_csv(
    '/home/vk/Python_Source/pydata-book/examples/ex2.csv',
    names=['a', 'b', 'c', 'd', 'message'],
)
"""
a 	b 	c 	d 	    message
0 	1 	2 	3 	4 	hello
1 	5 	6 	7 	8 	world
2 	9 	10 	11 	12 	foo"""

# Same explicit labels, but the 'message' column becomes the row index.
names = ['a', 'b', 'c', 'd', 'message']
pd.read_csv(
    '/home/vk/Python_Source/pydata-book/examples/ex2.csv',
    names=names,
    index_col='message',
)

# Hierarchical (two-level) row index built from the key1 and key2 columns.
pd.read_csv(
    '/home/vk/Python_Source/pydata-book/examples/csv_mindex.csv',
    index_col=['key1', 'key2'],
)
"""
key1  key2 		
one 	a 	1 	2
        b 	3 	4
        c 	5 	6
        d 	7 	8
two 	a 	9 	10
        b 	11 	12
        c 	13 	14
        d 	15 	16"""

# The fields of this file are separated by whitespace rather than commas,
# so the separator is given as a regular expression (one or more
# whitespace characters).
# The pattern must be a raw string: in a plain literal "\s" is an invalid
# escape sequence (DeprecationWarning, and a SyntaxWarning from Python 3.12).
result = pd.read_csv(
    '/home/vk/Python_Source/pydata-book/examples/ex3.txt',
    sep=r"\s+",
)
result
"""
            A        	B       	C
aaa 	-0.264438 	-1.026059 	-0.619500
bbb 	0.927272 	0.302904 	-0.032399
ccc 	-0.264273 	-0.386314 	-0.217601
ddd 	-0.871858 	-0.348382 	1.100491"""

# Ignore the 1st, 3rd and 4th lines of the file; skiprows takes 0-based
# line numbers, hence [0, 2, 3].
pd.read_csv(
    '/home/vk/Python_Source/pydata-book/examples/ex4.csv',
    skiprows=[0, 2, 3],
)
"""
 	a 	b 	c 	d 	message
0 	1 	2 	3 	4 	hello
1 	5 	6 	7 	8 	world
2 	9 	10 	11 	12 	foo"""

# Missing values.
# By default pandas parses a set of marker strings as NaN, among them the
# empty field, 'NA', 'NULL', 'NaN' and '-1.#IND' (the latter is one token).
result = pd.read_csv(
    '/home/vk/Python_Source/pydata-book/examples/ex5.csv',
)
"""
something,a,b,c,d,message
one,1,2,3,4,NA
two,5,6,,8,world
three,9,10,11,12,foo"""

"""
 something 	a 	b 	c 	d 	message
0 	one 	1 	2 	3.0 	4 	NaN
1 	two 	5 	6 	NaN 	8 	world
2 	three 	9 	10 	11.0 	12 	foo"""

# Boolean mask of the missing cells in `result`.
result.isna()
"""
 	something 	a 	b 	c 	d 	message
0 	False 	False 	False 	False 	False 	True
1 	False 	False 	False 	True 	False 	False
2 	False 	False 	False 	False 	False 	False"""

# result = pd.read_csv('/home/vk/Python_Source/pydata-book/examples/ex5.csv', na_values = ['NULL'])
# NOTE: the call above is pointless here - 'NULL' is already one of pandas' default NA markers.

# Switch off the built-in NA markers: with keep_default_na=False and no
# na_values given, no string is parsed as NaN.
results2 = pd.read_csv(
    '/home/vk/Python_Source/pydata-book/examples/ex5.csv',
    keep_default_na=False,
)
results2
"""
 	something 	a 	b 	c 	d 	message
0 	one 	1 	2 	3 	4 	NA
1 	two 	5 	6 		8 	world
2 	three 	9 	10 	11 	12 	foo"""

# Missing-value mask for results2: every cell is False in the output below,
# because the file was read with keep_default_na=False (the 'NA' text and
# the empty `c` field are kept as-is).
results2.isna()
"""
    something 	a 	b 	      c 	d 	  message
0 	False 	False 	False 	False 	False 	False
1 	False 	False 	False 	False 	False 	False
2 	False 	False 	False 	False 	False 	False"""

# Treat only the literal text 'NA' as missing: the default markers are
# disabled and 'NA' is registered as the single custom marker.
results3 = pd.read_csv(
    '/home/vk/Python_Source/pydata-book/examples/ex5.csv',
    keep_default_na=False,
    na_values=['NA'],
)
results3
"""
something 	a 	b 	c 	d 	message
0 	one 	1 	2 	3 	4 	NaN
1 	two 	5 	6 		8 	world
2 	three 	9 	10 	11 	12 	foo"""

# Missing-value mask for results3: in the output below only the 'NA' entry
# of the `message` column is True; the empty `c` field is not flagged.
results3.isna()
"""
 	something 	a 	b 	c 	d 	message
0 	False 	False 	False 	False 	False 	True
1 	False 	False 	False 	False 	False 	False
2 	False 	False 	False 	False 	False 	False"""

# Per-column custom NA markers: the dict maps a column name to the value(s)
# that should be read as missing in that column only.
sentinels = {
    'message': ['foo', 'NA'],
    'b': '10',
}
pd.read_csv(
    '/home/vk/Python_Source/pydata-book/examples/ex5.csv',
    keep_default_na=False,
    na_values=sentinels,
)
"""
 	something 	a 	b 	c 	d 	message
0 	one 	1 	2.0 	3 	4 	NaN
1 	two 	5 	6.0 		8 	world
2 	three 	9 	NaN 	11 	12 	NaN"""

  result = pd.read_csv('/home/vk/Python_Source/pydata-book/examples/ex3.txt', sep = "\s+")


"""
Unnamed: 0,something,a,b,c,d,message
0,one,1,2.0,3.0,4,
1,two,5,6.0,,8,world
2,three,9,,11.0,12,"""
