# Reading and Writing Text Files

In [3]:
import numpy as np
import pandas as pd
from pandas import Series,DataFrame

In [4]:
# read_csv defaults: fields are comma-separated and the first line of the
# file supplies the column names
dframe = pd.read_csv('test.csv')
dframe

Unnamed: 0,q,r,s,t,apple
0,2,3,4,5,pear
1,a,s,d,f,rabbit
2,5,2,5,7,dog


In [5]:
# header=None: the file has no header row, so the first line is kept as data
# and the columns are labelled 0..4 instead
dframe = pd.read_csv('test.csv', header=None)
dframe

Unnamed: 0,0,1,2,3,4
0,q,r,s,t,apple
1,2,3,4,5,pear
2,a,s,d,f,rabbit
3,5,2,5,7,dog


In [6]:
# Same result via read_table; the comma separator is passed explicitly here
# instead of relying on a default
dframe = pd.read_table(
    'test.csv',
    sep=',',
    header=None,
)
dframe

Unnamed: 0,0,1,2,3,4
0,q,r,s,t,apple
1,2,3,4,5,pear
2,a,s,d,f,rabbit
3,5,2,5,7,dog


In [7]:
# nrows limits how many rows are read from the file (here only the first two)
pd.read_csv('test.csv', header=None, nrows=2)

Unnamed: 0,0,1,2,3,4
0,q,r,s,t,apple
1,2,3,4,5,pear


In [8]:
# Write the DataFrame to a CSV file named 'export_test.csv'
dframe.to_csv('export_test.csv')

In [9]:
import sys

In [10]:
# Passing sys.stdout instead of a filename prints the CSV text in the
# notebook, which is handy for previewing what would be written
dframe.to_csv(sys.stdout)

,0,1,2,3,4
0,q,r,s,t,apple
1,2,3,4,5,pear
2,a,s,d,f,rabbit
3,5,2,5,7,dog


In [11]:
# sep chooses the field delimiter used on output; here an underscore
dframe.to_csv(sys.stdout, sep='_')

_0_1_2_3_4
0_q_r_s_t_apple
1_2_3_4_5_pear
2_a_s_d_f_rabbit
3_5_2_5_7_dog


In [12]:
# Any single character works as the delimiter, e.g. a question mark
dframe.to_csv(sys.stdout, sep='?')

?0?1?2?3?4
0?q?r?s?t?apple
1?2?3?4?5?pear
2?a?s?d?f?rabbit
3?5?2?5?7?dog


In [13]:
# columns restricts the export to the listed column labels (0, 1 and 2)
dframe.to_csv(sys.stdout, columns=[0, 1, 2])

,0,1,2
0,q,r,s
1,2,3,4
2,a,s,d
3,5,2,5


Python's standard library also ships a native CSV reader/writer (the `csv` module), which can be used without pandas: https://docs.python.org/2/library/csv.html

# JSON with Python

In [14]:
# Sample JSON document held as a Python string. It mixes a plain string
# value, an array, a JSON null, and an array holding one nested object.
json_obj = """
{   "zoo_animal": "Lion",
    "food": ["Meat", "Veggies", "Honey"],
    "fur": "Golden",
    "clothes": null, 
    "diet": [{"zoo_animal": "Gazelle", "food":"grass", "fur": "Brown"}]
}
"""

In [15]:
import json

In [16]:
# Parse the JSON text into Python objects: the top-level object becomes a
# dict, arrays become lists, and JSON null becomes None
data = json.loads(json_obj)
data

{u'clothes': None,
 u'diet': [{u'food': u'grass', u'fur': u'Brown', u'zoo_animal': u'Gazelle'}],
 u'food': [u'Meat', u'Veggies', u'Honey'],
 u'fur': u'Golden',
 u'zoo_animal': u'Lion'}

In [17]:
# Serialize the Python object back to a JSON string (None is written as null)
json.dumps(data)

'{"food": ["Meat", "Veggies", "Honey"], "zoo_animal": "Lion", "fur": "Golden", "diet": [{"food": "grass", "zoo_animal": "Gazelle", "fur": "Brown"}], "clothes": null}'

In [18]:
# data['diet'] is a list of dicts; each dict becomes one row and its keys
# become the column names
dframe1 = pd.DataFrame(data["diet"])
dframe1

Unnamed: 0,food,fur,zoo_animal
0,grass,Brown,Gazelle


# HTML with Python

In [19]:
from pandas import read_html

In [20]:
# Page that contains the HTML table to load
url = 'http://www.fdic.gov/bank/individual/failed/banklist.html'

# Use the public top-level pd.read_html entry point rather than reaching into
# the pd.io.html submodule. It returns a list of DataFrames, one per <table>
# found on the page.
dframe_list = pd.read_html(url)
dframe2 = dframe_list[0]  # first table on the page
dframe2.head()

Unnamed: 0,Bank Name,City,ST,CERT,Acquiring Institution,Closing Date,Updated Date,Loss Share Type,Agreement Terminated,Termination Date
0,Hometown National Bank,Longview,WA,35156,Twin City Bank,"October 2, 2015","February 12, 2016",,,
1,The Bank of Georgia,Peachtree City,GA,35259,Fidelity Bank,"October 2, 2015","February 12, 2016",,,
2,Premier Bank,Denver,CO,34112,"United Fidelity Bank, fsb","July 10, 2015","December 17, 2015",none,,
3,Edgebrook Bank,Chicago,IL,57772,Republic Bank of Chicago,"May 8, 2015","July 23, 2015",none,,
4,Doral BankEn Espanol,San Juan,PR,32102,Banco Popular de Puerto Rico,"February 27, 2015","May 13, 2015",none,,


In [21]:
# Column labels of the parsed table, returned as a NumPy object array
dframe2.columns.values

array([u'Bank Name', u'City', u'ST', u'CERT', u'Acquiring Institution',
       u'Closing Date', u'Updated Date', u'Loss Share Type',
       u'Agreement Terminated', u'Termination Date'], dtype=object)

# Excel with Python

In [22]:
# Open the workbook inside a context manager so the underlying file handle is
# closed as soon as the sheet has been parsed, instead of staying open for the
# life of the kernel.
with pd.ExcelFile('excel_test.xlsx') as xlsfile:
    # parse() reads the named worksheet into a DataFrame
    # NOTE: this rebinds `dframe`, which previously held the CSV data
    dframe = xlsfile.parse('Sheet1')
dframe

Unnamed: 0,This is a test,Unnamed: 1,Unnamed: 2
0,23,6678,456
1,234,678,456
2,234,7,345
3,34,56,234
4,5,456,2365
