<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#CSV-Files" data-toc-modified-id="CSV-Files-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>CSV Files</a></span><ul class="toc-item"><li><span><a href="#Reading" data-toc-modified-id="Reading-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>Reading</a></span></li><li><span><a href="#Writing" data-toc-modified-id="Writing-1.2"><span class="toc-item-num">1.2&nbsp;&nbsp;</span>Writing</a></span></li></ul></li><li><span><a href="#JSON-Data" data-toc-modified-id="JSON-Data-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>JSON Data</a></span></li><li><span><a href="#Excel-Files" data-toc-modified-id="Excel-Files-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Excel Files</a></span><ul class="toc-item"><li><span><a href="#Reading" data-toc-modified-id="Reading-3.1"><span class="toc-item-num">3.1&nbsp;&nbsp;</span>Reading</a></span></li><li><span><a href="#Writing" data-toc-modified-id="Writing-3.2"><span class="toc-item-num">3.2&nbsp;&nbsp;</span>Writing</a></span></li></ul></li><li><span><a href="#SAS-Files" data-toc-modified-id="SAS-Files-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>SAS Files</a></span></li><li><span><a href="#Stata-Files" data-toc-modified-id="Stata-Files-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Stata Files</a></span></li><li><span><a href="#HDF5-Files" data-toc-modified-id="HDF5-Files-6"><span class="toc-item-num">6&nbsp;&nbsp;</span>HDF5 Files</a></span></li><li><span><a href="#MATLAB-Files" data-toc-modified-id="MATLAB-Files-7"><span class="toc-item-num">7&nbsp;&nbsp;</span>MATLAB Files</a></span></li></ul></div>

# CSV Files
## Reading

In [None]:
import pandas as pd

# Each call below shows an alternative way of reading the same file; every
# assignment replaces the previous `df`.

# Take the column names from row 0 and load only the first three columns.
df = pd.read_csv('filename.csv', sep=',',
                 header=0,  # index of the row containing column names
                 usecols=[0, 1, 2])  # read only these columns

# Do not read a header row from the file; supply the column names instead.
df = pd.read_csv('filename.csv', sep=',',
                 header=None,
                 names=['col1', 'col2', 'col3'])  # name/rename columns

df = pd.read_csv('filename.csv', sep=r'\s+', header=0)  # regexp as separator

df = pd.read_csv('filename.csv', sep=',', header=0, nrows=100)  # read only 100 rows

# read csv file in pieces
pieces = pd.read_csv('filename.csv', sep=',', header=0, chunksize=100)
for chunk in pieces:  # distinct name, so the loop does not overwrite `df`
    pass  # process `chunk` here


## Writing

In [None]:
# Write only the selected columns; missing values are written as 'NA'.
columns_to_write = ['col1', 'col3']
df.to_csv('filename.csv', na_rep='NA', columns=columns_to_write)

* See also Python's built-in `csv` module.

# JSON Data

In [31]:
import json

import pandas as pd

data_json = '''{"classmates": [
    {"name": "Wes", "age": 32, "pet": "cat"},
    {"name": "Scott", "age": 27, "pet": null},
    {"name": "Katie", "age": 30, "pet": "dog"} ]}'''
# Parse the JSON text into Python objects (JSON `null` becomes `None`).
data_dict = json.loads(data_json)
print(data_dict['classmates'])
print('---------')

# A list of records (one dict per row) converts directly to a DataFrame.
df = pd.DataFrame(data_dict['classmates'])
print(df)
print('---------')

# Default orientation: one JSON object per column, keyed by row label.
print(df.to_json(path_or_buf=None))  # give path_or_buf to write to a file
print('---------')
# orient='records': a JSON array with one object per row.
print(df.to_json(path_or_buf=None, orient='records'))

[{'name': 'Wes', 'age': 32, 'pet': 'cat'}, {'name': 'Scott', 'age': 27, 'pet': None}, {'name': 'Katie', 'age': 30, 'pet': 'dog'}]
---------
   age   name   pet
0   32    Wes   cat
1   27  Scott  None
2   30  Katie   dog
---------
{"age":{"0":32,"1":27,"2":30},"name":{"0":"Wes","1":"Scott","2":"Katie"},"pet":{"0":"cat","1":null,"2":"dog"}}
---------
[{"age":32,"name":"Wes","pet":"cat"},{"age":27,"name":"Scott","pet":null},{"age":30,"name":"Katie","pet":"dog"}]


# Excel Files
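The packages `xlrd`, `xlwt`, and `openpyxl` must be installed first. (Recent pandas versions use `openpyxl` for `.xlsx` files and `xlrd` only for legacy `.xls` files.)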

## Reading

In [None]:
import pandas as pd

# load Excel file:
filename = 'some_file.xlsx'

# The context manager closes the workbook once the block is left, so the
# file handle is not leaked.
with pd.ExcelFile(filename) as xls_file:
    # print sheet names
    print(xls_file.sheet_names)

    # load a sheet (assign the result of the call you need to keep it)
    xls_file.parse('sheet_name')  # specify sheet name
    xls_file.parse(0)  # specify sheet index
    xls_file.parse('sheet_name', header=1)  # give the row containing column names (default: 0)
    xls_file.parse('sheet_name', usecols=[0, 2, 3])  # specify the columns to be parsed
    xls_file.parse('sheet_name', names=['col1', 'col2'])  # rename the columns
    # For more arguments, see:
    # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_excel.html


## Writing

In [40]:
import pandas as pd

# The context manager saves and closes the workbook on exit; the explicit
# `writer.save()` call was removed in pandas 2.0.
with pd.ExcelWriter('test.xlsx') as writer:
    # Pass the sheet name by keyword; the positional form is deprecated.
    df.to_excel(writer, sheet_name='Sheet1')

# SAS Files

In [None]:
from sas7bdat import SAS7BDAT

# reading: open the SAS data set and convert its contents to a DataFrame
sas_path = 'filename.sas7bdat'
with SAS7BDAT(sas_path) as sas_file:
    df = sas_file.to_data_frame()

# Stata Files

In [None]:
import pandas as pd

# reading: load a Stata file into a DataFrame
stata_path = 'filename'
df = pd.read_stata(stata_path)

# HDF5 Files

In [None]:
import h5py

# reading: open the HDF5 file in read-only mode
hdf5_path = 'filename.hdf5'
data = h5py.File(hdf5_path, mode='r')

# MATLAB Files

In [None]:
import scipy.io

# reading: load the variables stored in a MATLAB .mat file
mat_path = 'filename.mat'
data = scipy.io.loadmat(mat_path)  # `data` will be a dictionary