# Getting Started with Data

Example code blocks showing how to load various data formats into Python.

*Designed for Python 3.5+*

In [None]:
#pandas is commonly imported as pd
import pandas as pd

#We'll import the other libraries as needed

## .csv — Comma-Separated Values

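A CSV file is plain text in which each line is one record and the fields within a record are separated by commas.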

### Std Python

Conceptually, the basics of a CSV are just like splitting a string by commas:

In [None]:
# One CSV record held in memory as plain text: four comma-delimited fields.
csv_row = '1,2.0,Three point five,True'

# Splitting on the delimiter yields the fields; every one of them is still a string.
print(csv_row.split(sep=','))

### Pandas

In [None]:
from io import StringIO

# read_csv treats a plain string as a file path, so wrap the CSV text in an
# in-memory text buffer. header=None keeps the single row as data instead of
# consuming it as the column names.
print(pd.read_csv(StringIO(csv_row), header=None))

## .xls .xlsx — Excel Spreadsheet

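Excel workbooks (`.xls` is the older binary format, `.xlsx` the newer XML-based one) store tabular data in one or more named sheets.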

### Std Python



In [19]:
import openpyxl
# You can also use xlrd, xlsxwriter, and a host of others.
# See http://www.python-excel.org/ for all your options.

# Build a small workbook on disk so the example is self-contained.
wb = openpyxl.Workbook()
ws = wb.create_sheet("NewSheet")

ws['A1'] = "Name"
ws['A2'] = "Michael"
ws['B1'] = "fav_color"
ws['B2'] = "Purple"

wb.save("ExcelData.xlsx")


excel_row = 'ExcelData.xlsx'
data = openpyxl.load_workbook(excel_row)
# Iterating a workbook yields its worksheets, and iterating a worksheet yields
# rows as tuples of cells, so three nested loops are needed to reach each
# cell's value (two loops end up calling .value on a row tuple).
for sheet in data:
    for row in sheet:
        for cell in row:
            print(cell.value)

### Pandas

In [None]:
# Suggestion: functions named after file type and if with Pandas
#   ie, csv_load and csv_pandas or similar

file = "some excel file"  # placeholder: replace with the path of a real workbook
# only making one Excel function, several outputs to demo
def Excel(file):
    """Demonstrate several ways of loading an Excel workbook with pandas.

    Parameters
    ----------
    file : str, path object or file-like
        The workbook to read; passed unchanged to ``pd.read_excel`` and
        ``pd.ExcelFile``.

    Returns
    -------
    tuple
        ``(basicLoad, alternateLoad, sheetByName, sheetsByIndex,
        allSheetsByHeader, allSheetsBySheets, subset)``, one entry per
        loading technique shown below, in that order.

    Notes
    -----
    The demo calls assume the workbook has a sheet named ``"Sheet1"`` and
    sheets at positions 0, 1 and 5; pandas raises if they are missing.
    """
    # pandas delegates the actual parsing to a separate Excel engine library
    # (for example openpyxl for .xlsx files), which must be installed.

    # There is more than one entry point for reading a workbook.
    basicLoad = pd.read_excel(file)
    # ExcelFile keeps the workbook open for repeated reads; the caller owns it.
    alternateLoad = pd.ExcelFile(file)

    # read_excel defaults to read the first sheet in an excel book
    # For a comprehensive list of parameters for read_excel, see:
    # https://pandas.pydata.org/docs/reference/api/pandas.read_excel.html
    # you can specify the sheet you want by name (keyword is sheet_name; the
    # older spelling "sheetname" is no longer accepted by pandas)
    sheetByName = pd.read_excel(file, sheet_name="Sheet1")
    # or several sheets by index, which returns a dict keyed by those indexes
    sheetsByIndex = pd.read_excel(file, sheet_name=[0, 1, 5])

    # if you don't know which sheets you want, sheet_name=None loads every
    # sheet into a dict of DataFrames keyed by sheet name. header=None is a
    # separate option: it tells pandas the sheets have no header row.
    allSheetsByHeader = pd.read_excel(file, sheet_name=None, header=None)
    allSheetsBySheets = pd.read_excel(file, sheet_name=None)

    # You can skip leading rows and trailing (footer) rows, and supply your
    # own column names (keyword is skipfooter; "skip_footer" was removed)
    subset = pd.read_excel(file, skipfooter=5, skiprows=2, names=["COLNAMES"])

    return basicLoad, alternateLoad, sheetByName, sheetsByIndex, allSheetsByHeader, allSheetsBySheets, subset

## .json — JavaScript Object Notation

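JSON is a lightweight text format built from nested objects (key–value pairs) and arrays; it maps naturally onto Python dicts and lists.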

### Std Python



In [None]:
import json

# Serialize a small list of records to a JSON string so there is something to load.
json_row = json.dumps([{"name": "Michael", "fav_color": "purple"}])

# json.loads parses JSON text back into native Python lists and dicts.
print(json.loads(json_row))

### Pandas

In [None]:
from io import StringIO

# Newer pandas versions deprecate passing a literal JSON string to read_json,
# so hand it a file-like text buffer instead.
print(pd.read_json(StringIO(json_row)))

## .xml — eXtensible Markup Language

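XML is a text format that represents hierarchical data as nested, tagged elements with optional attributes.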

### Std Python




### Pandas

## .yml .yaml — YAML Ain't Markup Language (originally "Yet Another Markup Language")

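YAML is a human-readable text format that uses indentation to express nested mappings and lists; it is common in configuration files.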

### Std Python




### Pandas

## .p .pkl — Pickle

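Pickle is Python's own binary serialization format for saving and restoring Python objects. Only unpickle data from sources you trust.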

### Std Python

In [None]:
import pickle

# pickle.dumps serializes to an in-memory bytes object; no file is required.
pickle_row = pickle.dumps([{"name": "Michael", "fav_color": "purple"}])

# Bytes are restored with pickle.loads; pickle.load (no "s") expects an open
# binary file object instead, which is why it fails when handed bytes.
# Only unpickle data you trust: unpickling can execute arbitrary code.
print(pickle.loads(pickle_row))

### Pandas

In [None]:
from io import BytesIO

# read_pickle expects a path or a file-like object, not raw bytes, so wrap the
# pickled bytes in an in-memory binary buffer rather than writing a file.
# Only unpickle data you trust: unpickling can execute arbitrary code.
pd.read_pickle(BytesIO(pickle_row))