In [2]:
import pandas as pd
import numpy as np

### 1.read from an Excel file
documentation: http://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_excel.html

- pandas.read_excel(io, sheet_name=0, header=0, skiprows=None, skip_footer=0, index_col=None, names=None, usecols=None, parse_dates=False, date_parser=None, na_values=None, thousands=None, convert_float=True, converters=None, dtype=None, true_values=None, false_values=None, engine=None, squeeze=False, **kwds)
- Read an Excel table into a pandas DataFrame
- you don't need to have MS-Excel on your computer

In [40]:
file_name_string = '/Users/syamkumarj/Documents/Workspace/python_anaconda_workspace/Pandas/Employees.csv'
employees_df = pd.read_excel(file_name_string,  index_col=None, na_values=['NA'])

XLRDError: Unsupported format, or corrupt file: Expected BOF record; found b'A,B\n1,4\n'

In [3]:
employees_df

Unnamed: 0,Department,Name,YearsOfService,Grade
0,Marketing,Able,4,a
1,Engineering,Baker,7,b
2,Accounting,Charlie,12,c
3,Marketing,Delta,1,d
4,Engineering,Echo,15,f
5,Accounting,Foxtrot,9,a
6,Marketing,Golf,3,b
7,Engineering,Hotel,1,c
8,Accounting,India,2,d
9,Marketing,Juliet,5,f


### 2.write to a comma separated value (.csv) file
- documentation: http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_csv.html
- Write DataFrame to a comma-separated values (csv) file
- DataFrame.to_csv(path_or_buf=None, sep=', ', na_rep='', float_format=None, columns=None, header=True, index=True, index_label=None, mode='w', encoding=None, compression=None, quoting=None, quotechar='"', line_terminator='\n', chunksize=None, tupleize_cols=None, date_format=None, doublequote=True, escapechar=None, decimal='.')

In [33]:
d=pd.DataFrame({'A':[1,2,3],'B':[4,5,6]
})

In [39]:
#path to the output csv file
file_name_string_csv = '/Users/syamkumarj/Documents/Workspace/python_anaconda_workspace/Pandas/Employees.csv'
d.to_csv(file_name_string_csv , index=False)

### 3.Reading data from URL

In [43]:
from sklearn import datasets
iris=datasets.load_iris()
#iris.DESCR -# describes data set

In [45]:
myUrl='http://aima.cs.berkeley.edu/data/iris.csv'
import urllib
urlRequest=urllib.request.Request(myUrl)
iris_file=urllib.request.urlopen(urlRequest)
iris_fromUrl=pd.read_csv(iris_file,sep=',',header=None,decimal='.',names=['sepal_length','sepal_width','petal_length','petal_Width','traget'])

In [47]:
iris_fromUrl.tail(6)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_Width,traget
144,6.7,3.3,5.7,2.5,virginica
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica
149,5.9,3.0,5.1,1.8,virginica


### 4.Reading from database

- connecting to postgres
- install Psycopg2 with conda  
**conda install Psycopg2**
- 

In [53]:
#below general way WONT WORK with Pandas
import psycopg2
#conn_string="host='localhost' dbname='postgres' user='postgres' password='postgres'"
#conn = psycopg2.connect(conn_string)
#cursor=conn.cursor()
# df=pd.read_sql_table('currency',cursor)

##### we need to use create engine method from sqlalchemy

In [59]:
from sqlalchemy import create_engine
engine = create_engine('postgresql://postgres:postgres@localhost:5432/postgres')
df=pd.read_sql_table('currency',engine)
#df.index=['hi','hello']
df

Unnamed: 0,name,country_name,symbol
hi,RUPEES,INDIA,R
hello,Pound,United Kingdom,Pound Sign


### 5.Handling Large Data sets

#### we can split large datasets into small chunck by using iterators

In [73]:
myUrl='http://aima.cs.berkeley.edu/data/iris.csv'
import urllib
urlRequest=urllib.request.Request(myUrl)
iris_file=urllib.request.urlopen(urlRequest)


##### pulling is same as above but the only difference is we added  
***iterator*** attribute and set its value to **True**

In [74]:
my_iris_iterator=pd.read_csv(iris_file,sep=',',header=None,decimal='.',names=['sepal_length','sepal_width','petal_length','petal_Width','traget'],
                        iterator=True)

In [75]:
df=my_iris_iterator.get_chunk(10)
df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_Width,traget
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
5,5.4,3.9,1.7,0.4,setosa
6,4.6,3.4,1.4,0.3,setosa
7,5.0,3.4,1.5,0.2,setosa
8,4.4,2.9,1.4,0.2,setosa
9,4.9,3.1,1.5,0.1,setosa


### 6 Read/Write JSON

#### read/write CSV

- read_json
- to_json

##### creating a json string

In [76]:
EmployeeRecords = [{'EmployeeID':451621, 'EmployeeName':'Preeti Jain', 'DOJ':'30-Aug-2008'},

{'EmployeeID':123621, 'EmployeeName':'Ashok Kumar', 'DOJ':'25-Sep-2016'},

{'EmployeeID':451589, 'EmployeeName':'Johnty Rhodes', 'DOJ':'04-Nov-2016'}]

In [77]:
import json
emp_records_json_str = json.dumps(EmployeeRecords)
df = pd.read_json(emp_records_json_str, orient='records', 
                  convert_dates=['DOJ'])
print(df)

         DOJ  EmployeeID   EmployeeName
0 2008-08-30      451621    Preeti Jain
1 2016-09-25      123621    Ashok Kumar
2 2016-11-04      451589  Johnty Rhodes


### Similar functions
[Write to Excel](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_excel.html)

[Read from .csv](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_csv.html)

SO MANY METHODS ARE available

Read/To Html  
Read/To Clipboard  
Read/To dict  
etc..

In [14]:
#help(pd.read_csv)
#help(pd.DataFrame.to_csv)

#### read/write Excel

In [15]:
#help(pd.read_excel)
#help(pd.DataFrame.to_excel)

#### read/write sql table

In [18]:
#help(pd.read_sql_table)
# help(pd.read_sql_query)
help(pd.DataFrame.to_sql)

Help on function to_sql in module pandas.core.generic:

to_sql(self, name, con, flavor=None, schema=None, if_exists='fail', index=True, index_label=None, chunksize=None, dtype=None)
    Write records stored in a DataFrame to a SQL database.
    
    Parameters
    ----------
    name : string
        Name of SQL table
    con : SQLAlchemy engine or DBAPI2 connection (legacy mode)
        Using SQLAlchemy makes it possible to use any DB supported by that
        library. If a DBAPI2 object, only sqlite3 is supported.
    flavor : 'sqlite', default None
        .. deprecated:: 0.19.0
           'sqlite' is the only supported option if SQLAlchemy is not
           used.
    schema : string, default None
        Specify the schema (if database flavor supports this). If None, use
        default schema.
    if_exists : {'fail', 'replace', 'append'}, default 'fail'
        - fail: If table exists, do nothing.
        - replace: If table exists, drop it, recreate it, and insert data.
        

#### read/write josn

In [21]:
#help(pd.read_json)
#help(pd.DataFrame.to_json)