In [16]:
#A Dataframe is used to represent datasheet spreadsheet or tabular data
#It has a row index and column index and stores data in a 2D format
#The 2D format can be used to attain data from a higher ordered format as well

import pandas as pd
import numpy as np
from pandas import DataFrame, Series

In [4]:
#Constructor Methods for DataFrame objects

#1. Using Dict of equal size lists
Dictdata = {
                'Date': [11, 21, 3, 14, 8],
                'Month': ['January','July','March','December','August'],
                'Year': [2012, 2014, 1994, 1983, 2000]
            }

frame_1 = DataFrame(Dictdata)
frame_1

Unnamed: 0,Date,Month,Year
0,11,January,2012
1,21,July,2014
2,3,March,1994
3,14,December,1983
4,8,August,2000


In [7]:
#We can change the column order by specifying the desired sequence as:

frame_2 = DataFrame(Dictdata ,columns=['Month','Date','Year'])
frame_2

Unnamed: 0,Month,Date,Year
0,January,11,2012
1,July,21,2014
2,March,3,1994
3,December,14,1983
4,August,8,2000


In [8]:
#Like Series, we can generate index numbers based on our inputted values
frame_3 = DataFrame(Dictdata, columns = ['Day','Date','Month','Year'],
                   index = ['one','two','three','four','five'])

frame_3

Unnamed: 0,Day,Date,Month,Year
one,,11,January,2012
two,,21,July,2014
three,,3,March,1994
four,,14,December,1983
five,,8,August,2000


In [9]:
#From a certain dataframe, we can retrieve Series object as Dict key values
frame_3['Month']

one       January
two          July
three       March
four     December
five       August
Name: Month, dtype: object

In [10]:
frame_3.Year

one      2012
two      2014
three    1994
four     1983
five     2000
Name: Year, dtype: int64

In [13]:
#We can retrieve rows from DataFrames by using certain methods
#One such method is 1x indexing field

frame_3.ix['one']

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  after removing the cwd from sys.path.


Day          NaN
Date          11
Month    January
Year        2012
Name: one, dtype: object

In [14]:
#Columns of DataFrame can be modified by assignment as in case of Series

frame_3['Day'] = 'Monday'
frame_3

Unnamed: 0,Day,Date,Month,Year
one,Monday,11,January,2012
two,Monday,21,July,2014
three,Monday,3,March,1994
four,Monday,14,December,1983
five,Monday,8,August,2000


In [15]:
frame_3['Day'] = ['Monday','Wednesday','Friday','Saturday','Tuesday']
frame_3

Unnamed: 0,Day,Date,Month,Year
one,Monday,11,January,2012
two,Wednesday,21,July,2014
three,Friday,3,March,1994
four,Saturday,14,December,1983
five,Tuesday,8,August,2000


In [17]:
#In case of entering data, it should be noted that the length of the Series or Array being entered should be same otherwise NaN values are assigned

val = Series([12,31,23], index = ['two','four','five'])
frame_3['Day'] = val
frame_3

Unnamed: 0,Day,Date,Month,Year
one,,11,January,2012
two,12.0,21,July,2014
three,,3,March,1994
four,31.0,14,December,1983
five,23.0,8,August,2000


In [18]:
#Assigning columns that doesn't exist creates a new column
frame_3['Evenyear'] = ((frame_3.Year % 2) == 0)
frame_3

Unnamed: 0,Day,Date,Month,Year,Evenyear
one,,11,January,2012,True
two,12.0,21,July,2014,True
three,,3,March,1994,True
four,31.0,14,December,1983,False
five,23.0,8,August,2000,True


In [19]:
#Like Dictionaries del deletes a certain key value or column in case of DataFrames

del frame_3['Evenyear']
frame_3

Unnamed: 0,Day,Date,Month,Year
one,,11,January,2012
two,12.0,21,July,2014
three,,3,March,1994
four,31.0,14,December,1983
five,23.0,8,August,2000


In [22]:
#Constructing DataFrame using dict of dicts (nested dict)
month = {'July': {2010: 11, 1991: 21},
         'December': {1768: 14, 1991: 16}}

frame_4 = DataFrame(month)
frame_4

Unnamed: 0,July,December
2010,11.0,
1991,21.0,16.0
1768,,14.0


In [23]:
frame_4.T #Gives transpose of the obtained tabular form

Unnamed: 0,2010,1991,1768
July,11.0,21.0,
December,,16.0,14.0


In [24]:
DataFrame(month, index=[1991,1923,2001])

Unnamed: 0,July,December
1991,21.0,16.0
1923,,
2001,,


In [25]:
#We can also use Dicts of Series to construct DataFrames

series_1 = Series([11, 21], index=[2010, 1991])
series_2 = Series([14,16], index=[1768,1991])

frame_5 = DataFrame({'July': series_1, 'December': series_2})
frame_5

Unnamed: 0,July,December
1768,,14.0
1991,21.0,16.0
2010,11.0,


In [26]:
#Using 2d ndaarys to construct DataFrames
array_data = np.random.randn(4,4)
frame_6 = DataFrame(array_data)
frame_6

Unnamed: 0,0,1,2,3
0,-2.43009,0.678541,-1.168175,-0.330902
1,-0.100094,-0.751364,0.05271,0.316418
2,-0.460224,-0.956564,-1.989718,0.567721
3,-1.594954,0.485913,-0.67502,-0.50694


In [28]:
#New DataFrames can be constructed using previous constructed DataFrames as well

frame_7 = DataFrame(frame_6, columns=[0,2], index=['First','Second',3])
frame_7

Unnamed: 0,0,2
First,,
Second,,
3,-1.594954,-0.67502


In [29]:
#As in case of Series, we can name a certain index or column
frame_4

Unnamed: 0,July,December
2010,11.0,
1991,21.0,16.0
1768,,14.0


In [32]:
frame_4.index.name = 'Year'
frame_4.columns.name = 'Month:'
frame_4

Month:,July,December
Year,Unnamed: 1_level_1,Unnamed: 2_level_1
2010,11.0,
1991,21.0,16.0
1768,,14.0


In [33]:
#Like Series, the values attribute of DataFrame returns data values contained in it
frame_4.values

array([[11., nan],
       [21., 16.],
       [nan, 14.]])

In [34]:
#In case of different dtypes, dtype of value array is taken for every element
frame_3

Unnamed: 0,Day,Date,Month,Year
one,,11,January,2012
two,12.0,21,July,2014
three,,3,March,1994
four,31.0,14,December,1983
five,23.0,8,August,2000


In [35]:
frame_3.values

array([[nan, 11, 'January', 2012],
       [12.0, 21, 'July', 2014],
       [nan, 3, 'March', 1994],
       [31.0, 14, 'December', 1983],
       [23.0, 8, 'August', 2000]], dtype=object)