
# Data Structures in Pandas

## Pandas makes use of two different data structures:
- Series
- DataFrame
    
### A Series represents data in 1D form, while a DataFrame represents data in 2D tabular form

In [10]:
import numpy as np
import pandas as pd

In [11]:
#pandas Series: one-dimensional array with labels

In [12]:
# Obtain a Series from a Python dictionary (a data structure for storing key-value pairs).
# The dictionary keys act as the index (labels) for the values.
# The variable is deliberately not named `dict`: that would shadow the built-in
# `dict` type for every later cell in the kernel session.
series_data = {'a': 3, 'b': 'cat', 'c': 2.5}
pd.Series(series_data)

a      3
b    cat
c    2.5
dtype: object

In [13]:
# Series built from two parallel lists: the first supplies the values,
# the second supplies the index labels (spelled out by keyword here).
oneD = pd.Series(
    data=[100, 'cat', 310, 'gog', 500],
    index=['Amy', 'Bobby', 'Cat', 'Don', 'Emma'],
)
oneD

Amy      100
Bobby    cat
Cat      310
Don      gog
Emma     500
dtype: object

In [14]:
# The index is the label attached to each value; it is passed via the `index` keyword.
oned_values = [100, 'cat', 310, 'gog', 500]
oned_labels = ['Amy', 'Bobby', 'Cat', 'Don', 'Emma']  # one label per value, in order
oneD = pd.Series(oned_values, index=oned_labels)
oneD

Amy      100
Bobby    cat
Cat      310
Don      gog
Emma     500
dtype: object

In [15]:
oneD.loc[['Cat','Emma']] #.loc is a label-based indexer: select the entries labelled 'Cat' and 'Emma'

Cat     310
Emma    500
dtype: object

In [16]:
# Extract the data at positions 0, 3 and 4.
# .iloc is used because this Series has string labels: integer keys passed to plain []
# rely on a deprecated positional fallback and would otherwise be read as labels.
oneD.iloc[[0, 3, 4]]

Amy     100
Don     gog
Emma    500
dtype: object

In [17]:
oneD.iloc[1] #.iloc is integer-position based (from 0 to length-1 of the axis); this returns the value at position 1

'cat'

In [18]:
#check if 'cat' is one of the Series' index labels
#`in` on a Series tests the labels, not the values: 'cat' is a value (stored under 'Bobby'), so it is not found

'cat' in oneD

False

In [19]:
'Cat' in oneD #'Cat' is an index label, so the membership test succeeds

True

In [20]:
#DataFrames- 2D data structure. Stores data in tabular form (rows and columns)
#<class 'pandas.core.frame.DataFrame'>

In [21]:
# Two float columns keyed by fruit name. 'melon' is present only in column B,
# so the two Series do not share an identical set of labels.
d = {
    'A': pd.Series({'apple': 100., 'pear': 200., 'orange': 300.}),
    'B': pd.Series({'apple': 111., 'pear': 222., 'orange': 333., 'melon': 4444.}),
}

In [22]:
# Combine the dict of Series into a DataFrame: each key becomes a column and the
# Series are aligned on their index labels. A label missing from one Series
# (here 'melon' in column A) gets NaN in that column.
df = pd.DataFrame(d)
df  # last expression of the cell: shown with the notebook's rich table display instead of print()

            A       B
apple   100.0   111.0
melon     NaN  4444.0
orange  300.0   333.0
pear    200.0   222.0


In [23]:
print(type(df)) #confirm that df is a pandas DataFrame object

<class 'pandas.core.frame.DataFrame'>


In [24]:
df.index #the row labels (index values) of the DataFrame

Index(['apple', 'melon', 'orange', 'pear'], dtype='object')

In [25]:
df.columns #the column names of the DataFrame

Index(['A', 'B'], dtype='object')

In [29]:
# Keep only the listed rows (in this order) and the single column 'B'.
df.reindex(index=['orange', 'melon', 'apple'], columns=['B'])

Unnamed: 0,B
orange,333.0
melon,4444.0
apple,111.0
