In [1]:
import numpy as np
import pandas as pd                     #import the pandas library

In [2]:
# Series whose four values carry explicit string labels supplied through `index`
series = pd.Series(data=[1, 2, 3, 4], index=list('abcd'))
series

a    1
b    2
c    3
d    4
dtype: int64

In [3]:
# No `index` argument is given, so pandas assigns the default integer index 0, 1, 2
series1 = pd.Series(data=[4, 5, 6])
series1

0    4
1    5
2    6
dtype: int64

In [9]:
# Five labelled values in which the value 1 appears twice
series_duplicate = pd.Series(
    data=[1, 1, 2, 3, 4],
    index=['red', 'white', 'green', 'blue', 'yellow'],
)
# unique() returns each distinct value once, dropping the repeated 1
series_duplicate.unique()

array([1, 2, 3, 4])

In [10]:
# Six labelled values in which both 1 and 2 are repeated
series_duplicate1 = pd.Series(
    data=[1, 1, 2, 2, 3, 4],
    index=['red', 'white', 'green', 'blue', 'yellow', 'black'],
)
# The distinct values are the same as for series_duplicate
series_duplicate1.unique()

array([1, 2, 3, 4])

In [11]:
series_duplicate.value_counts()       #counts how many times each distinct value occurs (here 1 occurs twice)

1    2
4    1
3    1
2    1
dtype: int64

In [12]:
#same tally for the second series: both 1 and 2 occur twice
series_duplicate1.value_counts()

2    2
1    2
4    1
3    1
dtype: int64

In [14]:
series1.isin([0])          #element-wise membership test; 0 is not in series1, so every entry is False

0    False
1    False
2    False
dtype: bool

In [18]:
series.isin([0,2])     #0 is absent but 2 is present, so only the entry at index 'b' is True

a    False
b     True
c    False
d    False
dtype: bool

In [22]:
# Use the boolean mask from isin() to keep only the entries whose value is 5;
# the result shows both the matching index label and the value.
series1.loc[series1.isin([5])]

1    5
dtype: int64

In [24]:
#check for null or missing values, i.e. NaN (Not a Number) values
#np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0
series_null = pd.Series([0, 3, 6, np.nan, 8, np.nan])
series_null.isnull()

0    False
1    False
2    False
3     True
4    False
5     True
dtype: bool

In [25]:
#inverse check: True where a value is present (not NaN), False where it is missing
series_null.notnull()

0     True
1     True
2     True
3    False
4     True
5    False
dtype: bool

## DataFrames

A DataFrame is the two-dimensional extension of the Series data structure: a table whose rows and columns are both labelled.

In [29]:
# 4x4 frame holding the integers 0..15, with labelled rows (index) and labelled columns
dataframe = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    index=list('abcd'),
    columns=['cat', 'dog', 'sheep', 'hen'],
)
dataframe

Unnamed: 0,cat,dog,sheep,hen
a,0,1,2,3
b,4,5,6,7
c,8,9,10,11
d,12,13,14,15


In [32]:
#display the column labels (the values and the row index are shown in the next cells)
dataframe.columns

Index(['cat', 'dog', 'sheep', 'hen'], dtype='object')

In [33]:
#the data of the dataframe as a 2-D NumPy array, without row or column labels
dataframe.values

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [34]:
#the row labels of the dataframe
dataframe.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [37]:
dataframe.iloc[2]           #selecting the third row (iloc positions start at 0, so 2 is row 'c')

cat       8
dog       9
sheep    10
hen      11
Name: c, dtype: int64

In [38]:
#selecting the first row (position 0, label 'a')
dataframe.iloc[0]

cat      0
dog      1
sheep    2
hen      3
Name: a, dtype: int64

In [39]:
dataframe['sheep']       #column selection by label; the result is a Series named 'sheep'

a     2
b     6
c    10
d    14
Name: sheep, dtype: int64

In [40]:
#same column selection for the 'dog' column
dataframe['dog']

a     1
b     5
c     9
d    13
Name: dog, dtype: int64

In [46]:
#select multiple rows by passing a list of integer positions (here the first two rows)
dataframe.iloc[[0,1]]

Unnamed: 0,cat,dog,sheep,hen
a,0,1,2,3
b,4,5,6,7


In [49]:
# Slice of consecutive rows: positions 0, 1 and 2 (the stop position 3 is excluded)
dataframe.iloc[:3]

Unnamed: 0,cat,dog,sheep,hen
a,0,1,2,3
b,4,5,6,7
c,8,9,10,11


In [50]:
# Several columns at once: all rows (:) and a list of column labels
dataframe.loc[:, ['dog', 'sheep']]

Unnamed: 0,dog,sheep
a,1,2
b,5,6
c,9,10
d,13,14


In [51]:
#selecting an element of a particular column by position
#.iloc makes the positional lookup explicit: with plain [] on a label-indexed Series
#an integer key is a label lookup, and the positional fallback is deprecated in pandas
dataframe['cat'].iloc[3]

12

In [52]:
#first element of the 'dog' column; .iloc keeps the lookup positional
#(an integer passed to plain [] on a label-indexed Series is a deprecated fallback)
dataframe['dog'].iloc[0]

1

In [54]:
dataframe.isin([3])             #element-wise mask: True only where the value equals 3

Unnamed: 0,cat,dog,sheep,hen
a,False,False,False,True
b,False,False,False,False
c,False,False,False,False
d,False,False,False,False


In [58]:
dataframe.isin([4,3])        #element-wise mask: True where the value is 3 or 4

Unnamed: 0,cat,dog,sheep,hen
a,False,False,False,True
b,True,False,False,False
c,False,False,False,False
d,False,False,False,False


In [64]:
# Filtering with a boolean mask: entries below 2 are kept, every other entry becomes NaN
dataframe.where(dataframe < 2)

Unnamed: 0,cat,dog,sheep
a,0.0,1.0,
b,,,
c,,,
d,,,


In [65]:
# Same filter with a higher threshold: entries below 4 are kept, the rest become NaN
dataframe.where(dataframe < 4)

Unnamed: 0,cat,dog,sheep
a,0.0,1.0,2.0
b,,,
c,,,
d,,,


In [67]:
dataframe_copy = dataframe.copy()       #make an independent copy, i.e. later changes to dataframe do not affect dataframe_copy

In [68]:
#deleting a column in place; dataframe_copy (created above) still has 'sheep'
#NOTE: re-running this cell raises KeyError because the column is already gone
del dataframe['sheep']

In [69]:
#display dataframe after the 'sheep' column was deleted
dataframe

Unnamed: 0,cat,dog
a,0,1
b,4,5
c,8,9
d,12,13


In [70]:
# Transpose: the row labels become column labels and vice versa
dataframe.transpose()

Unnamed: 0,a,b,c,d
cat,0,4,8,12
dog,1,5,9,13
