# Pandas

In [None]:
# Key Features of Pandas
1.Fast and efficient DataFrame object with default and customized indexing.
2.Tools for loading data into in-memory data objects from different file formats.
3.Data alignment and integrated handling of missing data.
4.Reshaping and pivoting of date sets.
5.Label-based slicing, indexing and subsetting of large data sets.
6.Columns from a data structure can be deleted or inserted.
7.Group by data for aggregation and transformations.
8.High performance merging and joining of data.
9.Time Series functionality.

In [2]:
import pandas as pd #this will import pandas into your workspace
import numpy as np 

In [None]:
Pandas deals with the following three data structures −

Series
DataFrame
Panel

# Series
Series is a one-dimensional labeled array capable of holding data of any type (integer, string, float, python objects, etc.). The axis labels are collectively called index.

Syntax:
======
pd.Series(data=None, index=None, dtype=None, name=None, copy=False, fastpath=False)

In [16]:
#Series Using List
s1=pd.Series([10,20,30])
s1

0    10
1    20
2    30
dtype: int64

In [17]:
#Series Using Dict
s1=pd.Series({10:'a',20:'b',30:'c'})
s1

10    a
20    b
30    c
dtype: object

In [18]:
#Series from ndarray
data = np.array(['a','b','c','d'])
s2 = pd.Series(data)
s2

0    a
1    b
2    c
3    d
dtype: object

In [9]:
#Series by Index
s3=pd.Series(['a','b','c','d'],index=[1,2,3,4])
s3

1    a
2    b
3    c
4    d
dtype: object

In [15]:
#Series from Scalar
s = pd.Series(5, index=[0, 1, 2, 3])
s

0    5
1    5
2    5
3    5
dtype: int64

In [32]:
#Retrieving/Accessing Data from Series with Position and Index
s = pd.Series([1,2,3,4,5],index = ['a','b','c','d','e'])
s[4], s['d']

(5, 4)

In [33]:
s

a    1
b    2
c    3
d    4
e    5
dtype: int64

In [35]:
#Retrieve multiple elements using a list of index label values
s[['a','c','d']]

a    1
c    3
d    4
dtype: int64

In [39]:
s[:3]

a    1
b    2
c    3
dtype: int64

In [46]:
s[-3:]

c    3
d    4
e    5
dtype: int64

In [20]:
#Series Working on Null Values
data=[1,np.NaN,np.NaN,np.NaN]
ser=pd.Series(data)
ser

0    1.0
1    NaN
2    NaN
3    NaN
dtype: float64

In [21]:
pd.isnull(ser)

0    False
1     True
2     True
3     True
dtype: bool

In [22]:
#Count of Null Values
pd.isnull(ser).sum()

3

In [23]:
pd.notnull(ser)

0     True
1    False
2    False
3    False
dtype: bool

In [24]:
#Count of Not Null Values
pd.notnull(ser).sum()

1

In [25]:
#Shape of the Series
ser.shape

(4,)

In [26]:
#Size of the Series
ser.size

4

# Series Basic Functionality


In [28]:
#axes
#Returns a list of the row axis labels
ser.axes

[RangeIndex(start=0, stop=4, step=1)]

In [30]:
#dtype
#Returns the dtype of the object.
ser.dtype

dtype('float64')

In [32]:
#empty
#Returns True if series is empty.
ser.empty

False

In [34]:
#ndim
#Returns the number of dimensions of the underlying data
ser.ndim

1

In [36]:
#size
#Returns the number of elements in the underlying data.
ser.size

4

In [38]:
#values
#Returns the Series as ndarray.
ser.values

array([ 1., nan, nan, nan])

In [40]:
#head()
#Returns the first n rows. By default it will fetch the first 5 Rows
ser.head()

0    1.0
1    NaN
2    NaN
3    NaN
dtype: float64

In [73]:
#tail()
#Returns the last n rows. By default it will fetch the Last 5 Rows
ser.tail()

0    1.0
1    NaN
2    NaN
3    NaN
dtype: float64

In [74]:
#Reindexing
ser2=ser.reindex(list('012345'))
ser2

0   NaN
1   NaN
2   NaN
3   NaN
4   NaN
5   NaN
dtype: float64

In [76]:
N=20

df = pd.DataFrame({
   'A': pd.date_range(start='2016-01-01',periods=N,freq='D'),
   'x': np.linspace(0,stop=N-1,num=N),
   'y': np.random.rand(N),
   'C': np.random.choice(['Low','Medium','High'],N).tolist(),
   'D': np.random.normal(100, 10, size=(N)).tolist()
})

#reindex the DataFrame
df_reindexed = df.reindex(index=[0,2,5], columns=['A', 'C', 'B'])
df_reindexed

Unnamed: 0,A,C,B
0,2016-01-01,High,
2,2016-01-03,Low,
5,2016-01-06,High,


In [82]:
df_reindexed1 = df.reindex(index=list('ABCDEFGHIJKLMNOPQRSTUVWXYZ'))
df_reindexed1

Unnamed: 0,A,x,y,C,D
A,NaT,,,,
B,NaT,,,,
C,NaT,,,,
D,NaT,,,,
E,NaT,,,,
F,NaT,,,,
G,NaT,,,,
H,NaT,,,,
I,NaT,,,,
J,NaT,,,,


# DataFrame
A Data frame is a two-dimensional data structure, i.e., data is aligned in a tabular fashion in rows and columns.
Syntax:
======
pd.DataFrame(data=None, index=None, columns=None, dtype=None, copy=False)

In [45]:
#DataFrame using List
data = [1,2,3,4,5]
df = pd.DataFrame(data)
df

Unnamed: 0,0
0,1
1,2
2,3
3,4
4,5


In [66]:
#2 columns of List in Dataframe
data = [['Alex',10],['Bob',12],['Clarke',13]]
df1 = pd.DataFrame(data,columns=['Name','Age'])
df1

Unnamed: 0,Name,Age
0,Alex,10
1,Bob,12
2,Clarke,13


In [68]:
#changing the data type in DataFrame
df = pd.DataFrame(data,columns=['Name','Age'],dtype=float)
df

Unnamed: 0,Name,Age
0,Alex,10.0
1,Bob,12.0
2,Clarke,13.0


In [88]:
#DataFrame using the Dict
data = {'Name':['Tom', 'Jack', 'Steve', 'Ricky'],'Age':[28,34,29,42]}
df = pd.DataFrame(data)
df

Unnamed: 0,Name,Age
0,Tom,28
1,Jack,34
2,Steve,29
3,Ricky,42


In [85]:
#DataFrame from List of Dicts 
data = [{'a': 1, 'b': 2},{'a': 5, 'b': 10, 'c': 20}]
df = pd.DataFrame(data)
df

Unnamed: 0,a,b,c
0,1,2,
1,5,10,20.0


In [78]:
#DataFrame from ndarray
data = np.array(['a','b','c','d',1,2,4,5]).reshape(2,4)
df2 = pd.DataFrame(data,columns=['A','B','C','D'])
df2

Unnamed: 0,A,B,C,D
0,a,b,c,d
1,1,2,4,5


In [82]:
#DataFrame using the Index
data = {'Name':['Tom', 'Jack', 'Steve', 'Ricky'],'Age':[28,34,29,42]}
df = pd.DataFrame(data, index=['rank1','rank2','rank3','rank4'])
df

Unnamed: 0,Name,Age
rank1,Tom,28
rank2,Jack,34
rank3,Steve,29
rank4,Ricky,42


In [86]:
#DataFrame from Dict of Series
d = {'one' : pd.Series([1, 2, 3], index=['a', 'b', 'c']),
   'two' : pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])}

df = pd.DataFrame(d)
df

Unnamed: 0,one,two
a,1.0,1
b,2.0,2
c,3.0,3
d,,4


In [None]:
LOC :
===
loc is label-based, which means that we have to specify the name of the rows and columns that we need to filter out.

IlOC:
=====
iloc is integer index-based. So here, we have to specify rows and columns by their integer index.

IX:
===
IX is combination of both we can used.

In [109]:
#column Selection in DataFrame as Series
df['one']

a    1.0
b    2.0
c    3.0
d    NaN
Name: one, dtype: float64

In [111]:
#One Column fetching as DataFrame
df[['one']]

Unnamed: 0,one
a,1.0
b,2.0
c,3.0
d,


In [131]:
#Column Selection using loc as Series data because in Single column Fecting
df.loc[:,'one']

[a    1.0
 b    2.0
 c    3.0
 d    NaN
 Name: one, dtype: float64]

In [130]:
#Column Selection using loc as DataFrame because of multiple column
df.loc[:,'one':]

Unnamed: 0,one,two
a,1.0,1
b,2.0,2
c,3.0,3
d,,4


In [100]:
#Row Selection in Data Frame 
#df['a']-- we cant select row like this
df.loc['a']
#here Row Selection happening in Series

one    1.0
two    1.0
Name: a, dtype: float64

In [101]:
#Row Selection happening in DataFrame
df.loc[['a']]

Unnamed: 0,one,two
a,1.0,1


In [105]:
df.iloc[0]

one    1.0
two    1.0
Name: a, dtype: float64

In [132]:
df.iloc[[0]]

Unnamed: 0,one,two
a,1.0,1


In [136]:
#Fetching Row and Colum 
df.loc[:,:]

Unnamed: 0,one,two
a,1.0,1
b,2.0,2
c,3.0,3
d,,4


In [140]:
#Fetching Row and Colum 
df.loc[:,'two':]

Unnamed: 0,two
a,1
b,2
c,3
d,4


In [143]:
#Fetching Row and Colum using loc
df.loc['c':,:]

Unnamed: 0,one,two
c,3.0,3
d,,4


In [144]:
#Fetching Row and Colum using iloc
df.iloc[:,:]

Unnamed: 0,one,two
a,1.0,1
b,2.0,2
c,3.0,3
d,,4


In [149]:
#Fetching Row and Colum using iloc
df.iloc[1:,1:]

Unnamed: 0,two
b,2
c,3
d,4


In [151]:
#Fetching Row and Colum using ix
df.ix[1:,'two':]

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  


Unnamed: 0,two
b,2
c,3
d,4


In [153]:
#Fetching Row and Colum using ix
df.ix['b':,1:]

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  


Unnamed: 0,two
b,2
c,3
d,4


In [169]:
#Column Addition
df['three']=[6,'a','b','c']
df

Unnamed: 0,one,two,three
a,1.0,1,6
b,2.0,2,a
c,3.0,3,b
d,,4,c


In [194]:
#column Addition
df=df.append([2.0,5,'d'])
df

  result = result.union(other)


Unnamed: 0,one,two,three,0
0,1.0,1.0,6,
1,2.0,2.0,a,
2,3.0,3.0,b,
3,,4.0,c,
4,,,,
5,,,,
6,,,,
7,,,,
8,,,,
9,,,,


In [191]:
#row Addition
df1=pd.DataFrame([[2.0,5,'d','f']],columns=['one','two','three',0],index=[17])
df=df.append(df1)
df

Unnamed: 0,one,two,three,0
0,1.0,1.0,6,
1,2.0,2.0,a,
2,3.0,3.0,b,
3,,4.0,c,
4,,,,2
5,,,,5
6,,,,d
7,,,,2
8,,,,5
9,,,,d


In [195]:
#column Deletion using del function-- its a permanent Delete
del df[0]
df

Unnamed: 0,one,two,three
0,1.0,1.0,6
1,2.0,2.0,a
2,3.0,3.0,b
3,,4.0,c
4,,,
5,,,
6,,,
7,,,
8,,,
9,,,


In [197]:
#column Deletion using pop function -- here it will show o/p which is deleted and it is a permanent delete
df.pop('two')

0     1.0
1     2.0
2     3.0
3     4.0
4     NaN
5     NaN
6     NaN
7     NaN
8     NaN
9     NaN
10    NaN
11    NaN
12    NaN
13    NaN
14    NaN
15    NaN
0     5.0
0     5.0
17    5.0
0     NaN
1     NaN
2     NaN
Name: two, dtype: float64

In [200]:
#Row Deletion Using the Drop function but it is not a permanent Drop
df.drop(0)

Unnamed: 0,one,three
1,2.0,a
2,3.0,b
3,,c
4,,
5,,
6,,
7,,
8,,
9,,
10,,


In [201]:
df

Unnamed: 0,one,three
0,1.0,6
1,2.0,a
2,3.0,b
3,,c
4,,
5,,
6,,
7,,
8,,
9,,


In [204]:
#by using this assignment operator row deletion will happen
df=df.drop(1)
df

Unnamed: 0,one,three
2,3.0,b
3,,c
4,,
5,,
6,,
7,,
8,,
9,,
10,,
11,,


In [13]:
#for Series 
df1=df['one']
df1[df1>1]

b    2.0
c    3.0
Name: one, dtype: float64

In [15]:
#for DataFrame
df[df>2]

Unnamed: 0,one,two
a,,
b,,
c,3.0,3.0
d,,4.0


# DataFrame Basic Functionality

In [43]:
#axes
#Returns a list of the row and Column axis labels for DataFrame
df.axes

[Index(['a', 'b', 'c', 'd'], dtype='object'),
 Index(['one', 'two'], dtype='object')]

In [45]:
#T or transpose()
#Transposes rows and columns.
df.T

Unnamed: 0,a,b,c,d
one,1.0,2.0,3.0,
two,1.0,2.0,3.0,4.0


In [48]:
#dtypes
#Returns the dtypes in this object.
df.dtypes

one    float64
two      int64
dtype: object

In [49]:
#empty
#True if NDFrame is entirely empty [no items]; if any of the axes are of length 0.
df.empty

False

In [50]:
#ndim
#Number of axes / array dimensions.
df.ndim

2

In [51]:
#shape
#Returns a tuple representing the dimensionality of the DataFrame.
df.shape

(4, 2)

In [52]:
#size
#Number of elements in the NDFrame.
df.size

8

In [53]:
#values
#Numpy representation of NDFrame. for values it will always through output as NDarray only
df.values

array([[ 1.,  1.],
       [ 2.,  2.],
       [ 3.,  3.],
       [nan,  4.]])

In [58]:
#head()
#Returns the first n rows
df.head(2)

Unnamed: 0,one,two
a,1.0,1
b,2.0,2


In [61]:
#tail()
#Returns last n rows.
df.tail(2)

Unnamed: 0,one,two
G,,
H,,


In [97]:
df

Unnamed: 0,one,two
a,1.0,1
b,2.0,2
c,3.0,3
d,,4


# Reindexing
Reindexing changes the row labels and column labels of a DataFrame. To reindex means to conform the data to match a given set of labels along a particular axis.

Multiple operations can be accomplished through indexing like −
 1.Reorder the existing data to match a new set of labels.
 2.Insert missing value (NA) markers in label locations where no data for the label existed.

In [95]:
df1=df.reindex(index=['a','b','c','d','e','f'])
df1

Unnamed: 0,one,two
a,1.0,1.0
b,2.0,2.0
c,3.0,3.0
d,,4.0
e,,
f,,


In [100]:
#Reindex to Align with Other Objects
df1 = pd.DataFrame(np.random.randn(10,3),columns=['col1','col2','col3'])
df2 = pd.DataFrame(np.random.randn(7,3),columns=['col1','col2','col3'])



In [102]:
df1

Unnamed: 0,col1,col2,col3
0,0.256597,2.05459,-0.979055
1,-1.107029,-0.686455,0.246142
2,1.103279,-0.255902,-0.768888
3,-0.781126,0.873766,-0.212839
4,-0.921674,1.408037,0.271216
5,-0.787973,1.148503,1.628017
6,0.280648,-1.839381,-0.477866
7,0.654217,0.512593,0.613537
8,-0.906027,-0.306182,1.652842
9,0.474766,1.085624,-0.828233


In [105]:
df2

Unnamed: 0,col1,col2,col3
0,0.716129,-0.487226,-1.664124
1,-2.847793,0.116717,0.402934
2,-1.279619,1.149115,-0.916469
3,-3.049499,-2.989306,-0.34053
4,-0.375388,-0.635923,-0.076979
5,0.13295,0.915065,0.583117
6,0.152474,0.594795,-1.269056


In [107]:
df1 = df1.reindex_like(df2)
df1

Unnamed: 0,col1,col2,col3
0,0.256597,2.05459,-0.979055
1,-1.107029,-0.686455,0.246142
2,1.103279,-0.255902,-0.768888
3,-0.781126,0.873766,-0.212839
4,-0.921674,1.408037,0.271216
5,-0.787973,1.148503,1.628017
6,0.280648,-1.839381,-0.477866


In [108]:
#Filling while ReIndexing
#reindex() takes an optional parameter method which is a filling method with values as follows −
#pad/ffill − Fill values forward
#bfill/backfill − Fill values backward
#nearest − Fill from the nearest index values

In [112]:
df2 = pd.DataFrame(np.random.randn(2,3),columns=['col1','col2','col3'])
df2

Unnamed: 0,col1,col2,col3
0,-0.52084,0.824669,0.794893
1,-0.296214,-1.139486,1.794681


In [117]:
df3=df2.reindex_like(df1)
df3

Unnamed: 0,col1,col2,col3
0,-0.52084,0.824669,0.794893
1,-0.296214,-1.139486,1.794681
2,,,
3,,,
4,,,
5,,,
6,,,


In [123]:
df4=df2.reindex_like(df1,method='ffill')
df4

Unnamed: 0,col1,col2,col3
0,-0.52084,0.824669,0.794893
1,-0.296214,-1.139486,1.794681
2,-0.296214,-1.139486,1.794681
3,-0.296214,-1.139486,1.794681
4,-0.296214,-1.139486,1.794681
5,-0.296214,-1.139486,1.794681
6,-0.296214,-1.139486,1.794681


In [129]:
#Limits on Filling while Reindexing
df5=df2.reindex_like(df1,method='ffill',limit=3)
df5

Unnamed: 0,col1,col2,col3
0,-0.52084,0.824669,0.794893
1,-0.296214,-1.139486,1.794681
2,-0.296214,-1.139486,1.794681
3,-0.296214,-1.139486,1.794681
4,-0.296214,-1.139486,1.794681
5,,,
6,,,


In [131]:
#Rename Column and Index
df1.rename(columns={'col1' : 'c1', 'col2' : 'c2'},index = {0 : 'apple', 1 : 'banana', 2 : 'durian'})

Unnamed: 0,c1,c2,col3
apple,-1.447227,-1.031659,1.859985
banana,-0.754141,-1.07926,0.721424
durian,-0.245107,-1.187244,0.936519
3,-0.895094,0.99404,0.718102
4,-0.482765,-1.044262,1.631344
5,-0.469062,0.45961,-0.050814


# Sorting

Two kinds of sorting available in Pandas. They are −
    1.By label
    2.By Actual Value


In [134]:
df.iloc[3:,0:1]=4.0

In [136]:
df

Unnamed: 0,one,two
a,1.0,1
b,2.0,2
c,3.0,3
d,4.0,4


# By Label
Using the sort_index() method, by passing the axis arguments and the order of sorting, DataFrame can be sorted.By default, sorting is done on row labels in ascending order.
syntax: 
-------
df.sort_index(axis=0, level=None, ascending=True, inplace=False, kind='quicksort', na_position='last', sort_remaining=True, by=None)



In [148]:
#Sort By Rows by Default
df.sort_index()

Unnamed: 0,one,two,three
a,1.0,1,5
b,2.0,2,6
c,3.0,3,7
d,4.0,4,8


In [147]:
df.sort_index(ascending=False)

Unnamed: 0,one,two,three
d,4.0,4,8
c,3.0,3,7
b,2.0,2,6
a,1.0,1,5


In [144]:
#Added on column
df['three']=[5,6,7,8]
df

Unnamed: 0,one,two,three
a,1.0,1,5
b,2.0,2,6
c,3.0,3,7
d,4.0,4,8


In [149]:
#Sort By Columns 
#By passing the axis argument with a value 1, the sorting can be done on the column labels
df.sort_index(axis=1,ascending=False)

Unnamed: 0,two,three,one
a,1,5,1.0
b,2,6,2.0
c,3,7,3.0
d,4,8,4.0


# By Value
Like index sorting, sort_values() is the method for sorting by values. It accepts a 'by' argument which will use the column name of the DataFrame with which the values are to be sorted.

Syntax:
======

df.sort_values(by, axis=0, ascending=True, inplace=False, kind='quicksort', na_position='last')

In [154]:
df.sort_values('one')

Unnamed: 0,one,two,three
a,1.0,1,5
b,2.0,2,6
c,3.0,3,7
d,4.0,4,8


In [156]:
#Added on column
df['Four']=[8,6,7,9]
df

Unnamed: 0,one,two,three,Four
a,1.0,1,5,8
b,2.0,2,6,6
c,3.0,3,7,7
d,4.0,4,8,9


In [158]:
df.sort_values('Four')

Unnamed: 0,one,two,three,Four
b,2.0,2,6,6
c,3.0,3,7,7
a,1.0,1,5,8
d,4.0,4,8,9


In [162]:
df.sort_values(['Four','one'])

Unnamed: 0,one,two,three,Four
b,2.0,2,6,6
c,3.0,3,7,7
a,1.0,1,5,8
d,4.0,4,8,9


# Sorting Algorithm
sort_values() provides a provision to choose the algorithm from mergesort, heapsort and quicksort. 
Mergesort is the only stable algorithm.
By default 'quicksort'
     

In [164]:
df.sort_values(by='Four' ,kind='mergesort')

Unnamed: 0,one,two,three,Four
b,2.0,2,6,6
c,3.0,3,7,7
a,1.0,1,5,8
d,4.0,4,8,9


# Working with Text Data


In [166]:
s = pd.Series(['Tom', 'William Rick', 'John', 'Alber@t', np.nan, '1234','SteveSmith'])
s

0             Tom
1    William Rick
2            John
3         Alber@t
4             NaN
5            1234
6      SteveSmith
dtype: object

In [168]:
#lower()
#Converts strings in the Series/Index to lower case.
s.str.lower()

0             tom
1    william rick
2            john
3         alber@t
4             NaN
5            1234
6      stevesmith
dtype: object

In [174]:
#upper()
#Converts strings in the Series/Index to upper case.
s.str.upper()

0             TOM
1    WILLIAM RICK
2            JOHN
3         ALBER@T
4             NaN
5            1234
6      STEVESMITH
dtype: object

In [172]:
#len()
#Computes String length().
s.str.len()

0     3.0
1    12.0
2     4.0
3     7.0
4     NaN
5     4.0
6    10.0
dtype: float64

In [176]:
#strip()
#Helps strip whitespace(including newline) from each string in the Series/index from both the sides.
s.str.strip()

0             Tom
1    William Rick
2            John
3         Alber@t
4             NaN
5            1234
6      SteveSmith
dtype: object

In [178]:
#split(' ')
#Splits each string with the given pattern.
s.str.split(' ')

0              [Tom]
1    [William, Rick]
2             [John]
3          [Alber@t]
4                NaN
5             [1234]
6       [SteveSmith]
dtype: object

In [180]:
#cat(sep=' ')
#Concatenates the series/index elements with given separator.
#get_dummies()
#Returns the DataFrame with One-Hot Encoded values.s.str.cat(sep='_')

'Tom_William Rick_John_Alber@t_1234_SteveSmith'

# get_dummies()
Returns the DataFrame with One-Hot Encoded values.
Syntax:
========
pd.get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False, columns=None, sparse=False, drop_first=False, dtype=None)

In [182]:
#get dummies for Series 
s.str.get_dummies()

Unnamed: 0,1234,Alber@t,John,SteveSmith,Tom,William Rick
0,0,0,0,0,1,0
1,0,0,0,0,0,1
2,0,0,1,0,0,0
3,0,1,0,0,0,0
4,0,0,0,0,0,0
5,1,0,0,0,0,0
6,0,0,0,1,0,0


In [186]:
df['five']=['a','b','v','g']
df

Unnamed: 0,one,two,three,Four,five
a,1.0,1,5,8,a
b,2.0,2,6,6,b
c,3.0,3,7,7,v
d,4.0,4,8,9,g


In [197]:
#Get Dummies for DataFrame
df1=df[['five']]
pd.get_dummies(df1)

Unnamed: 0,five_a,five_b,five_g,five_v
a,1,0,0,0
b,0,1,0,0
c,0,0,0,1
d,0,0,1,0


In [232]:
#contains(pattern)
#Returns a Boolean value True for each element if the substring contains in the element, else False.
d=s.str.contains(' ')
pd.DataFrame(s,d)

Unnamed: 0,0
False,Tom
True,William Rick
False,Tom
False,Tom
,
False,Tom
False,Tom


In [246]:
df=pd.DataFrame(s)
df

Unnamed: 0,0
0,Tom
1,William Rick
2,John
3,Alber@t
4,
5,1234
6,SteveSmith


In [234]:
#replace(a,b)
#Replaces the value a with the value b.
#Series
s.str.replace('@','$')

0             Tom
1    William Rick
2            John
3         Alber$t
4             NaN
5            1234
6      SteveSmith
dtype: object

In [240]:
#DataFrame
df.replace('John','$')

Unnamed: 0,0
0,Tom
1,William Rick
2,$
3,Alber@t
4,
5,1234
6,SteveSmith


In [241]:
#repeat(value)
#Repeats each element with specified number of times.
s.str.repeat(2)

0                      TomTom
1    William RickWilliam Rick
2                    JohnJohn
3              Alber@tAlber@t
4                         NaN
5                    12341234
6        SteveSmithSteveSmith
dtype: object

In [243]:
#For Repeat we cant apply DataFrame
df.repeat(df)

AttributeError: module 'pandas' has no attribute 'repeat'

In [244]:
#count(pattern)
#Returns count of appearance of pattern in each element.
s.str.count('m')

0    1.0
1    1.0
2    0.0
3    0.0
4    NaN
5    0.0
6    1.0
dtype: float64

In [248]:
df4.count()

col1    7
col2    7
col3    7
dtype: int64

In [249]:
df.count()

0    6
dtype: int64

In [250]:
#startswith(pattern)
#Returns true if the element in the Series/Index starts with the pattern.
s.str. startswith ('T')

0     True
1    False
2    False
3    False
4      NaN
5    False
6    False
dtype: object

In [251]:
 df.startswith('T')

AttributeError: 'DataFrame' object has no attribute 'startswith'

In [215]:
#endswith(pattern)
#Returns true if the element in the Series/Index ends with the pattern.
s.str.endswith('t')

0    False
1    False
2    False
3     True
4      NaN
5    False
6    False
dtype: object

In [253]:
#find(pattern)
#Returns the first position of the first occurrence of the pattern.
s.str.find('e')

0   -1.0
1   -1.0
2   -1.0
3    3.0
4    NaN
5   -1.0
6    2.0
dtype: float64

In [256]:
df.find()

AttributeError: 'DataFrame' object has no attribute 'find'

In [257]:
#findall(pattern)
#Returns a list of all occurrence of the pattern.
s.str.findall('e')

0        []
1        []
2        []
3       [e]
4       NaN
5        []
6    [e, e]
dtype: object

In [259]:
df.findall('e')

AttributeError: 'DataFrame' object has no attribute 'findall'

In [226]:
#swapcase
#Swaps the case lower/upper.
s.str.swapcase()

0             tOM
1    wILLIAM rICK
2            jOHN
3         aLBER@T
4             NaN
5            1234
6      sTEVEsMITH
dtype: object

In [228]:
#islower()
#Checks whether all characters in each string in the Series/Index in lower case or not. Returns Boolean
s.str.islower()

0    False
1    False
2    False
3    False
4      NaN
5    False
6    False
dtype: object

In [230]:
#isupper()
#Checks whether all characters in each string in the Series/Index in upper case or not. Returns Boolean.
s.str.isupper()

0    False
1    False
2    False
3    False
4      NaN
5    False
6    False
dtype: object

In [260]:
#isnumeric()
#Checks whether all characters in each string in the Series/Index are numeric. Returns Boolean.
s.str.isnumeric()

0    False
1    False
2    False
3    False
4      NaN
5     True
6    False
dtype: object