**Pandas Series Methods**

In [2]:
import numpy as np
import pandas as pd

In [3]:
## Method1 : astype()
# Start from an integer series and show its dtype
s = pd.Series([1, 2, 3, 4])
print(s.dtype)

# astype() returns a new series converted to the requested dtype; s itself is not reassigned
s_float = s.astype(float)
print(s_float, s_float.dtype, sep='\n')

int64
0    1.0
1    2.0
2    3.0
3    4.0
dtype: float64
float64


In [4]:
# Numeric strings can be converted to integers with astype()
string_series = pd.Series('10 20 30 40'.split())
int_series = string_series.astype(int)
print(int_series.dtype)

int64


In [5]:
# astype(bool) turns every non-zero number into True (a 0 would become False)
nums_series = pd.Series(np.arange(1, 10))
bool_series = nums_series.astype('bool')
print(bool_series)

0    True
1    True
2    True
3    True
4    True
5    True
6    True
7    True
8    True
dtype: bool


In [6]:
## between() - Range Checking
# Read kohli_ipl.csv and keep the 'runs' column as a Series indexed by 'match_no'
vk_runs = (
    pd.read_csv('kohli_ipl.csv')
    .set_index('match_no')['runs']
)
vk_runs

Unnamed: 0_level_0,runs
match_no,Unnamed: 1_level_1
1,1
2,23
3,13
4,12
5,1
...,...
211,0
212,20
213,73
214,25


In [7]:
## checking the half centuries
# between() includes both end points, so the mask is True for scores from 50 to 99
half_centuries = vk_runs.between(left=50, right=99)
vk_runs.loc[half_centuries]

Unnamed: 0_level_0,runs
match_no,Unnamed: 1_level_1
15,50
34,58
41,71
44,56
45,67
52,70
57,57
68,73
71,51
73,58


In [8]:
## how many centuries has vk scored
# Keep only the matches where the score is 100 or more
vk_runs.loc[vk_runs.ge(100)]

Unnamed: 0_level_0,runs
match_no,Unnamed: 1_level_1
120,100
123,108
126,109
128,113
164,100


In [9]:
## clip()
# Anything below the lower bound is raised to the lower bound;
# anything above the upper bound is lowered to the upper bound.

nums_series = pd.Series([10, 20, 50, 60, 90, 120])
print(nums_series.clip(lower=50, upper=90))

0    50
1    50
2    50
3    60
4    90
5    90
dtype: int64


**Methods useful for data cleaning**
1. drop_duplicates()
2. duplicated()
3. isnull()
4. dropna()
5. fillna()

In [10]:
## drop_duplicates()
# keep='last' drops the earlier occurrences of a value and retains only the final one
s1 = pd.Series([1, 2, 2, 3, 3, 3, 4, 4, 4, 4])
s1.drop_duplicates(keep='last')

Unnamed: 0,0
0,1
2,2
5,3
9,4


In [11]:
## duplicated() -> boolean Series that is True for every value already seen earlier in the Series
s1.duplicated().sum() # six entries are repeats of an earlier value

np.int64(6)

In [12]:
## isnull
# Sample series with missing values (NaN) at positions 3, 6 and 9
s3 = pd.Series(
    [1, 2, 3, np.nan, 5, 6, np.nan, 8, 9, np.nan]
)
s3

Unnamed: 0,0
0,1.0
1,2.0
2,3.0
3,
4,5.0
5,6.0
6,
7,8.0
8,9.0
9,


In [13]:
# Boolean mask: True wherever s3 holds a missing value (NaN)
s3.isnull()

Unnamed: 0,0
0,False
1,False
2,False
3,True
4,False
5,False
6,True
7,False
8,False
9,True


In [14]:
## dropna: return the series without its missing (NaN) values
s3.dropna()

Unnamed: 0,0
0,1.0
1,2.0
2,3.0
4,5.0
5,6.0
7,8.0
8,9.0


In [15]:
## fillna: fill null values

# Replace the missing ages with the mean of the known ages
person_age = pd.Series([50, 45, 20, np.nan, 62, np.nan])
mean_age = person_age.mean()
person_age = person_age.fillna(mean_age)

In [16]:
# Display the ages after the missing values were filled
person_age

Unnamed: 0,0
0,50.0
1,45.0
2,20.0
3,44.25
4,62.0
5,44.25


In [17]:
## membership check with isin -> returns a boolean series
person_name = pd.Series(['shravan', 'ayush', 'mohit', 'akshay', 'sahil'])

# True for each entry of the series whose value appears in the lookup list
names_to_find = ['shravan', 'sahil', 'ayush']
person_name.isin(names_to_find)

Unnamed: 0,0
0,True
1,True
2,False
3,False
4,True


In [18]:
# Keep only the entries whose value is one of the listed names
is_listed = person_name.isin(['shravan', 'sahil', 'ayush'])
person_name.loc[is_listed]

Unnamed: 0,0
0,shravan
1,ayush
4,sahil


In [19]:
## apply() -> allows the user to implement custom logic
# Read bollywood.csv and keep the 'lead' column as a Series indexed by 'movie'
movies_data = (
    pd.read_csv('bollywood.csv')
    .set_index('movie')['lead']
)

In [22]:
## applying the logic that the lead actor name should be in upper case
def to_upper(name):
    """Return the given name converted to upper case."""
    return name.upper()

movies_data.apply(to_upper).values

array(['VICKY KAUSHAL', 'VICKY AHUJA', 'ANUPAM KHER', ..., 'VIVEK OBEROI',
       'AJAY DEVGN', 'AKSHAY KUMAR'], dtype=object)

In [28]:
# Read steps_data.csv and keep the 'Steps' column as a Series indexed by 'Day'
daily_steps = (
    pd.read_csv('steps_data.csv')
    .set_index('Day')['Steps']
)

In [30]:
## custom logic using apply: if no of steps > 6000 then good else bad
def rate_steps(step_count):
    """Label a step count 'Good' when it is above 6000, otherwise 'Bad'."""
    if step_count > 6000:
        return 'Good'
    return 'Bad'

daily_steps.apply(rate_steps)


Unnamed: 0_level_0,Steps
Day,Unnamed: 1_level_1
Monday,Bad
Tuesday,Good
Wednesday,Bad
Thursday,Good
Friday,Good
Saturday,Good
Sunday,Good


`head()` and `tail()` return a slice of the original series. In pandas versions where Copy-on-Write is not enabled, that slice may be a view rather than a copy, so changes made to the result of `head()` or `tail()` can affect the original series.
It is therefore always safer to call `.copy()` on the result before making changes.

In [36]:
# Take an independent copy of the first five entries so that edits cannot reach vk_runs
vk_runs_copy = vk_runs.head(n=5).copy()

In [37]:
# Display the copied entries before modifying them
vk_runs_copy

Unnamed: 0_level_0,runs
match_no,Unnamed: 1_level_1
1,1
2,23
3,13
4,12
5,1


In [38]:
# Change the value stored under match_no 1 in the copy only, then display the original series
vk_runs_copy.loc[1] = 24
vk_runs

Unnamed: 0_level_0,runs
match_no,Unnamed: 1_level_1
1,1
2,23
3,13
4,12
5,1
...,...
211,0
212,20
213,73
214,25


In [None]:
## ----------------end of series , To start dataframe-----------------------