### Important Series Methods

In [7]:
import sys

import numpy as np
import pandas as pd

#### List of important methods
    astype()
    between()
    clip()
    drop_duplicates()
    isnull()
    dropna()
    fillna()
    isin()
    apply()
    copy()

In [10]:
# Load the subscribers-gained data. squeeze('columns') turns the single-column
# DataFrame into a Series; unlike a bare squeeze(), it never collapses a
# one-row file down to a scalar.
subs = pd.read_csv('subs.csv').squeeze('columns')
subs

0       48
1       57
2       40
3       43
4       44
      ... 
360    231
361    226
362    155
363    144
364    172
Name: Subscribers gained, Length: 365, dtype: int64

In [16]:
# Load the runs-per-match data, using match_no as the index.
# squeeze('columns') turns the remaining single column into a Series; unlike a
# bare squeeze(), it never collapses a one-row file down to a scalar.
vk = pd.read_csv('kohli_ipl.csv', index_col='match_no').squeeze('columns')
vk

match_no
1       1
2      23
3      13
4      12
5       1
       ..
211     0
212    20
213    73
214    25
215     7
Name: runs, Length: 215, dtype: int64

In [20]:
# Load the lead actor of each movie, using the movie title as the index.
# squeeze('columns') turns the remaining single column into a Series; unlike a
# bare squeeze(), it never collapses a one-row file down to a scalar.
movies = pd.read_csv('bollywood.csv', index_col='movie').squeeze('columns')
movies

movie
Uri: The Surgical Strike                   Vicky Kaushal
Battalion 609                                Vicky Ahuja
The Accidental Prime Minister (film)         Anupam Kher
Why Cheat India                            Emraan Hashmi
Evening Shadows                         Mona Ambegaonkar
                                              ...       
Hum Tumhare Hain Sanam                    Shah Rukh Khan
Aankhen (2002 film)                     Amitabh Bachchan
Saathiya (film)                             Vivek Oberoi
Company (film)                                Ajay Devgn
Awara Paagal Deewana                        Akshay Kumar
Name: lead, Length: 1500, dtype: object

In [26]:
# astype(): converts the Series to a different data type.
# First measure how much memory (in bytes) the original int64 Series occupies.
# (sys is imported in the import cell at the top of the notebook.)

sys.getsizeof(vk)

3472

In [38]:
# Casting to a smaller integer type with astype() reduces the memory footprint
# (compare the size reported here with the size of the original Series).

sys.getsizeof(vk.astype('int16'))


2182

In [46]:
# between(): checks whether each value lies within a range and returns a boolean Series

in_range = vk.between(51, 99)   # boolean mask, computed once and reused below
vk[in_range]                    # the matching values (not displayed: only the last expression is shown)
vk[in_range].size               # how many values fall inside the range

43

In [50]:
# clip(): limits every value to a given range (demonstrated in the next cell);
# first, display the original values for comparison.

subs

0       48
1       57
2       40
3       43
4       44
      ... 
360    231
361    226
362    155
363    144
364    172
Name: Subscribers gained, Length: 365, dtype: int64

In [54]:
# clip(): values below 100 are raised to 100 and values above 200 are lowered
# to 200; values already between 100 and 200 stay unchanged (compare with the
# original values shown in the previous cell).
subs.clip(lower=100, upper=200)

0      100
1      100
2      100
3      100
4      100
      ... 
360    200
361    200
362    155
363    144
364    172
Name: Subscribers gained, Length: 365, dtype: int64

In [58]:
# drop_duplicates(): removes repeated values from a Series.
# Build a small example Series in which every value appears twice.

temp = pd.Series([1,1,2,2,3,3,4,4])
temp

0    1
1    1
2    2
3    2
4    3
5    3
6    4
7    4
dtype: int64

In [62]:
temp.drop_duplicates()   # drops repeated values, keeping one occurrence of each

# To keep the last occurrence of each value instead (index labels 1, 3, 5, 7
# in the output), pass keep='last'.
temp.drop_duplicates(keep = 'last')

1    1
3    2
5    3
7    4
dtype: int64

In [72]:
# duplicated(): tells whether each value is a repeat; returns a boolean Series

temp.duplicated()    # False for the first occurrence of a value, True for every later repeat
# temp.duplicated().sum() would give the number of repeated entries

0    False
1     True
2    False
3     True
4    False
5     True
6    False
7     True
dtype: bool

In [74]:
vk.duplicated().sum()

137

In [64]:
movies.drop_duplicates()

movie
Uri: The Surgical Strike                   Vicky Kaushal
Battalion 609                                Vicky Ahuja
The Accidental Prime Minister (film)         Anupam Kher
Why Cheat India                            Emraan Hashmi
Evening Shadows                         Mona Ambegaonkar
                                              ...       
Sssshhh...                              Tanishaa Mukerji
Rules: Pyaar Ka Superhit Formula                  Tanuja
Right Here Right Now (film)                        Ankit
Talaash: The Hunt Begins...                Rakhee Gulzar
The Pink Mirror                          Edwin Fernandes
Name: lead, Length: 566, dtype: object

In [78]:
# Working with missing values: build an example Series that contains three
# missing entries (np.nan).

temp = pd.Series([1,2,3,np.nan,5,6,np.nan,8,np.nan,10])
temp

0     1.0
1     2.0
2     3.0
3     NaN
4     5.0
5     6.0
6     NaN
7     8.0
8     NaN
9    10.0
dtype: float64

In [80]:
temp.size   # size returns total values

10

In [84]:
temp.count()     # count() counts only non-missing values; do not count missing values 

7

In [90]:
# isnull(): reports which values in the data are missing

vk.isnull()  # returns a boolean Series (True where a value is missing)
vk.isnull().sum()   # summing the booleans gives the number of missing values

0

In [94]:
temp.isnull()
temp.isnull().sum()

3

#### Dealing with missing values

In [97]:
# dropna(): removes all missing values

temp.dropna()    # shows only the non-missing values; their original index labels are kept

0     1.0
1     2.0
2     3.0
4     5.0
5     6.0
7     8.0
9    10.0
dtype: float64

In [113]:
# fillna(): replaces missing values with a value of your choice, depending on what the project needs

temp.fillna(0)    # fills the missing values with 0

# Suppose we want to fill them with the mean instead
mean = temp.mean()   # 5.0 here, as the filled values in the output show
temp.fillna(mean)   # fills missing values with the mean

0     1.0
1     2.0
2     3.0
3     5.0
4     5.0
5     6.0
6     5.0
7     8.0
8     5.0
9    10.0
dtype: float64

In [121]:
# isin(): checks a Series against several values at once

(vk == 49) | (vk == 99)
vk[(vk ==49) | (vk == 99)]   # chaining many comparisons like this quickly becomes unwieldy, so we use isin() instead

match_no
82    99
86    49
Name: runs, dtype: int64

In [125]:
# isin(): the same check written with a list of values

vk.isin([49,99])    # returns a boolean Series
vk[vk.isin([49,99])]    # selects the same rows as the chained comparison in the previous cell

match_no
82    99
86    49
Name: runs, dtype: int64

In [133]:
# apply(): lets you apply custom logic to every element of a Series

movies

# Example: keep only the first word of each lead actor's name and display it in capital letters

movies.apply(lambda x:x.split()[0].upper())

movie
Uri: The Surgical Strike                  VICKY
Battalion 609                             VICKY
The Accidental Prime Minister (film)     ANUPAM
Why Cheat India                          EMRAAN
Evening Shadows                            MONA
                                         ...   
Hum Tumhare Hain Sanam                     SHAH
Aankhen (2002 film)                     AMITABH
Saathiya (film)                           VIVEK
Company (film)                             AJAY
Awara Paagal Deewana                     AKSHAY
Name: lead, Length: 1500, dtype: object

In [137]:
subs

0       48
1       57
2       40
3       43
4       44
      ... 
360    231
361    226
362    155
363    144
364    172
Name: Subscribers gained, Length: 365, dtype: int64

In [139]:
subs.mean()

135.64383561643837

In [141]:
# apply() with a custom rule: label an entry 'good day' when its value is above
# the mean of the Series, otherwise 'bad day'.

# Compute the mean once up front; calling subs.mean() inside the lambda would
# recompute it for every single element.
subs_mean = subs.mean()
subs.apply(lambda x: 'good day' if x > subs_mean else 'bad day')

0       bad day
1       bad day
2       bad day
3       bad day
4       bad day
         ...   
360    good day
361    good day
362    good day
363    good day
364    good day
Name: Subscribers gained, Length: 365, dtype: object

### Usecase of copy:
##### When we use head() or tail() to preview data, we do not get a copy of the data; we get a view of it, so we are still working with the original data. Because head() and tail() return views rather than copies (replicas), any change made to their result is reflected back in the original data. To avoid this, use copy(). (Note: this describes pandas without Copy-on-Write; with Copy-on-Write enabled, which is the default from pandas 3.0, such changes no longer propagate to the original.)

In [188]:
# vk originally held 1 at label 1; after `new[1] = 100` was run on the view returned
# by vk.head(), the change shows up in vk itself. Use copy() to avoid this.
# NOTE(review): the extra row at label 0 (Length 216 instead of 215) is not created by
# any cell shown in this notebook - presumably left over from a removed cell; confirm.
vk

match_no
1      100
2       23
3       13
4       12
5        1
      ... 
212     20
213     73
214     25
215      7
0      100
Name: runs, Length: 216, dtype: int64

In [178]:
new = vk.head()
new

match_no
1     1
2    23
3    13
4    12
5     1
Name: runs, dtype: int64

In [180]:
# Sets the value at index label 1 (match_no 1), as the output of `new` shows.
# Here `new` comes straight from vk.head(), so per the note above this write
# also reaches vk.
new[1] = 100

In [184]:
new

match_no
1    100
2     23
3     13
4     12
5      1
Name: runs, dtype: int64

In [169]:
new = vk.head().copy()

In [171]:
# Sets the value at index label 1 (match_no 1). Here `new` is a copy, so only
# the copy changes; the vk output that follows still shows 1 at label 1.
new[1] = 100

In [173]:
new

match_no
1    100
2     23
3     13
4     12
5      1
Name: runs, dtype: int64

In [175]:
vk

match_no
1        1
2       23
3       13
4       12
5        1
      ... 
212     20
213     73
214     25
215      7
0      100
Name: runs, Length: 216, dtype: int64

In [None]:
# copy(): returns an independent copy, so later changes to `new` are not
# reflected in vk.
new = vk.head().copy()