<a href="https://colab.research.google.com/github/AzadMehedi/Pandas/blob/main/Pandas_Series.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# What is Pandas

Pandas is a fast, powerful, flexible and easy to use open source data analysis and manipulation tool,
built on top of the Python programming language.

https://pandas.pydata.org/about/index.html

# Pandas Series
A Pandas Series is like a column in a table. It is a 1-D array holding data of any type.

In [2]:
import pandas as pd
import numpy as np

In [None]:
# Series from a list of strings; no index is passed, so the labels default to 0..n-1.
country = ['Bangladesh', 'India', 'Pakistan','Nepal','Srilanka']
pd.Series(country)

0    Bangladesh
1         India
2      Pakistan
3         Nepal
4      Srilanka
dtype: object

In [None]:
# Series from a list of integers; kept in `run_series` because later cells reuse it.
runs = [32,43,12,57,100]
run_series = pd.Series(runs)
run_series

0     32
1     43
2     12
3     57
4    100
dtype: int64

In [None]:
# NOTE(review): `sub` is not used anywhere in this cell - this looks like an
# accidental auto-import; confirm nothing else relies on it before removing.
from re import sub
# Custom index: `index=` supplies one label per value (same length as the data),
# replacing the default 0..n-1 labels.
marks = [43,64,85,75,100]
subjects = ['math','eng','science','politics','social']
pd.Series(marks, index=subjects)

math         43
eng          64
science      85
politics     75
social      100
dtype: int64

In [None]:
# Setting a name: `name=` labels the Series itself; it appears in the repr footer.
pd.Series(marks, index=subjects, name='Mehedi Azad marks')

math         43
eng          64
science      85
politics     75
social      100
Name: Mehedi Azad marks, dtype: int64

# Series from dictionary

In [65]:
# Subject -> mark mapping. When passed to pd.Series the keys become the index
# labels and the values become the data.
# NOTE: this rebinds `marks`, which was a plain list in the earlier cells.
marks = {
    'math':78,
    'english':89,
    'Science':57,
    'Bangla':90

}

In [66]:
# mark_series is an instance of the Series class, so every Series attribute and
# method is available on it. It is reused by the attribute cells that follow.
mark_series = pd.Series(marks, name='Mehedi mark')
mark_series

math       78
english    89
Science    57
Bangla     90
Name: Mehedi mark, dtype: int64

# Series Attributes

In [None]:
# size: total number of elements in the Series (missing values are included)
mark_series.size

4

In [None]:
# dtype: data type of the values stored in the Series
mark_series.dtype

dtype('int64')

In [None]:
# name: the label given to the Series when it was created
mark_series.name

'Mehedi mark'

In [None]:
# is_unique: True when no value occurs more than once
mark_series.is_unique

# Counter-example with repeated values (evaluates to False):
# pd.Series([1,1,2,3,5,5,4,3]).is_unique

True

In [None]:
# index: the labels of the Series.
# mark_series was built from a dict, so its index holds the dict keys;
# run_series was built from a list without `index=`, so it has the default index.
print(mark_series.index)

run_series.index

Index(['math', 'english', 'Science', 'Bangla'], dtype='object')


RangeIndex(start=0, stop=5, step=1)

In [None]:
# values: the underlying data as a NumPy array (index labels are dropped)
mark_series.values

array([78, 89, 57, 90])

# Series using read_csv

In [5]:
# CSV with a single column: read_csv still returns a DataFrame, not a Series.
# NOTE(review): pandas is already imported in the first code cell; this repeated
# import is redundant when the notebook is run top to bottom.
import pandas as pd
pd.read_csv('/content/subs.csv')
# Uncomment to check the returned type:
# type(pd.read_csv('/content/subs.csv'))

Unnamed: 0,Subscribers gained
0,48
1,57
2,40
3,43
4,44
...,...
360,231
361,226
362,155
363,144


In [7]:
# Load the single-column CSV as a Series instead of a DataFrame.
# read_csv's `squeeze=True` keyword was deprecated in pandas 1.4 and removed in
# 2.0; calling .squeeze("columns") on the resulting DataFrame is the documented
# replacement and collapses the one-column frame into a Series.
subs = pd.read_csv('/content/subs.csv').squeeze('columns')
subs

0       48
1       57
2       40
3       43
4       44
      ... 
360    231
361    226
362    155
363    144
364    172
Name: Subscribers gained, Length: 365, dtype: int64

In [17]:
 # with 2 cols
# Two-column CSV: `match_no` becomes the index, which leaves a single data
# column that .squeeze("columns") collapses into a Series.
# (read_csv's `squeeze=True` keyword was deprecated in pandas 1.4 and removed in 2.0.)
vk = pd.read_csv('/content/kohli_ipl.csv', index_col='match_no').squeeze('columns')
vk

match_no
1       1
2      23
3      13
4      12
5       1
       ..
211     0
212    20
213    73
214    25
215     7
Name: runs, Length: 215, dtype: int64

In [13]:
# Two-column CSV: the `movie` title column becomes the index and the remaining
# column is collapsed into a Series with .squeeze("columns").
# (read_csv's `squeeze=True` keyword was deprecated in pandas 1.4 and removed in 2.0.)
movie = pd.read_csv('/content/bollywood.csv', index_col='movie').squeeze('columns')
movie

movie
Uri: The Surgical Strike                   Vicky Kaushal
Battalion 609                                Vicky Ahuja
The Accidental Prime Minister (film)         Anupam Kher
Why Cheat India                            Emraan Hashmi
Evening Shadows                         Mona Ambegaonkar
                                              ...       
Hum Tumhare Hain Sanam                    Shah Rukh Khan
Aankhen (2002 film)                     Amitabh Bachchan
Saathiya (film)                             Vivek Oberoi
Company (film)                                Ajay Devgn
Awara Paagal Deewana                        Akshay Kumar
Name: lead, Length: 1500, dtype: object

# Series Methods

In [15]:
# head: first rows of the Series (5 when no count is given)
movie.head()

movie
Uri: The Surgical Strike                   Vicky Kaushal
Battalion 609                                Vicky Ahuja
The Accidental Prime Minister (film)         Anupam Kher
Why Cheat India                            Emraan Hashmi
Evening Shadows                         Mona Ambegaonkar
Name: lead, dtype: object

In [16]:
# tail: last rows of the Series (5 when no count is given)
movie.tail()

movie
Hum Tumhare Hain Sanam      Shah Rukh Khan
Aankhen (2002 film)       Amitabh Bachchan
Saathiya (film)               Vivek Oberoi
Company (film)                  Ajay Devgn
Awara Paagal Deewana          Akshay Kumar
Name: lead, dtype: object

In [19]:
# custom row count: pass the number of rows wanted, here the first 15
movie.head(15)

movie
Uri: The Surgical Strike                       Vicky Kaushal
Battalion 609                                    Vicky Ahuja
The Accidental Prime Minister (film)             Anupam Kher
Why Cheat India                                Emraan Hashmi
Evening Shadows                             Mona Ambegaonkar
Soni (film)                             Geetika Vidya Ohlyan
Fraud Saiyaan                                   Arshad Warsi
Bombairiya                                      Radhika Apte
Manikarnika: The Queen of Jhansi              Kangana Ranaut
Thackeray (film)                         Nawazuddin Siddiqui
Amavas                                             Ali Asgar
Gully Boy                                      Ranveer Singh
Hum Chaar                                        Prit Kamani
Total Dhamaal                                     Ajay Devgn
Sonchiriya                              Sushant Singh Rajput
Name: lead, dtype: object

In [24]:
# sample: return one randomly chosen row. No random_state is set, so the
# selected movie changes from run to run.
movie.sample()
# Pass a count to draw several rows at once:
# movie.sample(4)

movie
Revolver Rani    Kangana Ranaut
Name: lead, dtype: object

In [27]:
# value_counts: how often each distinct value occurs
movie.value_counts()  # descending order by default

Akshay Kumar        48
Amitabh Bachchan    45
Ajay Devgn          38
Salman Khan         31
Sanjay Dutt         26
                    ..
Diganth              1
Parveen Kaur         1
Seema Azmi           1
Akanksha Puri        1
Edwin Fernandes      1
Name: lead, Length: 566, dtype: int64

In [30]:
movie.value_counts(ascending=True)  # ascending order: least frequent first

Sharib Hashmi        1
Ravi Kishan          1
Sagar Bhangade       1
Harish Chabbra       1
Bidita Bag           1
                    ..
Sanjay Dutt         26
Salman Khan         31
Ajay Devgn          38
Amitabh Bachchan    45
Akshay Kumar        48
Name: lead, Length: 566, dtype: int64

In [31]:
# sort_values: returns a new Series ordered by value; `vk` itself is not changed
vk.sort_values()  # ascending by default

match_no
87       0
211      0
207      0
206      0
91       0
      ... 
164    100
120    100
123    108
126    109
128    113
Name: runs, Length: 215, dtype: int64

In [35]:
# sort_values + chaining: sort descending, keep the top 5, take the raw values
# and pick position 1, i.e. the second-highest score.
# Nothing is modified here; sort_values only changes `vk` permanently when
# called with inplace=True.
vk.sort_values(ascending=False).head(5).values[1]

109

In [40]:
# sort_index with inplace=True: reorders `movie` itself by its index labels
# (movie titles) and returns None, so this cell shows no output.
# Every later cell sees the sorted Series.
movie.sort_index(inplace=True)

In [41]:
# `movie` is now ordered by its index labels after the in-place sort above it
movie

movie
1920 (film)                   Rajniesh Duggall
1920: London                     Sharman Joshi
1920: The Evil Returns             Vicky Ahuja
1971 (2007 film)                Manoj Bajpayee
2 States (2014 film)              Arjun Kapoor
                                   ...        
Zindagi 50-50                      Veena Malik
Zindagi Na Milegi Dobara        Hrithik Roshan
Zindagi Tere Naam           Mithun Chakraborty
Zokkomon                       Darsheel Safary
Zor Lagaa Ke...Haiya!            Meghan Jadhav
Name: lead, Length: 1500, dtype: object

# Series math methods

In [42]:
# count: number of non-missing values; unlike `size`, which counts every value including missing ones

In [43]:
# count: number of non-missing entries in the Series
movie.count()

1500

In [47]:
# sum: total of all values (product() is the multiplicative counterpart)
subs.sum()
# subs.product()

49510

In [48]:
# mean: arithmetic average of the values
subs.mean()

135.64383561643837

In [50]:
# median: middle value of the sorted data
vk.median()

24.0

In [51]:
# mode: most frequent value(s); the result is a Series because ties are possible
movie.mode()

0    Akshay Kumar
dtype: object

In [52]:
# std: standard deviation of the values (pandas uses the sample form, ddof=1, by default)
vk.std()

26.22980132830278

In [54]:
# variance: the square of the standard deviation (sample form, ddof=1, by default)
vk.var()

688.0024777222343

In [55]:
# min: smallest value in the Series
subs.min()

33

In [56]:
# max: largest value in the Series
subs.max()

396

In [57]:
# describe: summary statistics in one call (count, mean, std, min, quartiles, max)
vk.describe()

count    215.000000
mean      30.855814
std       26.229801
min        0.000000
25%        9.000000
50%       24.000000
75%       48.000000
max      113.000000
Name: runs, dtype: float64

# Series Indexing

In [58]:
# integer indexing: `x` has the default 0..n-1 index, so x[1] looks up the
# label 1, which is also the second element.
x = pd.Series([12,12,14,35,46,57,58,79,9])
x[1]

12

In [59]:
# Negative indexing does NOT work on a Series with an integer index:
# x[-1] is treated as a lookup of the label -1, which does not exist, so pandas
# raises KeyError. Use x.iloc[-1] to get the last element by position.
# The error is caught and printed so that "Run All" does not stop at this cell.
try:
    x[-1]
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: ignored

In [68]:
# Last element by position on a string-labelled Series. Use .iloc for positional
# access: plain movie[-1] relies on an integer-as-position fallback of
# Series.__getitem__ that is deprecated since pandas 2.1.
movie.iloc[-1]   # string index

'Meghan Jadhav'

In [69]:
# slicing: integer slices are positional, so this returns positions 5..15
# (match_no 6 to 16), not the labels 5..16.
vk[5:16]

match_no
6      9
7     34
8      0
9     21
10     3
11    10
12    38
13     3
14    11
15    50
16     2
Name: runs, dtype: int64

In [70]:
# negative slicing: the last five entries (slices are positional even with an integer index)
vk[-5:]

match_no
211     0
212    20
213    73
214    25
215     7
Name: runs, dtype: int64

In [71]:
# negative slicing works the same way on a string-labelled Series: last five entries
movie[-5:] 

movie
Zindagi 50-50                      Veena Malik
Zindagi Na Milegi Dobara        Hrithik Roshan
Zindagi Tere Naam           Mithun Chakraborty
Zokkomon                       Darsheel Safary
Zor Lagaa Ke...Haiya!            Meghan Jadhav
Name: lead, dtype: object

In [72]:
# step slicing: every second entry, starting from the first
movie[::2] 

movie
1920 (film)                 Rajniesh Duggall
1920: The Evil Returns           Vicky Ahuja
2 States (2014 film)            Arjun Kapoor
3 A.M. (2014 film)             Salil Acharya
3 Idiots                          Aamir Khan
                                  ...       
Zero (2018 film)              Shah Rukh Khan
Zila Ghaziabad                  Vivek Oberoi
Zindaggi Rocks                  Sushmita Sen
Zindagi Na Milegi Dobara      Hrithik Roshan
Zokkomon                     Darsheel Safary
Name: lead, Length: 750, dtype: object

In [74]:
# fancy indexing: pass a list of index labels to pick several entries at once.
# Here the labels are match numbers, so this returns Virat's scores in
# matches 5, 6 and 7.
vk[[5,6,7]]

match_no
5     1
6     9
7    34
Name: runs, dtype: int64

In [78]:
# indexing with a label: a single index label returns the matching value
# (pass a list of labels instead to select several entries at once)
movie['Zindagi Na Milegi Dobara']

'Hrithik Roshan'