## Importing Pandas

In [1]:
import pandas as pd 
import numpy as np 

## Series 
Series is a one-dimensional labeled array capable of holding any data type (integers, strings, floating point numbers, Python objects, etc.). The axis labels are collectively referred to as the index. The basic method to create a Series is to call:
s = pd.Series(data, index=index)
Here, data can be many different things:

- a Python dict

- an ndarray

- a scalar value (like 5)

The passed index is a list of axis labels. Thus, this separates into a few cases depending on what data is:

## Series from Lists

In [3]:
# Series built from a list of strings
Country = [
    "India",
    "USA",
    "UK",
    "Canada",
    "Australia",
]
pd.Series(data=Country)

0        India
1          USA
2           UK
3       Canada
4    Australia
dtype: object

In [4]:
# Series built from a list of integers
Age = [
    23,
    45,
    67,
    89,
    34,
]
pd.Series(data=Age)

0    23
1    45
2    67
3    89
4    34
dtype: int64

In [6]:
# Custom index: the `index` parameter of pd.Series attaches our own labels
# to the values instead of the default 0, 1, 2, ... positions.
marks = [23, 45, 67, 89, 34]
subjects = [
    'Maths',
    'Science',
    'English',
    'History',
    'Geography',
]
pd.Series(data=marks, index=subjects)

Maths        23
Science      45
English      67
History      89
Geography    34
dtype: int64

In [8]:
# Naming a Series: the `name` parameter of pd.Series labels the whole Series.
# Here the marks/subjects Series from the previous example is given the name 'Marks'.
Result = pd.Series(
    data=marks,
    index=subjects,
    name='Marks',
)

In [9]:
# Display the named Series; its name appears in the last line of the output
Result

Maths        23
Science      45
English      67
History      89
Geography    34
Name: Marks, dtype: int64

## Series from Dictionary 

In [15]:
# When a dict is passed, its keys become the index labels and its values the data.
marks = {
    'Maths': 23,
    'Science': 45,
    'English': 67,
    'History': 89,
    'Geography': 34,
}
marks_series = pd.Series(data=marks, name='Marks')
marks_series

Maths        23
Science      45
English      67
History      89
Geography    34
Name: Marks, dtype: int64

## Series Attributes 

In [12]:
# size: the number of elements in the Series
marks_series.size

5

In [13]:
# dtype: the data type of the values stored in the Series
marks_series.dtype


dtype('int64')

In [16]:
# name: the label given to the Series when it was created
marks_series.name

'Marks'

In [17]:
# is_unique: True when no value in the Series is repeated
marks_series.is_unique

True

In [19]:
# index: the axis labels of the Series, returned as a pandas Index object
marks_series.index

Index(['Maths', 'Science', 'English', 'History', 'Geography'], dtype='object')

In [20]:
# values: the underlying data without the index, returned as a NumPy array
marks_series.values

array([23, 45, 67, 89, 34])

## Series using read_csv

In [29]:
# with one column
# NOTE(review): absolute, machine-specific path; consider a configurable data directory.
subs_path = '/Users/abhasjaiswal/Desktop/Data-Analysis-and-Visualisation/Pandas/datasets/subs.csv'

# read_csv always returns a DataFrame (a table), even when the file has a single column.
# The file is read once and reused instead of being parsed again for every step.
subs_df = pd.read_csv(subs_path)

# To get a Series, squeeze the one-column DataFrame along its columns axis.
# The old `squeeze=True` argument of read_csv was deprecated in pandas 1.4 and
# removed in 2.0; calling .squeeze("columns") on the result is its replacement.
subs = subs_df.squeeze("columns")
subs

0       48
1       57
2       40
3       43
4       44
      ... 
360    231
361    226
362    155
363    144
364    172
Name: Subscribers gained, Length: 365, dtype: int64

In [42]:
# with two columns: 'match_no' is used as the index, which leaves a single
# data column that squeeze() turns into a Series.
Kohli = (
    pd.read_csv(
        '/Users/abhasjaiswal/Desktop/Data-Analysis-and-Visualisation/Pandas/datasets/kohli_ipl.csv',
        index_col='match_no',
    )
    .squeeze()
)
Kohli

match_no
1       1
2      23
3      13
4      12
5       1
       ..
211     0
212    20
213    73
214    25
215     7
Name: runs, Length: 215, dtype: int64

In [43]:
# 'movie' is used as the index, which leaves a single data column that
# squeeze() turns into a Series.
movies = (
    pd.read_csv(
        '/Users/abhasjaiswal/Desktop/Data-Analysis-and-Visualisation/Pandas/datasets/bollywood.csv',
        index_col='movie',
    )
    .squeeze()
)
movies

movie
Uri: The Surgical Strike                   Vicky Kaushal
Battalion 609                                Vicky Ahuja
The Accidental Prime Minister (film)         Anupam Kher
Why Cheat India                            Emraan Hashmi
Evening Shadows                         Mona Ambegaonkar
                                              ...       
Hum Tumhare Hain Sanam                    Shah Rukh Khan
Aankhen (2002 film)                     Amitabh Bachchan
Saathiya (film)                             Vivek Oberoi
Company (film)                                Ajay Devgn
Awara Paagal Deewana                        Akshay Kumar
Name: lead, Length: 1500, dtype: object

## Series Methods

In [40]:
# Head and Tail
subs.head()  # with no argument, head() returns the first 5 rows of the Series

0    48
1    57
2    40
3    43
4    44
Name: Subscribers gained, dtype: int64

In [44]:
Kohli.head(3)  # the argument sets how many rows to return from the top (here 3)

match_no
1     1
2    23
3    13
Name: runs, dtype: int64

In [45]:
Kohli.tail()  # with no argument, tail() returns the last 5 rows of the Series

match_no
211     0
212    20
213    73
214    25
215     7
Name: runs, dtype: int64

In [47]:
# sample
# Returns one randomly chosen row of the Series, so the output changes from run to run.
# The number of rows can be set with the `n` argument, and `random_state` can be
# passed to make the selection repeatable.
Kohli.sample()

match_no
173    29
Name: runs, dtype: int64

In [48]:
# value_counts
# Counts how many times each unique value occurs in the Series.
# The result is sorted by count, most frequent first.
movies.value_counts()


lead
Akshay Kumar        48
Amitabh Bachchan    45
Ajay Devgn          38
Salman Khan         31
Sanjay Dutt         26
                    ..
Diganth              1
Parveen Kaur         1
Seema Azmi           1
Akanksha Puri        1
Edwin Fernandes      1
Name: count, Length: 566, dtype: int64

In [50]:
# sort_values
# With no arguments, sort_values() sorts the Series by value in ascending order.
# NOTE: a cell only displays its last expression, so this result is not shown below.
Kohli.sort_values()
# Passing ascending=False sorts in descending order; this is the output displayed below.
Kohli.sort_values(ascending=False)

match_no
128    113
126    109
123    108
164    100
120    100
      ... 
93       0
211      0
130      0
8        0
135      0
Name: runs, Length: 215, dtype: int64

In [54]:
# Highest score: sort descending, keep the first row, and return its value as a NumPy array.
# sort_values() returns a new sorted Series; Kohli itself is not changed
# unless inplace=True is passed.
Kohli.sort_values(ascending=False).head(1).values

array([113])

In [55]:
# sort_index
# Sorts the Series by its index labels (here the movie titles) and returns the sorted copy.
movies.sort_index()

movie
1920 (film)                   Rajniesh Duggall
1920: London                     Sharman Joshi
1920: The Evil Returns             Vicky Ahuja
1971 (2007 film)                Manoj Bajpayee
2 States (2014 film)              Arjun Kapoor
                                   ...        
Zindagi 50-50                      Veena Malik
Zindagi Na Milegi Dobara        Hrithik Roshan
Zindagi Tere Naam           Mithun Chakraborty
Zokkomon                       Darsheel Safary
Zor Lagaa Ke...Haiya!            Meghan Jadhav
Name: lead, Length: 1500, dtype: object

In [56]:
# The original order is still intact: sort_index() returned a new Series and,
# because inplace=True was not passed, movies itself was left unchanged.
movies

movie
Uri: The Surgical Strike                   Vicky Kaushal
Battalion 609                                Vicky Ahuja
The Accidental Prime Minister (film)         Anupam Kher
Why Cheat India                            Emraan Hashmi
Evening Shadows                         Mona Ambegaonkar
                                              ...       
Hum Tumhare Hain Sanam                    Shah Rukh Khan
Aankhen (2002 film)                     Amitabh Bachchan
Saathiya (film)                             Vivek Oberoi
Company (film)                                Ajay Devgn
Awara Paagal Deewana                        Akshay Kumar
Name: lead, Length: 1500, dtype: object

In [58]:
# With inplace=True the Series itself is reordered (nothing is returned),
# so displaying movies afterwards shows the index-sorted data.
# NOTE: this permanently changes movies for every later cell.
movies.sort_index(inplace=True)
movies

movie
1920 (film)                   Rajniesh Duggall
1920: London                     Sharman Joshi
1920: The Evil Returns             Vicky Ahuja
1971 (2007 film)                Manoj Bajpayee
2 States (2014 film)              Arjun Kapoor
                                   ...        
Zindagi 50-50                      Veena Malik
Zindagi Na Milegi Dobara        Hrithik Roshan
Zindagi Tere Naam           Mithun Chakraborty
Zokkomon                       Darsheel Safary
Zor Lagaa Ke...Haiya!            Meghan Jadhav
Name: lead, Length: 1500, dtype: object

In [60]:
# Sort movies by its values (the lead actor names) in place.
# NOTE: this replaces the index-sorted order from the previous cell; later cells see this order.
movies.sort_values(inplace=True)
movies

movie
Qaidi Band                            Aadar Jain
Roar: Tigers of the Sundarbans      Aadil Chahal
Lipstick Under My Burkha            Aahana Kumra
Raat Gayi Baat Gayi?                Aamir Bashir
3 Idiots                              Aamir Khan
                                        ...     
Dil Toh Deewana Hai                  Zeenat Aman
Strings of Passion                   Zeenat Aman
Sallu Ki Shaadi                      Zeenat Aman
Dunno Y... Na Jaane Kyon             Zeenat Aman
Taj Mahal: An Eternal Love Story     Zulfi Sayed
Name: lead, Length: 1500, dtype: object

## Series Maths Methods 

In [63]:
# Count
# count() returns the number of non-missing values; missing (NaN) entries are not counted.
Kohli.count()

215

In [64]:
# Sum: total of all values in the Series
subs.sum()

49510

In [65]:
# Mean: arithmetic average of the values
subs.mean()

135.64383561643837

In [66]:
# Median: the middle value of the sorted data
subs.median()

123.0

In [67]:
# Standard deviation (pandas uses the sample formula, dividing by N-1, by default)
subs.std()

62.6750230372527

In [68]:
# Minimum: the smallest value in the Series
subs.min()

33

In [74]:
# Maximum: the largest value in the Series
subs.max()

396

In [70]:
# Mode: the most frequent value(s); returned as a Series because there can be ties
movies.mode()

0    Akshay Kumar
Name: lead, dtype: object

In [72]:
# Standard deviation of subs
# NOTE(review): this repeats the subs.std() cell earlier in this section.
subs.std()

62.6750230372527

In [73]:
# Variance (pandas uses the sample formula, dividing by N-1, by default)
Kohli.var()

688.0024777222343

In [76]:
# describe: summary statistics in one call (count, mean, std, min, quartiles, max)
subs.describe()

count    365.000000
mean     135.643836
std       62.675023
min       33.000000
25%       88.000000
50%      123.000000
75%      177.000000
max      396.000000
Name: Subscribers gained, dtype: float64

In [77]:
# info: prints a short summary of the Series (index range, name, non-null count, dtype, memory usage)
subs.info()

<class 'pandas.core.series.Series'>
RangeIndex: 365 entries, 0 to 364
Series name: Subscribers gained
Non-Null Count  Dtype
--------------  -----
365 non-null    int64
dtypes: int64(1)
memory usage: 3.0 KB


## Series Indexing 

In [80]:
# integer based indexing
x = pd.Series([12, 34, 56, 78, 90, 23, 45, 67, 89, 34])
# .iloc selects by integer position regardless of what the index labels are.
# Plain x[0] is a label lookup that only matches position 0 here because x has
# the default 0..n-1 index; .iloc makes the positional intent explicit.
x.iloc[0]  # and so on: x.iloc[1], x.iloc[2], ...

12

In [85]:
# Slicing
# iloc slices by position and excludes the end: 0:8 returns the first eight rows (positions 0-7).
movies.iloc[0:8]

movie
Qaidi Band                          Aadar Jain
Roar: Tigers of the Sundarbans    Aadil Chahal
Lipstick Under My Burkha          Aahana Kumra
Raat Gayi Baat Gayi?              Aamir Bashir
3 Idiots                            Aamir Khan
Mangal Pandey: The Rising           Aamir Khan
PK (film)                           Aamir Khan
Ghajini (2008 film)                 Aamir Khan
Name: lead, dtype: object

## Editing Series 

In [91]:
# Using Indexing
# Assigning through iloc overwrites the value at that position;
# position 1 is the second entry ('Science').
marks_series.iloc[1]=100
marks_series

Maths         23
Science      100
English       67
History       89
Geography     34
Name: Marks, dtype: int64

In [93]:
# What if the index label does not exist?
# Assigning through loc with a new label adds that label to the Series with the given value.
marks_series.loc['Physics']=90
marks_series

Maths         23
Science      100
English       67
History       89
Geography     34
Physics       90
Name: Marks, dtype: int64

In [95]:
# Using Slicing
# Assign several values at once: positions 1 and 2 ('Science' and 'English')
# each receive the corresponding value from the list.
marks_series.iloc[1:3]=[10,10]
marks_series

Maths        23
Science      10
English      10
History      89
Geography    34
Physics      90
Name: Marks, dtype: int64

## Series with Python Functionalities 

In [101]:
# len/type/dir/sorted/max/min
# Python built-ins work directly on a Series.
print(len(subs))            # number of elements
print(type(marks_series))   # the pandas Series class
# NOTE(review): dir() prints every attribute name, which produces a very long output.
print(dir(subs))
# sorted() returns a plain Python list of all values in ascending order (also a long output).
print(sorted(subs))
print(max(subs))            # largest value
print(min(subs))            # smallest value

365
<class 'pandas.core.series.Series'>
['T', '_AXIS_LEN', '_AXIS_ORDERS', '_AXIS_TO_AXIS_NUMBER', '_HANDLED_TYPES', '__abs__', '__add__', '__and__', '__annotations__', '__array__', '__array_priority__', '__array_ufunc__', '__bool__', '__class__', '__column_consortium_standard__', '__contains__', '__copy__', '__deepcopy__', '__delattr__', '__delitem__', '__dict__', '__dir__', '__divmod__', '__doc__', '__eq__', '__finalize__', '__float__', '__floordiv__', '__format__', '__ge__', '__getattr__', '__getattribute__', '__getitem__', '__getstate__', '__gt__', '__hash__', '__iadd__', '__iand__', '__ifloordiv__', '__imod__', '__imul__', '__init__', '__init_subclass__', '__int__', '__invert__', '__ior__', '__ipow__', '__isub__', '__iter__', '__itruediv__', '__ixor__', '__le__', '__len__', '__lt__', '__matmul__', '__mod__', '__module__', '__mul__', '__ne__', '__neg__', '__new__', '__nonzero__', '__or__', '__pandas_priority__', '__pos__', '__pow__', '__radd__', '__rand__', '__rdivmod__', '__reduce

In [102]:
# type conversion
# astype returns a new Series with the values converted to the requested dtype (here float);
# the result is not assigned, so subs itself keeps its original dtype.
subs.astype(float)

0       48.0
1       57.0
2       40.0
3       43.0
4       44.0
       ...  
360    231.0
361    226.0
362    155.0
363    144.0
364    172.0
Name: Subscribers gained, Length: 365, dtype: float64