# Pandas Series
A pandas series is like a column in a table. it is a 1-D array holding data or any type.

**Series from lists**

In [2]:
import numpy as np
import pandas as pd

In [3]:
# string
country = ['india', 'pakistan', 'china']
pd.Series(country)

Unnamed: 0,0
0,india
1,pakistan
2,china


In [4]:
# integer
runs = [100, 200, 300]
pd.Series(runs)

Unnamed: 0,0
0,100
1,200
2,300


In [5]:
# custom index
marks = [67,57,89,99]
subjects = ['maths', 'science', 'english', 'hindi']
pd.Series(marks, index=subjects)

Unnamed: 0,0
maths,67
science,57
english,89
hindi,99


In [8]:
# setting a name
marks = pd.Series(marks, index=subjects, name='Raja ke marks')
marks

Unnamed: 0,Raja ke marks
maths,67
science,57
english,89
hindi,99


# Series from dict

In [9]:
marks = {
    'maths': 67,
    'science': 57,
    'english': 89,
    'hindi': 99
}
marks_series = pd.Series(marks)
marks_series

Unnamed: 0,0
maths,67
science,57
english,89
hindi,99


**Series Attributes**

In [10]:
# size
marks_series.size

4

In [11]:
# dtype
marks_series.dtype

dtype('int64')

In [12]:
# name
marks_series.name

In [13]:
# is_unique
marks_series.is_unique

True

In [14]:
# index
marks_series.index

Index(['maths', 'science', 'english', 'hindi'], dtype='object')

In [15]:
# values
marks_series.values

array([67, 57, 89, 99])

**Series using read_csv**

In [25]:
# with one col
subs = pd.read_csv('/content/subs.csv')
subs

Unnamed: 0,Subscribers gained
0,48
1,57
2,40
3,43
4,44
...,...
360,231
361,226
362,155
363,144


In [27]:
# with 2 column
pd.read_csv('/content/kohli_ipl.csv')

Unnamed: 0,match_no,runs
0,1,1
1,2,23
2,3,13
3,4,12
4,5,1
...,...,...
210,211,0
211,212,20
212,213,73
213,214,25


In [29]:
series = pd.read_csv('/content/kohli_ipl.csv', index_col='match_no')['runs']
series

Unnamed: 0_level_0,runs
match_no,Unnamed: 1_level_1
1,1
2,23
3,13
4,12
5,1
...,...
211,0
212,20
213,73
214,25


**Series Methods**

In [30]:
# head and tail
series.head()

Unnamed: 0_level_0,runs
match_no,Unnamed: 1_level_1
1,1
2,23
3,13
4,12
5,1


In [31]:
subs.head()

Unnamed: 0,Subscribers gained
0,48
1,57
2,40
3,43
4,44


In [32]:
subs.head(3)

Unnamed: 0,Subscribers gained
0,48
1,57
2,40


In [33]:
subs.tail()

Unnamed: 0,Subscribers gained
360,231
361,226
362,155
363,144
364,172


In [34]:
# sample
subs.sample()

Unnamed: 0,Subscribers gained
227,214


In [35]:
df = pd.read_csv('/content/bollywood.csv')

In [36]:
df.head()

Unnamed: 0,movie,lead
0,Uri: The Surgical Strike,Vicky Kaushal
1,Battalion 609,Vicky Ahuja
2,The Accidental Prime Minister (film),Anupam Kher
3,Why Cheat India,Emraan Hashmi
4,Evening Shadows,Mona Ambegaonkar


In [38]:
df.sample()

Unnamed: 0,movie,lead
642,Satya 2,Puneet Singh Ratn


In [41]:
# value_counts -> movie
df.value_counts()

Unnamed: 0_level_0,Unnamed: 1_level_0,count
movie,lead,Unnamed: 2_level_1
Andaaz,Akshay Kumar,2
Tanu Weds Manu: Returns,Kangana Ranaut,2
Gabbar Is Back,Akshay Kumar,2
Noor (film),Sunny Leone,1
No Smoking (2007 film),John Abraham,1
...,...,...
Genius (2018 Hindi film),Utkarsh Sharma,1
Gayab,Tusshar Kapoor,1
Gauri: The Unborn,Mohan Azaad,1
Gattu,Naresh Sharma,1


In [45]:
vk = pd.read_csv('/content/kohli_ipl.csv')

In [46]:
vk

Unnamed: 0,match_no,runs
0,1,1
1,2,23
2,3,13
3,4,12
4,5,1
...,...,...
210,211,0
211,212,20
212,213,73
213,214,25


In [47]:
# sort_value -> inplace
vk.sort_values(by='runs', ascending=False)

Unnamed: 0,match_no,runs
127,128,113
125,126,109
122,123,108
119,120,100
163,164,100
...,...,...
92,93,0
129,130,0
205,206,0
206,207,0


In [52]:
vk.sort_values(by = ['runs'],ascending=False).head(1).values[0]

array([128, 113])

In [53]:
# sort_index -> inplace -> movies
vk.sort_index(ascending=False)

Unnamed: 0,match_no,runs
214,215,7
213,214,25
212,213,73
211,212,20
210,211,0
...,...,...
4,5,1
3,4,12
2,3,13
1,2,23


In [55]:
df

Unnamed: 0,movie,lead
0,Uri: The Surgical Strike,Vicky Kaushal
1,Battalion 609,Vicky Ahuja
2,The Accidental Prime Minister (film),Anupam Kher
3,Why Cheat India,Emraan Hashmi
4,Evening Shadows,Mona Ambegaonkar
...,...,...
1495,Hum Tumhare Hain Sanam,Shah Rukh Khan
1496,Aankhen (2002 film),Amitabh Bachchan
1497,Saathiya (film),Vivek Oberoi
1498,Company (film),Ajay Devgn


In [56]:
df.sort_index(inplace = True)

In [57]:
df

Unnamed: 0,movie,lead
0,Uri: The Surgical Strike,Vicky Kaushal
1,Battalion 609,Vicky Ahuja
2,The Accidental Prime Minister (film),Anupam Kher
3,Why Cheat India,Emraan Hashmi
4,Evening Shadows,Mona Ambegaonkar
...,...,...
1495,Hum Tumhare Hain Sanam,Shah Rukh Khan
1496,Aankhen (2002 film),Amitabh Bachchan
1497,Saathiya (film),Vivek Oberoi
1498,Company (film),Ajay Devgn


**Series Maths Methods**

In [58]:
#count
vk.count()

Unnamed: 0,0
match_no,215
runs,215


In [59]:
# sum and product
subs.sum()


Unnamed: 0,0
Subscribers gained,49510


In [60]:
subs.product()

Unnamed: 0,0
Subscribers gained,0


In [61]:
# mean , median, mode, std, var
subs.mean()

Unnamed: 0,0
Subscribers gained,135.643836


In [62]:
print(vk.median())
print(vk.mode())
print(vk.std())
print(vk.var())

match_no    108.0
runs         24.0
dtype: float64
     match_no  runs
0           1   0.0
1           2   NaN
2           3   NaN
3           4   NaN
4           5   NaN
..        ...   ...
210       211   NaN
211       212   NaN
212       213   NaN
213       214   NaN
214       215   NaN

[215 rows x 2 columns]
match_no    62.209324
runs        26.229801
dtype: float64
match_no    3870.000000
runs         688.002478
dtype: float64


In [63]:
# min/max
print(vk.min())
print(vk.max())

match_no    1
runs        0
dtype: int64
match_no    215
runs        113
dtype: int64


In [64]:
# describe
vk.describe()

Unnamed: 0,match_no,runs
count,215.0,215.0
mean,108.0,30.855814
std,62.209324,26.229801
min,1.0,0.0
25%,54.5,9.0
50%,108.0,24.0
75%,161.5,48.0
max,215.0,113.0


In [65]:
subs.describe()

Unnamed: 0,Subscribers gained
count,365.0
mean,135.643836
std,62.675023
min,33.0
25%,88.0
50%,123.0
75%,177.0
max,396.0


**Series Indexing**

In [66]:
# ineger indexing
x = pd.Series([12,34,56,78,1,34,54,23,32,99])
x[0]


np.int64(12)

**Series with python Functionalites**

In [67]:
# len / type / dir / sorted / max / min
print(len(x))
print(type(x))
print(dir(x))
print(sorted(x))
print(max(x))
print(min(x))

10
<class 'pandas.core.series.Series'>
['T', '_AXIS_LEN', '_AXIS_ORDERS', '_AXIS_TO_AXIS_NUMBER', '_HANDLED_TYPES', '__abs__', '__add__', '__and__', '__annotations__', '__array__', '__array_priority__', '__array_ufunc__', '__bool__', '__class__', '__column_consortium_standard__', '__contains__', '__copy__', '__deepcopy__', '__delattr__', '__delitem__', '__dict__', '__dir__', '__divmod__', '__doc__', '__eq__', '__finalize__', '__float__', '__floordiv__', '__format__', '__ge__', '__getattr__', '__getattribute__', '__getitem__', '__getstate__', '__gt__', '__hash__', '__iadd__', '__iand__', '__ifloordiv__', '__imod__', '__imul__', '__init__', '__init_subclass__', '__int__', '__invert__', '__ior__', '__ipow__', '__isub__', '__iter__', '__itruediv__', '__ixor__', '__le__', '__len__', '__lt__', '__matmul__', '__mod__', '__module__', '__mul__', '__ne__', '__neg__', '__new__', '__nonzero__', '__or__', '__pandas_priority__', '__pos__', '__pow__', '__radd__', '__rand__', '__rdivmod__', '__reduce_

In [68]:
# type conversion
print(list(x))
print(tuple(x))
print(set(x))
print(dict(x))

[12, 34, 56, 78, 1, 34, 54, 23, 32, 99]
(12, 34, 56, 78, 1, 34, 54, 23, 32, 99)
{32, 1, 34, 99, 12, 78, 54, 23, 56}
{0: np.int64(12), 1: np.int64(34), 2: np.int64(56), 3: np.int64(78), 4: np.int64(1), 5: np.int64(34), 6: np.int64(54), 7: np.int64(23), 8: np.int64(32), 9: np.int64(99)}


In [70]:
x

Unnamed: 0,0
0,12
1,34
2,56
3,78
4,1
5,34
6,54
7,23
8,32
9,99


In [71]:
# membership operator
print(12 in x)
print(12 not in x)
print(12 in x.values)
print(12 not in x.values)

False
True
True
False


In [73]:
# loops
for i in x:
  print(i)


12
34
56
78
1
34
54
23
32
99


In [74]:
# arithmetic operations
print(x + 10)
print(x - 10)
print(x * 10)
print(x / 10)

0     22
1     44
2     66
3     88
4     11
5     44
6     64
7     33
8     42
9    109
dtype: int64
0     2
1    24
2    46
3    68
4    -9
5    24
6    44
7    13
8    22
9    89
dtype: int64
0    120
1    340
2    560
3    780
4     10
5    340
6    540
7    230
8    320
9    990
dtype: int64
0    1.2
1    3.4
2    5.6
3    7.8
4    0.1
5    3.4
6    5.4
7    2.3
8    3.2
9    9.9
dtype: float64


In [76]:
# Relational operators
print(x > 50)
print(x < 50)
print(x >= 50)
print(x <= 50)
print(x == 50)
print(x != 50)

0    False
1    False
2     True
3     True
4    False
5    False
6     True
7    False
8    False
9     True
dtype: bool
0     True
1     True
2    False
3    False
4     True
5     True
6    False
7     True
8     True
9    False
dtype: bool
0    False
1    False
2     True
3     True
4    False
5    False
6     True
7    False
8    False
9     True
dtype: bool
0     True
1     True
2    False
3    False
4     True
5     True
6    False
7     True
8     True
9    False
dtype: bool
0    False
1    False
2    False
3    False
4    False
5    False
6    False
7    False
8    False
9    False
dtype: bool
0    True
1    True
2    True
3    True
4    True
5    True
6    True
7    True
8    True
9    True
dtype: bool
