In [1]:
import pandas as pd

# A plain Python list is the simplest input for a Series. The output below shows
# the default integer index (0, 1, 2) and the strings stored with dtype `object`.
students = [
    'Naveen',
    'Neeraj',
    'Praveen',
]
pd.Series(students)

0     Naveen
1     Neeraj
2    Praveen
dtype: object

In [2]:
# A list of whole numbers is stored with an integer dtype (int64 in the output below).
numbers = [1, 2, 3]
pd.Series(numbers)

0    1
1    2
2    3
dtype: int64

In [3]:
# A missing entry in a list of strings is kept as None and the dtype stays `object`.
students = [
    'Naveen',
    'Neeraj',
    None,
]
pd.Series(students)

0    Naveen
1    Neeraj
2      None
dtype: object

In [4]:
# A missing entry in a list of numbers is shown as NaN, and the whole Series
# becomes float64 (see the output below) because NaN is a floating-point value.
numbers = [1, 2, None]
pd.Series(numbers)

0    1.0
1    2.0
2    NaN
dtype: float64

In [5]:
# NaN ("Not a Number") is a special floating-point value.
# Note: None and NaN are not the same thing.

In [6]:
import numpy as np
# NaN and None are distinct values, so this equality test evaluates to False
np.nan == None

False

In [7]:
# NaN is not even equal to itself, so == cannot be used to detect it
np.nan == np.nan

False

In [8]:
# np.isnan is the reliable way to test whether a value is NaN
np.isnan(np.nan)

True

In [9]:
# A dictionary can be passed straight to pd.Series: the keys become the
# index labels and the values become the data.
students = {
    'Naveen': 'Maths',
    'Neeraj': 'Accounts',
    'Praveen': 'Computing',
}
serie = pd.Series(students)
serie

Naveen         Maths
Neeraj      Accounts
Praveen    Computing
dtype: object

In [10]:
# The `index` attribute returns the Index object that holds the Series' labels
serie.index

Index(['Naveen', 'Neeraj', 'Praveen'], dtype='object')

In [11]:
# Elements can be arbitrary Python objects; here each one is a tuple of two names,
# and the resulting Series has dtype `object`.
students = [
    ('Naveen', 'Kumar'),
    ('Neeraj', 'Kumar'),
    ('Praveen', 'Khambra'),
]
s = pd.Series(students)
s

0       (Naveen, Kumar)
1       (Neeraj, Kumar)
2    (Praveen, Khambra)
dtype: object

In [12]:
# Index labels can be supplied explicitly through the `index` argument;
# they are matched to the values by position.
se = pd.Series(
    ['Qutub Minar', 'Char Minar', 'Golden Temple'],
    index=['Delhi', 'Hyderabad', 'Punjab'],
)
se

Delhi          Qutub Minar
Hyderabad       Char Minar
Punjab       Golden Temple
dtype: object

In [14]:
# When a dictionary and an explicit index are both given, the index wins:
# only the labels listed in `index` appear in the result. A dictionary key that
# is not listed ('Jack') is dropped, and a listed label with no matching key
# ('Sam') is filled with NaN.
students_scores = {
    'Alice': 'Physics',
    'Jack': 'Chemistry',
    'Molly': 'English',
}

ser = pd.Series(students_scores, index=['Alice', 'Molly', 'Sam'])
ser


Alice    Physics
Molly    English
Sam          NaN
dtype: object

# Querying Series

In [15]:
students_classes = {
    'Alice': 'Physics',
    'Jack': 'Chemistry',
    'Molly': 'English',
    'Sam': 'History',
}

s = pd.Series(students_classes)
# .iloc queries by integer position: position 3 is the fourth entry
s.iloc[3]

'History'

In [16]:
s.loc['Molly']  # .loc queries by index label rather than by position

'English'

In [22]:
# Build a large Series of random integers so that two ways of computing its
# average can be timed against each other: a Python-level loop and a vectorized call.
import numpy as np

# A fixed seed makes the data identical on every Restart & Run All.
rng = np.random.default_rng(42)
numbers = pd.Series(rng.integers(0, 1000, 100000))  # 100000 values drawn from 0..999
len(numbers)

100000

In [None]:
# We will use the %%timeit cell magic to measure how long an operation takes.
# Note: a cell magic must be the first line of its cell.
# By default timeit picks the number of loops automatically and reports the average time per loop; here we fix it at 100 loops with -n 100.

In [23]:
%%timeit -n 100
# Iterative approach: add the values up one at a time in a Python-level loop
total = 0
for num in numbers:
    total += num
    
# dividing the running total by the element count gives the mean
total/len(numbers)

11.1 ms ± 13.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [None]:
# Let's try the same calculation with vectorization

In [24]:
%%timeit -n 100
# Vectorized approach: np.sum adds all the values in one call instead of a Python loop
total = np.sum(numbers)
total/len(numbers)


135 µs ± 1.91 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [None]:
# Let's take one more example of iterating vs. vectorization

In [27]:
%%timeit -n 10
se = pd.Series(np.random.randint(0,1000,1000))
for label, value in se.iteritems():
    se.loc[label] = value + 2

se.head()   
                                    

127 ms ± 2.47 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [28]:
%%timeit -n 10
# Vectorized approach: a single += updates every element of the Series at once
se = pd.Series(np.random.randint(0,1000,1000))
se += 2

The slowest run took 6.61 times longer than the fastest. This could mean that an intermediate result is being cached.
437 µs ± 470 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [30]:
# .loc does more than look values up: assigning to a label that does not exist
# yet adds a new entry (index label and value) to the Series. Keep this in mind,
# because an unintended label adds data to the Series instead of raising an error.
s = pd.Series([1, 2, 3])
s.loc['History'] = 100
s

0            1
1            2
2            3
History    100
dtype: int64

In [32]:
# Index labels do not have to be unique: all three entries below share the label 'Kelly'.
ser = pd.Series(
    ['Physics', 'Maths', 'SST'],
    index=['Kelly', 'Kelly', 'Kelly'],
)
ser

Kelly    Physics
Kelly      Maths
Kelly        SST
dtype: object

In [35]:
# A second Series, built from a dictionary, whose index labels are all unique
serr = pd.Series({
    'Alice': 'Physics',
    'Jack': 'Chemistry',
    'Molly': 'History',
})
serr

Alice      Physics
Jack     Chemistry
Molly      History
dtype: object

In [36]:
# Stack the two Series into a new one, keeping every label (duplicates included).
# pd.concat is used because Series.append was removed in pandas 2.0; like append,
# it returns a new object and leaves both inputs unchanged.
serr_new = pd.concat([serr, ser])
serr_new

Alice      Physics
Jack     Chemistry
Molly      History
Kelly      Physics
Kelly        Maths
Kelly          SST
dtype: object

In [37]:
# `serr` itself is unchanged: combining the two Series produced a new object
serr

Alice      Physics
Jack     Chemistry
Molly      History
dtype: object

In [38]:
# Querying a label that occurs more than once returns every matching entry as a Series
serr_new.loc['Kelly']

Kelly    Physics
Kelly      Maths
Kelly        SST
dtype: object