# Querying a Series

In [2]:
import pandas as pd
# Map each sport to a country. Building a Series from this dict turns the
# keys into index labels and the values into the data.
sports = dict(
    basketball='usa',
    tenis='spain',
    football='spain',
    quidich='uk',
)
s = pd.Series(data=sports)
s

basketball      usa
football      spain
quidich          uk
tenis         spain
dtype: object

In [3]:
# .iloc selects by integer position: position 3 is the fourth element.
# NOTE(review): the output shown was produced with the index sorted
# alphabetically; a pandas version that keeps dict insertion order would
# presumably return a different element here -- verify when re-running.
s.iloc[3]

'spain'

In [5]:
# .loc selects by index label rather than by position.
s.loc['basketball']

'usa'

In [6]:
# Plain [] with a string key is resolved as an index label here.
s['tenis']

'spain'

In [9]:
# Integer keys work as index labels too: the resulting Series is indexed by
# 99, 100, 101 and 1000 rather than by positions 0..3.
points = {
    99: 'Me',
    100: 'Raquel',
    101: 'Dirac',
    1000: 'God',
}
p = pd.Series(data=points)
p

99          Me
100     Raquel
101      Dirac
1000       God
dtype: object

In [10]:
# A small float Series (default integer index) used for the summing examples.
s = pd.Series(data=[100.0, 30.0, 30.0, 45.0])
s

0    100.0
1     30.0
2     30.0
3     45.0
dtype: float64

The following approach works, but it is slow:

In [11]:
# Accumulate the sum one element at a time with a plain Python loop.
total = 0
for value in s:
    total = total + value
print(total)

205.0


Instead, we are going to use **vectorization**:

In [13]:
import numpy as np
# Same sum as the loop above, but computed with a single NumPy call on the
# whole Series instead of iterating in Python.
total = np.sum(s)
print(total)

205.0


To see the difference clearly, we create a large sample of random numbers:

In [14]:
# 10,000 random integers drawn from the half-open range [0, 10000).
s = pd.Series(np.random.randint(low=0, high=10000, size=10000))
s.head()  # show only the first five values

0    3529
1    5543
2    5650
3    7743
4    4192
dtype: int64

In [15]:
# Confirm the number of elements in the sample.
print(len(s))

10000


The `%%timeit` cell magic runs the code several times and reports the time taken per loop

In [16]:
%%timeit -n 100
# Baseline for the timing comparison: sum the Series element by element
# in a Python-level loop.
summary = 0
for item in s:
    summary += item
    

100 loops, best of 3: 1.3 ms per loop


In [17]:
%%timeit -n 100
# Vectorized counterpart of the timed loop: one NumPy call over the Series.
summary = np.sum(s)

100 loops, best of 3: 74.6 µs per loop


**Broadcasting**: with it we can apply an operation to every element of a Series at once, changing each value.

In [18]:
# The scalar 2 is added to every element of the Series in one operation.
# Because this updates `s` itself, re-running the cell adds 2 again.
s += 2
s.head()

0    3531
1    5545
2    5652
3    7745
4    4194
dtype: int64

In [21]:
# Manual, element-by-element equivalent of `s += 2`, shown for comparison
# with broadcasting.
# Series.iteritems() and Series.set_value() no longer exist in current
# pandas; items() yields the same (label, value) pairs and .at[] is the
# scalar setter that replaced set_value().
for label, value in s.items():
    s.at[label] = value + 2
s.head()

0    3535
1    5549
2    5656
3    7749
4    4198
dtype: int64

In [25]:
%%timeit -n 10
# Timed: build a fresh 10,000-element Series, then add 2 to each element
# one label at a time. Note that the Series construction is part of the
# timed code.
# Series.iteritems() was removed from pandas; items() yields the same
# (label, value) pairs.
s = pd.Series(np.random.randint(0, 10000, 10000))
for label, value in s.items():
    s.loc[label] = value + 2

10 loops, best of 3: 1.22 s per loop


In [27]:
%%timeit -n 10
# Timed: same setup as the loop version (the Series construction is part of
# the timed code), but the addition is done with a single broadcast
# expression. The result is discarded; only the elapsed time matters here.
s = pd.Series(np.random.randint(0, 10000, 10000))
s+2

10 loops, best of 3: 480 µs per loop


In [28]:
# Start from an integer Series with the default 0..3 index.
s = pd.Series(data=[1, 2, 3, 4])
# Assigning through .loc to a label that does not exist yet adds a new
# element; index labels and values may then be of mixed types.
s.loc['Animal'] = 'Bear'
s

0            1
1            2
2            3
3            4
Animal    Bear
dtype: object

In [29]:
# Two Series to combine: one with unique labels, one where every element
# shares the same 'Cricket' label (index labels need not be unique).
original_sports = pd.Series({'basketball': 'usa',
                             'tenis': 'spain',
                             'football': 'spain',
                             'quidich': 'uk'})
cricket_loving_countries = pd.Series(['Australia',
                                      'Barbados',
                                      'Pakistan',
                                      'Narnia'],
                                     index=['Cricket'] * 4)
# Series.append() was removed in pandas 2.0; pd.concat() is its replacement.
# It returns a new Series and leaves both inputs unmodified.
all_countries = pd.concat([original_sports, cricket_loving_countries])

In [30]:
# Display the combined Series: the repeated 'Cricket' label appears once
# per country.
all_countries

basketball          usa
football          spain
quidich              uk
tenis             spain
Cricket       Australia
Cricket        Barbados
Cricket        Pakistan
Cricket          Narnia
dtype: object

In [31]:
# Still holds only its four original entries: combining the two Series
# did not modify this one.
cricket_loving_countries

Cricket    Australia
Cricket     Barbados
Cricket     Pakistan
Cricket       Narnia
dtype: object