# The Series Data Structure

In [2]:
import pandas as pd
# IPython help: a trailing `?` shows the documentation for pd.Series.
pd.Series?

In [3]:
# A list of strings becomes an object-dtype Series with a default integer index.
animals = [
    'Tiger',
    'Bear',
    'Moose',
]
pd.Series(data=animals)

0    Tiger
1     Bear
2    Moose
dtype: object

In [4]:
# A list of integers is stored with an integer dtype.
numbers = [
    1,
    2,
    3,
]
pd.Series(data=numbers)

0    1
1    2
2    3
dtype: int64

In [5]:
# A missing entry in a list of strings is kept as None; the dtype stays object.
animals = [
    'Tiger',
    'Bear',
    None,
]
pd.Series(data=animals)

0    Tiger
1     Bear
2     None
dtype: object

In [6]:
# A missing entry in a list of numbers is converted to NaN, which makes the
# resulting Series float64.
numbers = [
    1,
    2,
    None,
]
pd.Series(data=numbers)

0    1.0
1    2.0
2    NaN
dtype: float64

In [7]:
import numpy as np
# NaN is not the same thing as None: this comparison evaluates to False.
np.nan == None

False

In [8]:
# NaN does not compare equal even to itself, so == cannot be used to detect it.
np.nan == np.nan

False

In [9]:
# Use np.isnan to test for NaN instead of an equality comparison.
np.isnan(np.nan)

True

In [10]:
# Map each sport to a country. Building a Series from a dict uses the dict
# keys as the index labels.
sports = {
    'Archery': 'Bhutan',
    'Golf': 'Scotland',
    'Sumo': 'Japan',
    'Taekwondo': 'South Korea',
}
s = pd.Series(data=sports)
s

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
dtype: object

In [11]:
# The index holds the labels, here the keys of the original dict.
s.index

Index(['Archery', 'Golf', 'Sumo', 'Taekwondo'], dtype='object')

In [12]:
# Values and index labels can be supplied separately; they are paired by position.
s = pd.Series(
    data=['Tiger', 'Bear', 'Moose'],
    index=['India', 'America', 'Canada'],
)
s

India      Tiger
America     Bear
Canada     Moose
dtype: object

# Querying a Series

In [13]:
# Rebuild the sports Series so the querying examples start from a known state.
sports = {
    'Archery': 'Bhutan',
    'Golf': 'Scotland',
    'Sumo': 'Japan',
    'Taekwondo': 'South Korea',
}
s = pd.Series(data=sports)
s

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
dtype: object

In [14]:
# iloc selects by integer position: position 3 is the fourth (last) entry.
s.iloc[3]

'South Korea'

In [15]:
# loc selects by index label.
s.loc['Golf']

'Scotland'

In [16]:
# Plain [] with an integer returns the same entry as s.iloc[3] here, where
# the index labels are strings.
# NOTE(review): recent pandas releases deprecate this positional fallback;
# prefer s.iloc[3] -- confirm against the installed version.
s[3]

'South Korea'

In [17]:
# Plain [] with a label returns the same entry as s.loc['Golf'].
s['Golf']

'Scotland'

In [18]:
# Four float values that the next cells add up in two different ways.
s = pd.Series(
    data=[100.00, 120.00, 101.00, 3.00],
)
s

0    100.0
1    120.0
2    101.0
3      3.0
dtype: float64

In [19]:
# Sum the values the slow way, by iterating over the Series in Python.
total = 0
for item in s:
    total = total + item
print(total)

324.0


In [20]:
import numpy as np

# Same total as the explicit loop, computed with a single numpy call.
total = np.sum(s)
print(total)

324.0


In [21]:
# Build a large Series: 10,000 random integers drawn from [0, 1000).
s = pd.Series(
    np.random.randint(low=0, high=1000, size=10000)
)
s.head()

0    364
1    668
2    979
3    385
4    211
dtype: int64

In [22]:
# Confirm the Series has the expected number of entries.
len(s)

10000

In [27]:
# how much time does it take to execute?
%%timeit -n 100
summary = 0
for item in s:
    summary+=item

UsageError: Line magic function `%%timeit` not found.


In [26]:
%%timeit -n 100
# Time the numpy sum over the same Series for comparison with the Python loop.
summary = np.sum(s)

69.9 µs ± 4.69 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [28]:
# Broadcasting: the scalar 2 is added to every item of s in a single operation.
s+=2
s.head()

0    366
1    670
2    981
3    387
4    213
dtype: int64

In [29]:
# Add 2 to every item the slow way, one label at a time.
# Series.iteritems() and Series.set_value() have been removed from pandas;
# Series.items() and the .at accessor are the supported equivalents.
for label, value in s.items():
    s.at[label] = value + 2
s.head()

  


0    368
1    672
2    983
3    389
4    215
dtype: int64

In [30]:
%%timeit -n 10
s = pd.Series(np.random.randint(0,1000,10000))
for label, value in s.iteritems():
    s.loc[label]= value+2

3.88 s ± 23.4 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [31]:
%%timeit -n 10
# Broadcast update of a same-sized Series, for comparison with the per-item loop.
s = pd.Series(np.random.randint(0,1000,10000))
s+=2

335 µs ± 42.3 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


# DataFrame Data Structure

In [32]:
import pandas as pd

# Each purchase is one record: a Series whose index labels are the field names.
purchase_1 = pd.Series(data={'Name': 'Chris', 'Item Purchased': 'Dog Food', 'Cost': 22.50})
purchase_2 = pd.Series(data={'Name': 'Kevyn', 'Item Purchased': 'Kitty Litter', 'Cost': 2.50})
purchase_3 = pd.Series(data={'Name': 'Vinod', 'Item Purchased': 'Bird Seed', 'Cost': 5.00})

# Stack the records into a DataFrame, one row per purchase. Row labels may
# repeat: the first two purchases share the label 'Store 1'.
df = pd.DataFrame(
    data=[purchase_1, purchase_2, purchase_3],
    index=['Store 1', 'Store 1', 'Store 2'],
)
df.head()

Unnamed: 0,Name,Item Purchased,Cost
Store 1,Chris,Dog Food,22.5
Store 1,Kevyn,Kitty Litter,2.5
Store 2,Vinod,Bird Seed,5.0


In [33]:
# A label that matches a single row returns that row as a Series.
df.loc['Store 2']

Name                  Vinod
Item Purchased    Bird Seed
Cost                      5
Name: Store 2, dtype: object

# DataFrame

In [35]:
import pandas as pd

# Three purchase records, one per store. Every record is defined in this cell
# so the frame does not depend on variables left over from an earlier cell,
# and no record is assigned only to be overwritten on the next line.
purchase_1 = pd.Series({'Name':'Hem','Item Purchased':'Goat Food','Cost':22.5})
purchase_2 = pd.Series({'Name':'Kevyn','Item Purchased':'Kitty Litter','Cost':2.5})
purchase_3 = pd.Series({'Name':'Vinod','Item Purchased':'Bird Seed','Cost':5.0})
df = pd.DataFrame([purchase_1,purchase_2,purchase_3],index = ['Store 1','Store 2','Store 3'])
df.head()

Unnamed: 0,Name,Item Purchased,Cost
Store 1,Hem,Goat Food,22.5
Store 2,Kevyn,Kitty Litter,2.5
Store 3,Vinod,Bird Seed,5.0


In [36]:
# Select the row labelled 'Store 2'.
df.loc['Store 2']

Name                     Kevyn
Item Purchased    Kitty Litter
Cost                       2.5
Name: Store 2, dtype: object

In [37]:
# A single-row selection with loc comes back as a Series.
type(df.loc['Store 2'])

pandas.core.series.Series

In [38]:
# Select the row labelled 'Store 1'.
df.loc['Store 1']

Name                    Hem
Item Purchased    Goat Food
Cost                   22.5
Name: Store 1, dtype: object

In [39]:
# Indexing with a column name selects that column as a Series.
df['Item Purchased']

Store 1       Goat Food
Store 2    Kitty Litter
Store 3       Bird Seed
Name: Item Purchased, dtype: object

In [40]:
# Look up one cell by row and column label in a single loc call instead of
# chaining two lookups, which builds an intermediate row Series first.
df.loc['Store 1', 'Cost']

22.5

In [41]:
# Select all rows (:) and only the 'Name' and 'Cost' columns.
df.loc[:,['Name', 'Cost']]

Unnamed: 0,Name,Cost
Store 1,Hem,22.5
Store 2,Kevyn,2.5
Store 3,Vinod,5.0


In [43]:
# drop returns a new frame without the 'Store 1' row; df itself is not modified.
df.drop('Store 1')

Unnamed: 0,Name,Item Purchased,Cost
Store 2,Kevyn,Kitty Litter,2.5
Store 3,Vinod,Bird Seed,5.0


In [46]:
# Work on a copy so that the column deletion that follows does not touch df.
copy_df = df.copy()

In [47]:
# del removes the 'Name' column from copy_df in place.
del copy_df['Name']
copy_df

Unnamed: 0,Item Purchased,Cost
Store 1,Goat Food,22.5
Store 2,Kitty Litter,2.5
Store 3,Bird Seed,5.0


In [49]:
# Assigning a scalar to a new column name adds that column with the same
# value in every row.
df['Location']= None
df

Unnamed: 0,Name,Item Purchased,Cost,Location
Store 1,Hem,Goat Food,22.5,
Store 2,Kevyn,Kitty Litter,2.5,
Store 3,Vinod,Bird Seed,5.0,


In [50]:
# Apply a 20% discount by scaling the whole Cost column at once.
df['Cost'] *= 0.8
# End the cell with the frame itself so the notebook renders it as a table
# rather than printing its plain-text form.
df

          Name Item Purchased  Cost Location
Store 1    Hem      Goat Food  18.0     None
Store 2  Kevyn   Kitty Litter   2.0     None
Store 3  Vinod      Bird Seed   4.0     None


In [51]:
# DataFrame Indexing and Loading

In [52]:
# costs is taken directly from df's Cost column, not from an independent copy:
# the later display of df shows the in-place updates made through costs.
costs = df['Cost']
costs

Store 1    18.0
Store 2     2.0
Store 3     4.0
Name: Cost, dtype: float64

In [53]:
# Add 2 to every cost with an in-place broadcast.
costs += 2
costs

Store 1    20.0
Store 2     4.0
Store 3     6.0
Name: Cost, dtype: float64

In [54]:
# Double every cost, again in place.
costs*=2
costs

Store 1    40.0
Store 2     8.0
Store 3    12.0
Name: Cost, dtype: float64

In [55]:
# Display df again: its Cost column shows the values produced through costs.
df

Unnamed: 0,Name,Item Purchased,Cost,Location
Store 1,Hem,Goat Food,40.0,
Store 2,Kevyn,Kitty Litter,8.0,
Store 3,Vinod,Bird Seed,12.0,


In [56]:
# !cat hem.csv


In [58]:
# df = pd.read_csv('hem.csv')
# skip_rows