In [4]:
import pandas as pd
import numpy as np

pandas has two main data structures: the Series and the DataFrame.

### Pandas Series

In [5]:
# Seven population figures (in millions); no index is given, so pandas
# assigns the default integer labels.
g7_pop = pd.Series(
    data=[36.8, 50.8, 65.3, 69.7, 80.5, 88.65, 90.0],
)

In [6]:
g7_pop

0    36.80
1    50.80
2    65.30
3    69.70
4    80.50
5    88.65
6    90.00
dtype: float64

In [7]:
# A Series can carry a name, which is shown next to the dtype when it is displayed
g7_pop.name = "G7 Population in Millions"

In [8]:
g7_pop

0    36.80
1    50.80
2    65.30
3    69.70
4    80.50
5    88.65
6    90.00
Name: G7 Population in Millions, dtype: float64

In [9]:
# .dtype reports the data type of the values stored in the Series

g7_pop.dtype

dtype('float64')

In [10]:
# .values returns the underlying data (without the index labels) as a NumPy array

g7_pop.values

array([36.8 , 50.8 , 65.3 , 69.7 , 80.5 , 88.65, 90.  ])

In [11]:
# Confirm the container type that .values hands back
type(g7_pop.values)

numpy.ndarray

In [12]:
# With the default integer index, square brackets fetch an element by its
# index label, which reads just like indexing a Python list

g7_pop[0]

36.8

In [14]:
g7_pop[1]

50.8

In [19]:
# Plain square brackets look the key up among the index *labels*. The default
# RangeIndex only holds the labels 0..6, so -1 is not a label and the lookup
# raises KeyError. The error is caught and printed so the notebook still runs
# from top to bottom; use .iloc[-1] to get the last element by position.
try:
    g7_pop[-1]
except KeyError as exc:
    print(f"KeyError: {exc}")

KeyError: -1

In [20]:
# To look at the end of a Series, use the tail() method
g7_pop.tail(1)

# tail(1) returns the last element as a one-row Series (label and name included), not a bare value

6    90.0
Name: G7 Population in Millions, dtype: float64

In [24]:
g7_pop.tail(2)

# tail(2) returns the last two elements, again as a Series

5    88.65
6    90.00
Name: G7 Population in Millions, dtype: float64

In [25]:
# .index holds the labels; a Series built without an explicit index gets a RangeIndex
g7_pop.index

RangeIndex(start=0, stop=7, step=1)

In [26]:
g7_pop

0    36.80
1    50.80
2    65.30
3    69.70
4    80.50
5    88.65
6    90.00
Name: G7 Population in Millions, dtype: float64

In [28]:
g7_pop[6]

90.0

In [29]:
# You can replace the index of a pandas Series by assigning to its .index attribute


In [31]:
# Replace the default integer labels with one string label per value.
# NOTE(review): these countries are not the actual G7 members, so the series
# name is misleading; treat the figures as made-up sample data.
g7_pop.index = ['Canada', 'Brazil', 'Togo', 
                'Nigeria', 'Thailand', 'Japan', 'China']

In [32]:
g7_pop

Canada      36.80
Brazil      50.80
Togo        65.30
Nigeria     69.70
Thailand    80.50
Japan       88.65
China       90.00
Name: G7 Population in Millions, dtype: float64

In [33]:
# You can also build a Series in a single call by passing the values, the index labels and a name together

In [35]:
# Values, index labels and name are all supplied in one constructor call.
flo = pd.Series(
    data=[25, 28, 35, 20],
    index=['Bola', 'Tunde', 'Femi', 'Bukky'],
    name='family',
)

In [36]:
flo

Bola     25
Tunde    28
Femi     35
Bukky    20
Name: family, dtype: int64

In [40]:
# You can also build a new Series from an existing one: passing index= keeps
# only the entries with those labels, in the order given

new = pd.Series(flo, index=['Tunde', 'Bukky'])

In [41]:
new

Tunde    28
Bukky    20
Name: family, dtype: int64

In [42]:
# Look a single value up by its index label
new['Tunde']

28

In [43]:
# If you still want to look an element up by its integer position
# rather than by its index label, use the 'iloc' indexer.
# With 'iloc', negative positions work as well.

In [44]:
flo.iloc[0]

25

In [45]:
flo.iloc[-1]

20

In [51]:
flo

Bola     25
Tunde    28
Femi     35
Bukky    20
Name: family, dtype: int64

In [52]:
# You can also select several labels at once by passing a list of labels,
# which is why the square brackets are doubled
flo[['Bola', 'Tunde']]

Bola     25
Tunde    28
Name: family, dtype: int64

In [54]:
# Slicing also works on a pandas Series, with one difference from a Python list:
# a list slice excludes its end position, whereas a label-based slice
# on a Series includes the end label ('Femi' below)

flo['Bola':'Femi']

Bola     25
Tunde    28
Femi     35
Name: family, dtype: int64

### Operations and Methods

In [55]:
# Arithmetic operations can be applied directly to a pandas Series

g7_pop

Canada      36.80
Brazil      50.80
Togo        65.30
Nigeria     69.70
Thailand    80.50
Japan       88.65
China       90.00
Name: G7 Population in Millions, dtype: float64

In [57]:
# Multiplying by a scalar is applied to every element and returns a new Series;
# g7_pop itself is left unchanged
g7_pop * 1_000_000

Canada      36800000.0
Brazil      50800000.0
Togo        65300000.0
Nigeria     69700000.0
Thailand    80500000.0
Japan       88650000.0
China       90000000.0
Name: G7 Population in Millions, dtype: float64

In Python, there is no difference between the numbers 1000000 and 1_000_000. Both representations are just different ways of writing the same integer value.

The underscore character (_) can be used in Python numeric literals (Python 3.6+) as a visual separator to make large numbers easier to read. When you write a number with underscores, Python ignores them and treats the value exactly as if they were not present.

So, 1000000 and 1_000_000 represent the exact same integer value of one million. The use of underscores is purely for making the number more readable and easier to comprehend for humans, especially when dealing with large numbers.

In [58]:
g7_pop

Canada      36.80
Brazil      50.80
Togo        65.30
Nigeria     69.70
Thailand    80.50
Japan       88.65
China       90.00
Name: G7 Population in Millions, dtype: float64

In [59]:
# Comparisons are element-wise as well and produce a boolean Series
g7_pop > 70

Canada      False
Brazil      False
Togo        False
Nigeria     False
Thailand     True
Japan        True
China        True
Name: G7 Population in Millions, dtype: bool

In [60]:
# A pandas Series is mutable: assigning to a label overwrites that value in place

g7_pop['Nigeria'] = 200

In [61]:
g7_pop

Canada       36.80
Brazil       50.80
Togo         65.30
Nigeria     200.00
Thailand     80.50
Japan        88.65
China        90.00
Name: G7 Population in Millions, dtype: float64