## Pandas - Series

In [1]:
import pandas as pd
import numpy as np

In [3]:
# In milions
g7_pop = pd.Series([35.454, 45.233, 64.222, 123.44, 45.22, 12.232, 98.444])

In [4]:
g7_pop

0     35.454
1     45.233
2     64.222
3    123.440
4     45.220
5     12.232
6     98.444
dtype: float64

In [4]:
g7_pop.name = "G7 population in millions"

In [5]:
g7_pop

0     35.454
1     45.233
2     64.222
3    123.440
4     45.220
5     12.232
6     98.444
Name: G7 population in millions, dtype: float64

In [6]:
g7_pop.index = [
    'Canada',
    'France',
    'Germany',
    'Italy',
    'Japan',
    'United States',
    'United Kingdon'
]

In [7]:
g7_pop

Canada             35.454
France             45.233
Germany            64.222
Italy             123.440
Japan              45.220
United States      12.232
United Kingdon     98.444
Name: G7 population in millions, dtype: float64

In [8]:
g7_pop ['Canada']

np.float64(35.454)

In [9]:
g7_pop.iloc[0]

np.float64(35.454)

### Conditional Selection

In [None]:
# g7_popupaltion
g7_pop[g7_pop > 70]

Italy             123.440
United Kingdon     98.444
Name: G7 population in millions, dtype: float64

In [11]:
g7_pop * 1000000

Canada             35454000.0
France             45233000.0
Germany            64222000.0
Italy             123440000.0
Japan              45220000.0
United States      12232000.0
United Kingdon     98444000.0
Name: G7 population in millions, dtype: float64

In [12]:
g7_pop > 70

Canada            False
France            False
Germany           False
Italy              True
Japan             False
United States     False
United Kingdon     True
Name: G7 population in millions, dtype: bool

In [13]:
g7_pop['Canada'] = 30.009

In [14]:
g7_pop

Canada             30.009
France             45.233
Germany            64.222
Italy             123.440
Japan              45.220
United States      12.232
United Kingdon     98.444
Name: G7 population in millions, dtype: float64

# Data Example

In [11]:
import pandas as pd
df = pd.read_csv("g7_data.csv")

In [17]:
print(df)

       Unnamed: 0 Quarter on previous quarter (%) Unnamed: 2 Unnamed: 3  \
0         Country                         2023 Q1    2023 Q2    2023 Q3   
1          Canada                             0.3       -0.4         -1   
2          France                            -0.1        0.6          0   
3         Germany                            -0.1       -0.2        0.1   
4           Italy                             0.4       -0.1        0.3   
5           Japan                             1.4        0.7       -0.9   
6  United Kingdom                            -0.2       -0.3       -0.4   
7   United States                             0.5        0.4          1   

  Unnamed: 4 Unnamed: 5 Unnamed: 6  Unnamed: 7 Annual (%)  
0    2023 Q4    2024 Q1    2024 Q2         NaN       2023  
1         -1       -0.2          .         NaN       -1.5  
2          0          .          .         NaN        0.3  
3       -0.5        0.2       -0.1         NaN       -1.1  
4          0        0.4 

In [25]:
df

Unnamed: 0,Country,2023 Q1,2023 Q2,2023 Q3,2023 Q4,2024 Q1,2024 Q2,Unnamed: 7,2023
0,Canada,0.3,-0.4,-1.0,-1,-0.2,.,,-1.5
1,France,-0.1,0.6,0.0,0,.,.,,0.3
2,Germany,-0.1,-0.2,0.1,-0.5,0.2,-0.1,,-1.1
3,Italy,0.4,-0.1,0.3,0,0.4,0.2,,1
4,Japan,1.4,0.7,-0.9,.,.,.,,.
5,United Kingdom,-0.2,-0.3,-0.4,-0.6,0.5,0.2,,-0.8
6,United States,0.5,0.4,1.0,0.7,0.2,0.6,,2


In [7]:
df.columns

Index(['Unnamed: 0', 'Quarter on previous quarter (%)', 'Unnamed: 2',
       'Unnamed: 3', 'Unnamed: 4', 'Unnamed: 5', 'Unnamed: 6', 'Unnamed: 7',
       'Annual (%)'],
      dtype='object')

In [8]:
df.index

RangeIndex(start=0, stop=8, step=1)

In [13]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7 entries, 0 to 6
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Country     7 non-null      object 
 1   2023 Q1     7 non-null      float64
 2   2023 Q2     7 non-null      float64
 3   2023 Q3     7 non-null      float64
 4   2023 Q4     7 non-null      object 
 5   2024 Q1     7 non-null      object 
 6   2024 Q2     7 non-null      object 
 7   Unnamed: 7  0 non-null      float64
 8   2023        7 non-null      object 
dtypes: float64(4), object(5)
memory usage: 636.0+ bytes


In [23]:
df.size

72

In [24]:
df.shape

(8, 9)

In [14]:
df.describe()

Unnamed: 0,2023 Q1,2023 Q2,2023 Q3,Unnamed: 7
count,7.0,7.0,7.0,0.0
mean,0.314286,0.1,-0.128571,
std,0.552052,0.454606,0.701699,
min,-0.2,-0.4,-1.0,
25%,-0.1,-0.25,-0.65,
50%,0.3,-0.1,0.0,
75%,0.45,0.5,0.2,
max,1.4,0.7,1.0,


In [15]:
df.dtypes

Country        object
2023 Q1       float64
2023 Q2       float64
2023 Q3       float64
2023 Q4        object
2024 Q1        object
2024 Q2        object
Unnamed: 7    float64
2023           object
dtype: object

In [19]:
df.dtypes.value_counts()

object     5
float64    4
Name: count, dtype: int64

# Indexing, Selection and Slicing

In [24]:
df.loc['Canada']

KeyError: 'Canada'

In [26]:
df.iloc[-1]

Country       United States
2023 Q1                 0.5
2023 Q2                 0.4
2023 Q3                 1.0
2023 Q4                 0.7
2024 Q1                 0.2
2024 Q2                 0.6
Unnamed: 7              NaN
2023                      2
Name: 6, dtype: object

In [28]:
df['population']

KeyError: 'population'