In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the BRICS table straight from GitHub. index_col=0 turns the first CSV
# column (the two-letter codes) into the row index.  OR: index_col=["col0_name"]
BRICS_CSV_URL = "https://raw.githubusercontent.com/MohamedMostafa259/Pandas-Notes/refs/heads/main/Data/brics.csv"
brics = pd.read_csv(BRICS_CSV_URL, index_col=0)
brics

Unnamed: 0,country,capital,area,population
BR,Brazil,Brasilia,8.516,200.4
RU,Russia,Moscow,17.1,143.5
IN,India,New Delhi,3.286,1252.0
CH,China,Beijing,9.597,1357.0
SA,South Africa,Pretoria,1.221,52.98


# Filtering Pandas DataFrames
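Example: we want to keep only the countries whose area is greater than 8.
1. Select the area column (this gives a Series): `brics["area"]`
2. Compare that Series with the threshold to get a boolean Series: `brics["area"] > 8`
3. Use the resulting boolean Series to filter the DataFrame: `brics[brics["area"] > 8]`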

In [3]:
# Filtering recipe: DataFrame[boolean Series or list] keeps the rows marked True.
area_above_8 = brics["area"] > 8       # boolean Series, one flag per row
area_below_10 = brics["area"] < 10
print(brics[area_above_8])  # OR: brics.loc[area_above_8]
# pandas is built on numpy, so np.logical_and() can combine the two masks element-wise
print(brics[np.logical_and(area_above_8, area_below_10)])
# Bitwise operators do the same job:  & => and , | => or , ~ => not
# (When the comparisons are written inline, wrap each one in parentheses,
#  e.g. (brics["area"] > 8) & (brics["area"] < 10), because & binds tighter than > and <.)
brics[area_above_8 & area_below_10]  # equivalent to the np.logical_and() line above

   country   capital    area  population
BR  Brazil  Brasilia   8.516       200.4
RU  Russia    Moscow  17.100       143.5
CH   China   Beijing   9.597      1357.0
   country   capital   area  population
BR  Brazil  Brasilia  8.516       200.4
CH   China   Beijing  9.597      1357.0


Unnamed: 0,country,capital,area,population
BR,Brazil,Brasilia,8.516,200.4
CH,China,Beijing,9.597,1357.0


In [4]:
# To keep rows whose categorical value is one of several options, build the mask
# with Series.isin() instead of chaining == comparisons with |.
wanted_countries = ["China", "India"]
is_india_or_china = brics["country"].isin(wanted_countries)  # Series[bool]
brics.loc[is_india_or_china]

Unnamed: 0,country,capital,area,population
IN,India,New Delhi,3.286,1252.0
CH,China,Beijing,9.597,1357.0


In [5]:
# A Series is filtered the same way as a DataFrame: index it with a boolean mask.
area_series = brics["area"]
is_larger_than_9 = area_series.gt(9)  # same as area_series > 9
print(area_series.loc[is_larger_than_9])

RU    17.100
CH     9.597
Name: area, dtype: float64


# Iterating Over A DataFrame

In [6]:
# DataFrame.iterrows() yields one (index label, row) pair per row.
# Each row arrives as a Series whose index is the DataFrame's column names
# (dtype object here, because the columns mix strings and floats).
for label, row in brics.iterrows():
	print(label)
	print(row)
print('\n')
# Pick a single item out of each row Series by column name.
# The inner quotes are single on purpose: reusing the f-string's own double
# quotes inside the braces is a SyntaxError on Python versions before 3.12.
for label, row in brics.iterrows():
	print(f"{label}: {row['capital']}")

BR
country         Brazil
capital       Brasilia
area             8.516
population       200.4
Name: BR, dtype: object
RU
country       Russia
capital       Moscow
area            17.1
population     143.5
Name: RU, dtype: object
IN
country           India
capital       New Delhi
area              3.286
population       1252.0
Name: IN, dtype: object
CH
country         China
capital       Beijing
area            9.597
population     1357.0
Name: CH, dtype: object
SA
country       South Africa
capital           Pretoria
area                 1.221
population           52.98
Name: SA, dtype: object


BR: Brasilia
RU: Moscow
IN: New Delhi
CH: Beijing
SA: Pretoria
