In [6]:
import pandas as pd

In [7]:
# Load the BRICS table from the hosted CSV file.
# index_col=0 uses the first CSV column (the country codes BR, RU, ...) as the
# row index; a column label can be passed instead of the position.
brics = pd.read_csv(
	r"https://raw.githubusercontent.com/MohamedMostafa259/Pandas-Notes/refs/heads/main/Data/brics.csv",
	index_col=0,
)
brics

Unnamed: 0,country,capital,area,population
BR,Brazil,Brasilia,8.516,200.4
RU,Russia,Moscow,17.1,143.5
IN,India,New Delhi,3.286,1252.0
CH,China,Beijing,9.597,1357.0
SA,South Africa,Pretoria,1.221,52.98


# Add A New Column To A DataFrame
#### Approach 1: Traditional For Loops
- In general, for loops in Python are not efficient 
	- When you find a built-in or library function that performs the same task as the for loop, use it instead: it is most likely implemented internally in C or Fortran, which is much more efficient than a Python loop
#### Approach 2: List Comprehensions
- Readability: They make the code more readable and concise
- Efficiency: Faster than traditional for-loops due to Python's internal optimizations, but they do not reach the performance level of true vectorized operations
#### Approach 3: Vectorized Operations
- Refers to applying a function or operation to an entire array or data structure, such as a pandas Series or DataFrame, at once
- Easier to write, more readable, and more efficient (avoids the overhead of Python loops and repeated DataFrame index operations)

In [8]:
# Plain-text view of the frame as it stands before the new columns are added.
print(brics)

# Approach 1: traditional for loop.
# iterrows() yields (index label, row Series) pairs; each pass writes a single
# cell through .loc, and the first write is what creates "name_length1".
# In the output below this column displays as floats (6.0, 5.0, 12.0) while the
# other three display as ints -- presumably because the column holds NaN in the
# rows that have not been filled yet; verify with brics.dtypes.
for lab, row in brics.iterrows():
	brics.loc[lab, "name_length1"] = len(row["country"])

# Approach 2: list comprehension.
# Build the complete list of lengths first, then assign it as a column in one step.
brics["name_length2"] = [len(country) for country in brics["country"]]

# Approach 3: whole-column operations.
# apply() and map() both pass every element of "country" to len() and return a
# Series of the results, which becomes the new column in a single assignment
# (no explicit loop, no per-row .loc write).
# NOTE(review): len is an ordinary Python function, so it is still called once
# per element here; brics["country"].str.len() is the pandas built-in for this.
brics["name_length3"] = brics["country"].apply(len)
brics["name_length4"] = brics["country"].map(len)
brics

         country    capital    area  population
BR        Brazil   Brasilia   8.516      200.40
RU        Russia     Moscow  17.100      143.50
IN         India  New Delhi   3.286     1252.00
CH         China    Beijing   9.597     1357.00
SA  South Africa   Pretoria   1.221       52.98


Unnamed: 0,country,capital,area,population,name_length1,name_length2,name_length3,name_length4
BR,Brazil,Brasilia,8.516,200.4,6.0,6,6,6
RU,Russia,Moscow,17.1,143.5,6.0,6,6,6
IN,India,New Delhi,3.286,1252.0,5.0,5,5,5
CH,China,Beijing,9.597,1357.0,5.0,5,5,5
SA,South Africa,Pretoria,1.221,52.98,12.0,12,12,12


In [9]:
# A new column is assigned with the same syntax as setting a dictionary key,
# and the arithmetic is element-wise, as with NumPy arrays: every value in
# "population" is multiplied by 1e6.
# NOTE(review): "population" appears to be expressed in millions, so the result
# would be a head count -- confirm against the data source.
brics["population_ind"] = 1e6 * brics["population"]
brics

Unnamed: 0,country,capital,area,population,name_length1,name_length2,name_length3,name_length4,population_ind
BR,Brazil,Brasilia,8.516,200.4,6.0,6,6,6,200400000.0
RU,Russia,Moscow,17.1,143.5,6.0,6,6,6,143500000.0
IN,India,New Delhi,3.286,1252.0,5.0,5,5,5,1252000000.0
CH,China,Beijing,9.597,1357.0,5.0,5,5,5,1357000000.0
SA,South Africa,Pretoria,1.221,52.98,12.0,12,12,12,52980000.0


# Dropping Columns

In [10]:
# With inplace=True, drop() modifies brics itself and returns None;
# with inplace=False it returns a modified copy instead.
brics.drop(
	columns=[
		"name_length1",
		"name_length2",
		"name_length3",
		"name_length4",
		"population_ind",
	],
	inplace=True,
)
# Equivalent to:
# del brics["name_length1"]
# del brics["name_length2"]
# del brics["name_length3"]
# del brics["name_length4"]
# del brics["population_ind"]
# -----------------------
# To drop only the rows where a column has certain values (not the entire column), pass the index labels of those rows to the `drop` function ↓↓
	# → e.g., movies.drop(movies[movies['avg_rating'] > 5].index, inplace=True)
	# Equivalent to → movies = movies[movies['avg_rating'] <= 5]
	#   NOTE(review): the two differ for rows whose avg_rating is missing (the filter removes them, the drop keeps them) -- confirm which is wanted
	# OR we may replace them with custom minimums & maximums → movies.loc[movies['avg_rating'] > 5, 'avg_rating'] = 5
brics

Unnamed: 0,country,capital,area,population
BR,Brazil,Brasilia,8.516,200.4
RU,Russia,Moscow,17.1,143.5
IN,India,New Delhi,3.286,1252.0
CH,China,Beijing,9.597,1357.0
SA,South Africa,Pretoria,1.221,52.98
