# Adding & Removing Rows and Columns in Pandas

In [1]:
import pandas as pd

In [2]:
# Small Titanic-style passenger table: one list per column, one entry per
# passenger, all lists the same length.
data = dict(
    Name=["Jack", "Rose", "Tom", "Ananda"],
    Age=[22, 19, 25, 20],
    Sex=["male", "female", "male", "male"],
    Survived=[1, 1, 0, 1],
    Fare=[50, 100, 30, 60],
)

# Dict keys become the column labels; rows get the default 0..3 integer index.
df = pd.DataFrame(data)
df


Unnamed: 0,Name,Age,Sex,Survived,Fare
0,Jack,22,male,1,50
1,Rose,19,female,1,100
2,Tom,25,male,0,30
3,Ananda,20,male,1,60


In [3]:
# Create an "Embarked" column holding the same value in every row.
# Assigning through .loc to a column label that does not exist yet adds the
# column; the scalar is broadcast to all rows.
default_embarked = "C"
df.loc[:, "Embarked"] = default_embarked
df


Unnamed: 0,Name,Age,Sex,Survived,Fare,Embarked
0,Jack,22,male,1,50,C
1,Rose,19,female,1,100,C
2,Tom,25,male,0,30,C
3,Ananda,20,male,1,60,C


In [4]:
# Derive a new column from an existing one: every fare scaled by the tax
# multiplier (1.10, i.e. the fare plus 10%).
tax_multiplier = 1.10
df.loc[:, "Fare_after_tax"] = df["Fare"] * tax_multiplier
df


Unnamed: 0,Name,Age,Sex,Survived,Fare,Embarked,Fare_after_tax
0,Jack,22,male,1,50,C,55.0
1,Rose,19,female,1,100,C,110.0
2,Tom,25,male,0,30,C,33.0
3,Ananda,20,male,1,60,C,66.0


In [6]:
# Add another derived column: each passenger's age plus ten.
age_offset = 10
df.loc[:, "Age increase"] = df["Age"] + age_offset
df

Unnamed: 0,Name,Age,Sex,Survived,Fare,Embarked,Fare_after_tax,Age increase
0,Jack,22,male,1,50,C,55.0,32
1,Rose,19,female,1,100,C,110.0,29
2,Tom,25,male,0,30,C,33.0,35
3,Ananda,20,male,1,60,C,66.0,30


In [10]:
# Remove the columns added above by selecting only the ones to keep.
# .loc[:, <list of labels>] returns all rows restricted to those columns,
# and the result is bound back to the name df.
columns_to_keep = ["Name", "Age", "Sex", "Survived", "Fare"]
df = df.loc[:, columns_to_keep]



In [11]:
# Display df to confirm that only the five selected columns remain.
df

Unnamed: 0,Name,Age,Sex,Survived,Fare
0,Jack,22,male,1,50
1,Rose,19,female,1,100
2,Tom,25,male,0,30
3,Ananda,20,male,1,60


In [None]:
# Display the current state of df before a new row is added.
df

In [12]:
# Add a new row by assigning a dictionary to an index label that does not
# exist yet. .loc works on labels, so the new row is labelled 5 (label 4 is
# simply skipped). "Survived" and "Fare" are deliberately left out of the dict.
new_passenger = {"Name": "Leo", "Age": 30, "Sex": "male"}
df.loc[5] = new_passenger

In [13]:
df

Unnamed: 0,Name,Age,Sex,Survived,Fare
0,Jack,22,male,1.0,50.0
1,Rose,19,female,1.0,100.0
2,Tom,25,male,0.0,30.0
3,Ananda,20,male,1.0,60.0
5,Leo,30,male,,


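When a row is added from a dictionary, pandas automatically fills any columns that are missing from the dictionary with NaN.
Because NaN is a floating-point value, the integer columns `Survived` and `Fare` are converted to float (note `1.0` and `50.0` in the output above).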


In [15]:
import pandas as pd

# Start over with a fresh three-person frame for the row-update examples.
people = {
    "Name": ["Alice", "Bob", "Clara"],
    "Age": [25, 30, 28],
    "Sex": ["female", "male", "female"],
}
df = pd.DataFrame(people)
df


Unnamed: 0,Name,Age,Sex
0,Alice,25,female
1,Bob,30,male
2,Clara,28,female


In [16]:
# Update two columns at once for the row(s) whose Name is "Bob":
# the boolean mask selects the rows, the label list selects the columns,
# and the right-hand list supplies one value per selected column.
is_bob = df["Name"] == "Bob"
df.loc[is_bob, ["Age", "Sex"]] = [31, "male"]

df


Unnamed: 0,Name,Age,Sex
0,Alice,25,female
1,Bob,31,male
2,Clara,28,female


In [17]:
# Change several columns for Clara in a single assignment.
# "Status" is not an existing column: it gets created by this assignment,
# and the rows that are not selected by the mask are left as NaN in it.
is_clara = df["Name"] == "Clara"
df.loc[is_clara, ["Age", "Sex", "Status"]] = [29, "female", "Married"]

df


Unnamed: 0,Name,Age,Sex,Status
0,Alice,25,female,
1,Bob,31,male,
2,Clara,29,female,Married


In [18]:
import pandas as pd

# Start over again with a two-column frame (no "Sex" column yet).
df = pd.DataFrame(
    dict(
        Name=["Alice", "Bob", "Clara"],
        Age=[25, 30, 28],
    )
)
df


Unnamed: 0,Name,Age
0,Alice,25
1,Bob,30
2,Clara,28


In [19]:
# Create a "Sex" column from a list that supplies one value per existing row
# (the list length must match the number of rows).
sex_by_row = ["female", "male", "female"]
df.loc[:, "Sex"] = sex_by_row

df

Unnamed: 0,Name,Age,Sex
0,Alice,25,female
1,Bob,30,male
2,Clara,28,female


In [22]:
# Make the "Age" column the row index. With inplace=True the existing df is
# modified directly instead of a new DataFrame being returned.
# Re-running this cell fails, because "Age" is no longer a column afterwards.
df.set_index(keys="Age", inplace=True)

In [23]:
# Display df: "Age" now appears as the index rather than as a regular column.
df

Unnamed: 0_level_0,Name,Sex
Age,Unnamed: 1_level_1,Unnamed: 2_level_1
25,Alice,female
30,Bob,male
28,Clara,female


In [29]:
# "Age" is the index now, so 25 is a row *label* (the row whose Age is 25),
# not a row position; the second argument picks the column.
df.loc[25, "Sex"]

'female'

### Examples of when you need `inplace=True` (including index operations)

| Operation | Default behavior | With `inplace=True` |
|-----------|----------------|------------------|
| `df.drop("x", axis=1)` | Returns a new DataFrame without column `"x"`; original `df` stays the same | Original `df` loses column `"x"` |
| `df.drop(0, axis=0)` | Returns a new DataFrame without row 0; original `df` unchanged | Original `df` loses row 0 |
| `df.sort_values("col")` | Returns a sorted copy; original `df` unchanged | Original `df` is sorted |
| `df.sort_index()` | Returns a copy sorted by index; original `df` unchanged | Original `df` is sorted by index |
| `df.fillna(0)` | Returns a copy with NaNs replaced | Original `df` modified |
| `df.rename(columns={"old":"new"})` | Returns a copy with renamed columns | Original `df` modified |
| `df.rename(index={0:"zero"})` | Returns a copy with renamed index | Original `df` index modified |
| `df.set_index("col")` | Returns a new DataFrame with `"col"` as index; original `df` unchanged | Original `df` now uses `"col"` as index |
| `df.reset_index()` | Returns a copy with index reset; original `df` unchanged | Original `df` index reset |
