In [80]:
import pandas as pd
from sklearn.impute import SimpleImputer

Rename

In [81]:
# Small three-column frame used by the renaming examples.
data = {'A': [1, 2, 3],
        'B': [4, 5, 6],
        'C': [4, 5, 6]}

df = pd.DataFrame(data)

# Rename a single column. `rename` returns a new frame, so rebind `df`
# instead of relying on inplace=True (which returns None and blocks chaining).
df = df.rename(columns={'A': 'New_A'})

# Rename multiple columns in one call; keys are current labels, values the new ones.
# 'New_A' exists here only because of the rename just above.
df = df.rename(columns={'B': 'New_B', 'New_A': 'New_A_2'})
df

Unnamed: 0,New_A_2,New_B,C
0,1,4,4
1,2,5,5
2,3,6,6


In [82]:
# Replace every column label at once. The labels are matched by position,
# so the list needs exactly one entry per existing column.
df = df.set_axis(['New_Column1', 'New_Column2', 'C'], axis='columns')
df

Unnamed: 0,New_Column1,New_Column2,C
0,1,4,4
1,2,5,5
2,3,6,6


Delete

In [83]:
# Delete a single column. `drop` returns a new frame, so rebind `df`
# rather than mutating it with inplace=True.
df = df.drop(columns=['New_Column1'])
df

Unnamed: 0,New_Column2,C
0,4,4
1,5,5
2,6,6


In [84]:
# Remove column 'C'; the resulting frame is the same as after `del df['C']`.
df = df.drop(columns='C')
df


Unnamed: 0,New_Column2
0,4
1,5
2,6


Filter

In [85]:
# Sample table of names and ages used by the filtering example.
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David'],
    Age=[25, 30, 35, 28],
)
df = pd.DataFrame(data)

# Boolean-mask selection: keep only rows whose Age is strictly greater than 30.
# Matching rows keep their original index labels.
filtered_df = df.loc[df['Age'] > 30]
filtered_df


Unnamed: 0,Name,Age
2,Charlie,35


Sort

In [86]:
# Order the rows by Age, smallest first. `sort_values` returns a new frame,
# so `df` itself keeps its original row order.
sorted_df = df.sort_values('Age', ascending=True)
sorted_df

Unnamed: 0,Name,Age
0,Alice,25
3,David,28
1,Bob,30
2,Charlie,35


In [87]:
# Two-key sort: Age ascending is the primary key; Name descending only
# decides the order among rows that share the same Age.
sorted_df = df.sort_values(['Age', 'Name'], ascending=[True, False])
sorted_df

Unnamed: 0,Name,Age
0,Alice,25
3,David,28
1,Bob,30
2,Charlie,35


Adding and Modifying Columns:

In [88]:
# Add (or overwrite) a 'Birth Year' column computed as 2023 minus each row's Age.
# The dict-unpacking form is needed because the column name contains a space.
df = df.assign(**{'Birth Year': 2023 - df['Age']})
df

Unnamed: 0,Name,Age,Birth Year
0,Alice,25,1998
1,Bob,30,1993
2,Charlie,35,1988
3,David,28,1995


In [89]:
# Add one year to every age.
# Note: each execution of this cell adds another year to the stored ages.
df['Age'] += 1
df

Unnamed: 0,Name,Age,Birth Year
0,Alice,26,1998
1,Bob,31,1993
2,Charlie,36,1988
3,David,29,1995


Aggregation and Grouping:

In [90]:
# Five observations split across two categories.
data = dict(
    Category=['A', 'B', 'A', 'B', 'A'],
    Value=[10, 15, 20, 25, 30],
)
df = pd.DataFrame(data)

# One row per distinct Category (which becomes the index), holding the
# mean of the remaining column for that group.
grouped_df = df.groupby(by='Category').agg('mean')
grouped_df


Unnamed: 0_level_0,Value
Category,Unnamed: 1_level_1
A,20.0
B,20.0


Handling Missing Data:

In [91]:
# Two numeric columns, each with one missing entry (None becomes NaN).
data = {'A': [1, 2, None, 4, 5],
        'B': [None, 2, 3, 4, 5]}

df = pd.DataFrame(data)

# Drop rows with any missing values. `dropna` returns a new frame, so rebind
# `df` instead of using inplace=True; surviving rows keep their index labels.
df = df.dropna()
df

Unnamed: 0,A,B
1,2.0,2.0
3,4.0,4.0
4,5.0,5.0


In [92]:
# Rebuild the frame with missing values so this cell does not depend on
# what earlier cells did to `df`.
data = {'A': [1, 2, None, 4, 5],
        'B': [None, 2, 3, 4, 5]}

df = pd.DataFrame(data)

# Fill missing values with a specific value (here 0). `fillna` returns a new
# frame, so rebind `df` instead of using inplace=True.
df = df.fillna(0)
df


Unnamed: 0,A,B
0,1.0,0.0
1,2.0,2.0
2,0.0,3.0
3,4.0,4.0
4,5.0,5.0


In [93]:
# Create a sample DataFrame with missing values
data = {'A': [1, 2, None, 4, 5],
        'B': [None, 2, 3, 4, 5]}
df = pd.DataFrame(data)

# Initialize the SimpleImputer with a strategy: 'mean', 'median',
# 'most_frequent', or 'constant' (the constant itself is passed via fill_value).
imputer = SimpleImputer(strategy='mean')  # You can change the strategy as needed

# Fit the imputer on the DataFrame and transform it to fill missing values.
# By default fit_transform returns a plain array without labels, so re-attach
# both the column names and the row index of the input frame.
df_filled = pd.DataFrame(imputer.fit_transform(df), columns=df.columns, index=df.index)
df_filled

Unnamed: 0,A,B
0,1.0,3.5
1,2.0,2.0
2,3.0,3.0
3,4.0,4.0
4,5.0,5.0
