In [1]:
import pandas as pd
import numpy as np

# Sample DataFrame with some common data issues
data = {
    'Name': ['Alice', 'Bob', 'Charlie', None, 'Eve', 'Frank'],
    'Age': [25, np.nan, 30, 22, None, 28],
    'Email': ['alice@example.com', 'bob@example.com', 'charlie@example', 'eve@example.com', 'frank.com', None],
    'City': ['New York', 'Los Angeles', 'New York', 'Chicago', 'Los Angeles', None],
}

df = pd.DataFrame(data)

print("Original DataFrame:")
df.head()


Original DataFrame:


Unnamed: 0,Name,Age,Email,City
0,Alice,25.0,alice@example.com,New York
1,Bob,,bob@example.com,Los Angeles
2,Charlie,30.0,charlie@example,New York
3,,22.0,eve@example.com,Chicago
4,Eve,,frank.com,Los Angeles


In [2]:
df.isnull()

Unnamed: 0,Name,Age,Email,City
0,False,False,False,False
1,False,True,False,False
2,False,False,False,False
3,True,False,False,False
4,False,True,False,False
5,False,False,True,True


In [3]:
df

Unnamed: 0,Name,Age,Email,City
0,Alice,25.0,alice@example.com,New York
1,Bob,,bob@example.com,Los Angeles
2,Charlie,30.0,charlie@example,New York
3,,22.0,eve@example.com,Chicago
4,Eve,,frank.com,Los Angeles
5,Frank,28.0,,


In [6]:
df.duplicated()

0    False
1    False
2    False
3    False
4    False
5    False
dtype: bool

In [5]:
df.loc[df.duplicated(subset=['Email'])].head()

Unnamed: 0,Name,Age,Email,City


In [2]:
df.fillna(0)

Unnamed: 0,Name,Age,Email,City
0,Alice,25.0,alice@example.com,New York
1,Bob,0.0,bob@example.com,Los Angeles
2,Charlie,30.0,charlie@example,New York
3,0,22.0,eve@example.com,Chicago
4,Eve,0.0,frank.com,Los Angeles
5,Frank,28.0,0,0


In [4]:
df[df['City'] == 'New York']

Unnamed: 0,Name,Age,Email,City
0,Alice,25.0,alice@example.com,New York
2,Charlie,30.0,charlie@example,New York


In [5]:
df.loc[df['City'] == 'New York']

Unnamed: 0,Name,Age,Email,City
0,Alice,25.0,alice@example.com,New York
2,Charlie,30.0,charlie@example,New York


In [6]:
filt=df['City'] == 'New York'
df.loc[filt]['Age'].agg(['mean','median'])

mean      27.5
median    27.5
Name: Age, dtype: float64

In [8]:
filt=df['City'] == 'New York'
df.loc[filt]['Age'].agg(['mean','median'])

mean      27.5
median    27.5
Name: Age, dtype: float64

In [10]:
df['Age'].min()

22.0

In [12]:
# df['Age'].astype(int)
df.isnull()

Unnamed: 0,Name,Age,Email,City
0,False,False,False,False
1,False,True,False,False
2,False,False,False,False
3,True,False,False,False
4,False,True,False,False
5,False,False,True,True


In [13]:
df.replace({'Name':{np.nan:'Kia'}},inplace=True)

In [14]:
df.replace({'Age':np.nan},df['Age'].mean(),inplace=True)
df

Unnamed: 0,Name,Age,Email,City
0,Alice,25.0,alice@example.com,New York
1,Bob,26.25,bob@example.com,Los Angeles
2,Charlie,30.0,charlie@example,New York
3,Kia,22.0,eve@example.com,Chicago
4,Eve,26.25,frank.com,Los Angeles
5,Frank,28.0,,


In [15]:
df.replace({'Email':np.nan},'abc@gmail.com',inplace=True)

In [16]:
df.replace({'City':np.nan},'Ohio',inplace=True)

In [17]:
df.iloc[[0,1],2]

0    alice@example.com
1      bob@example.com
Name: Email, dtype: object

In [18]:
df[2:4]['City']

2    New York
3     Chicago
Name: City, dtype: object

In [19]:
df.drop_duplicates()

Unnamed: 0,Name,Age,Email,City
0,Alice,25.0,alice@example.com,New York
1,Bob,26.25,bob@example.com,Los Angeles
2,Charlie,30.0,charlie@example,New York
3,Kia,22.0,eve@example.com,Chicago
4,Eve,26.25,frank.com,Los Angeles
5,Frank,28.0,abc@gmail.com,Ohio


In [20]:
filt=df['Age']>26
df.loc[filt,'City']

1    Los Angeles
2       New York
4    Los Angeles
5           Ohio
Name: City, dtype: object

In [21]:
df.columns

Index(['Name', 'Age', 'Email', 'City'], dtype='object')

In [22]:
df.columns=[x.lower() for x in df.columns]
df

Unnamed: 0,name,age,email,city
0,Alice,25.0,alice@example.com,New York
1,Bob,26.25,bob@example.com,Los Angeles
2,Charlie,30.0,charlie@example,New York
3,Kia,22.0,eve@example.com,Chicago
4,Eve,26.25,frank.com,Los Angeles
5,Frank,28.0,abc@gmail.com,Ohio


In [23]:

df = df.drop((4, 'email'), axis=1)
df

KeyError: "[(4, 'email')] not found in axis"

In [24]:
df.loc[4,'email']='eve.h3r@gmail.com'
df

Unnamed: 0,name,age,email,city
0,Alice,25.0,alice@example.com,New York
1,Bob,26.25,bob@example.com,Los Angeles
2,Charlie,30.0,charlie@example,New York
3,Kia,22.0,eve@example.com,Chicago
4,Eve,26.25,eve.h3r@gmail.com,Los Angeles
5,Frank,28.0,abc@gmail.com,Ohio


In [25]:
#using at method
df.at[5,'email'] = 'frank@booking.com'
df

Unnamed: 0,name,age,email,city
0,Alice,25.0,alice@example.com,New York
1,Bob,26.25,bob@example.com,Los Angeles
2,Charlie,30.0,charlie@example,New York
3,Kia,22.0,eve@example.com,Chicago
4,Eve,26.25,eve.h3r@gmail.com,Los Angeles
5,Frank,28.0,frank@booking.com,Ohio


In [26]:
df['city'].str.lower()

0       new york
1    los angeles
2       new york
3        chicago
4    los angeles
5           ohio
Name: city, dtype: object

In [27]:
df['city'].apply(len)

0     8
1    11
2     8
3     7
4    11
5     4
Name: city, dtype: int64

In [28]:
df

Unnamed: 0,name,age,email,city
0,Alice,25.0,alice@example.com,New York
1,Bob,26.25,bob@example.com,Los Angeles
2,Charlie,30.0,charlie@example,New York
3,Kia,22.0,eve@example.com,Chicago
4,Eve,26.25,eve.h3r@gmail.com,Los Angeles
5,Frank,28.0,frank@booking.com,Ohio


In [29]:
df['email'].str.split('@')

0    [alice, example.com]
1      [bob, example.com]
2      [charlie, example]
3      [eve, example.com]
4    [eve.h3r, gmail.com]
5    [frank, booking.com]
Name: email, dtype: object

In [30]:
df.columns

Index(['name', 'age', 'email', 'city'], dtype='object')

In [31]:
df.head()

Unnamed: 0,name,age,email,city
0,Alice,25.0,alice@example.com,New York
1,Bob,26.25,bob@example.com,Los Angeles
2,Charlie,30.0,charlie@example,New York
3,Kia,22.0,eve@example.com,Chicago
4,Eve,26.25,eve.h3r@gmail.com,Los Angeles


In [32]:
filt=df['age']>25
df.loc[filt,'city']

1    Los Angeles
2       New York
4    Los Angeles
5           Ohio
Name: city, dtype: object

In [33]:
df

Unnamed: 0,name,age,email,city
0,Alice,25.0,alice@example.com,New York
1,Bob,26.25,bob@example.com,Los Angeles
2,Charlie,30.0,charlie@example,New York
3,Kia,22.0,eve@example.com,Chicago
4,Eve,26.25,eve.h3r@gmail.com,Los Angeles
5,Frank,28.0,frank@booking.com,Ohio


In [36]:
df.columns


Index(['name', 'age', 'email', 'city'], dtype='object')

In [39]:
df[['age','city']]

Unnamed: 0,age,city
0,25.0,New York
1,26.25,Los Angeles
2,30.0,New York
3,22.0,Chicago
4,26.25,Los Angeles
5,28.0,Ohio


In [58]:
#updating the column names
df.rename(columns={'name':'Name','city':'City'},inplace=True)
df

Unnamed: 0,Name,age,email,City
0,Alice,25.0,alice@example.com,New York
1,Bob,26.25,bob@example.com,Los Angeles
2,Charlie,30.0,charlie@example,New York
3,Kia,22.0,eve@example.com,Chicago
4,Eve,26.25,eve.h3r@gmail.com,Los Angeles
5,Frank,28.0,frank@booking.com,Ohio


In [59]:
#updating the rows
df.loc[6]= ['Malli',23,'malli.gude@gmail.com','Netherlands']
df

Unnamed: 0,Name,age,email,City
0,Alice,25.0,alice@example.com,New York
1,Bob,26.25,bob@example.com,Los Angeles
2,Charlie,30.0,charlie@example,New York
3,Kia,22.0,eve@example.com,Chicago
4,Eve,26.25,eve.h3r@gmail.com,Los Angeles
5,Frank,28.0,frank@booking.com,Ohio
6,Malli,23.0,malli.gude@gmail.com,Netherlands


In [64]:
#updating the specific row
df.loc[3,'email'] = 'kia.atp@gmail.com'
df

Unnamed: 0,Name,age,email,City
0,Alice,25.0,alice@example.com,New York
1,Bob,26.25,bob@example.com,Los Angeles
2,Charlie,30.0,charlie@example,New York
3,Kia,22.0,kia.atp@gmail.com,Chicago
4,Eve,26.25,eve.h3r@gmail.com,Los Angeles
5,Frank,28.0,frank@booking.com,Ohio
6,Malli,23.0,malli.gude@gmail.com,Netherlands


In [65]:
df.loc[5,'City']='New Jersey'
df

Unnamed: 0,Name,age,email,City
0,Alice,25.0,alice@example.com,New York
1,Bob,26.25,bob@example.com,Los Angeles
2,Charlie,30.0,charlie@example,New York
3,Kia,22.0,kia.atp@gmail.com,Chicago
4,Eve,26.25,eve.h3r@gmail.com,Los Angeles
5,Frank,28.0,frank@booking.com,New Jersey
6,Malli,23.0,malli.gude@gmail.com,Netherlands


In [67]:
df.loc[6,'City'] = 'Texas'
df

Unnamed: 0,Name,age,email,City
0,Alice,25.0,alice@example.com,New York
1,Bob,26.25,bob@example.com,Los Angeles
2,Charlie,30.0,charlie@example,New York
3,Kia,22.0,kia.atp@gmail.com,Chicago
4,Eve,26.25,eve.h3r@gmail.com,Los Angeles
5,Frank,28.0,frank@booking.com,New Jersey
6,Malli,23.0,malli.gude@gmail.com,Texas
