Q1: Detect outliers in the Salary column using the IQR method

In [1]:
import pandas as pd

# Small salary sample used to demonstrate the IQR rule.
data = {'Salary': [25000, 27000, 29000, 31000, 50000, 100000]}
df = pd.DataFrame(data)

# First and third quartiles of Salary, and the spread between them.
salary = df['Salary']
Q1, Q3 = salary.quantile(0.25), salary.quantile(0.75)
IQR = Q3 - Q1

# A row counts as an outlier when its salary falls strictly outside the
# fences placed 1.5 * IQR below Q1 and 1.5 * IQR above Q3.
lower_fence = Q1 - 1.5 * IQR
upper_fence = Q3 + 1.5 * IQR
outliers = df[salary.lt(lower_fence) | salary.gt(upper_fence)]
outliers

Unnamed: 0,Salary
5,100000


Q2: Detect column data types and convert Age from float to int

In [2]:
data = {'Name': ['A', 'B', 'C'], 'Age': [21.0, 22.0, 23.0]}
df = pd.DataFrame(data)

# Only the last expression of a cell is rendered, so a bare `df.dtypes` in the
# middle of the cell shows nothing. Capture the pre-conversion dtypes and print
# them explicitly so the "detect" step is actually visible.
dtypes_before = df.dtypes
print(dtypes_before)

# Ask for int64 by name instead of the builtin `int`, so the resulting width
# does not depend on the platform's default integer size.
df['Age'] = df['Age'].astype('int64')

# Final expression: the dtypes after the conversion.
df.dtypes

Name    object
Age      int64
dtype: object

Q3: Apply Label Encoding to a City column

In [3]:
from sklearn.preprocessing import LabelEncoder

# Sample frame in which some city names repeat.
cities = ['Pune', 'Mumbai', 'Delhi', 'Pune', 'Delhi']
data = {'City': cities}
df = pd.DataFrame(data)

# Fit the encoder on the City column and obtain one integer code per row,
# then store the codes next to the original labels.
le = LabelEncoder()
city_codes = le.fit_transform(df['City'])
df['City_Encoded'] = city_codes
df

Unnamed: 0,City,City_Encoded
0,Pune,2
1,Mumbai,1
2,Delhi,0
3,Pune,2
4,Delhi,0


Q4: Apply One-Hot Encoding to a Department column

In [4]:
# Five rows spread over three distinct departments.
departments = ['HR', 'IT', 'Finance', 'HR', 'IT']
data = {'Department': departments}
df = pd.DataFrame(data)

# Expand the listed categorical column into one indicator column per
# distinct value; `df` itself is left untouched.
categorical_cols = ['Department']
df_encoded = pd.get_dummies(df, columns=categorical_cols)
df_encoded

Unnamed: 0,Department_Finance,Department_HR,Department_IT
0,False,True,False
1,False,False,True
2,True,False,False
3,False,True,False
4,False,False,True


Q5: Create a DataFrame and split into train and test sets

In [5]:
from sklearn.model_selection import train_test_split

data = {'Age': [22, 25, 47, 52, 46, 56], 'Purchased': [0, 1, 1, 0, 1, 0]}
df = pd.DataFrame(data)

# Double brackets keep the features as a DataFrame; the target is a Series.
X = df[['Age']]
y = df['Purchased']

# test_size=0.2 requests a 20% hold-out; the fixed random_state makes the
# shuffle repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Sanity checks: the two partitions together account for every row, and each
# feature partition carries the same index labels as its target partition.
assert len(X_train) + len(X_test) == len(df)
assert X_train.index.equals(y_train.index)
assert X_test.index.equals(y_test.index)

# Final expression: show the partition sizes so the cell has a visible result.
pd.DataFrame(
    {'rows': [len(X_train), len(X_test)]},
    index=['train', 'test'],
)

Q6: Replace multiple values in a column using a dictionary

In [6]:
# Letter grades before relabelling.
data = {'Grade': ['A+', 'B', 'A', 'C', 'B+']}
df = pd.DataFrame(data)

# Lookup table: letter grade -> descriptive label.
grade_map = dict([
    ('A+', 'Excellent'),
    ('A', 'Very Good'),
    ('B+', 'Good'),
    ('B', 'Average'),
    ('C', 'Poor'),
])

# Substitute every letter with its label in one call, then write the
# relabelled values back over the Grade column.
relabelled = df['Grade'].replace(to_replace=grade_map)
df['Grade'] = relabelled
df

Unnamed: 0,Grade
0,Excellent
1,Average
2,Very Good
3,Poor
4,Good


Q7: Normalize Age column using Min-Max Scaling

In [7]:
from sklearn.preprocessing import MinMaxScaler

# Ages to be rescaled.
ages = [18, 22, 25, 30, 35]
data = {'Age': ages}
df = pd.DataFrame(data)

# Fit the scaler on the single-column Age frame and transform it in the same
# call; the scaled values are stored alongside the raw ages.
scaler = MinMaxScaler()
age_scaled = scaler.fit_transform(df[['Age']])
df['Age_Normalized'] = age_scaled
df

Unnamed: 0,Age,Age_Normalized
0,18,0.0
1,22,0.235294
2,25,0.411765
3,30,0.705882
4,35,1.0


Q8: Fill missing values using interpolation

In [8]:
import numpy as np

# Marks with two gaps, each sitting between two known values.
marks = [85, np.nan, 90, np.nan, 95]
data = {'Marks': marks}
df = pd.DataFrame(data)

# Fill the gaps by interpolation ('linear' is interpolate()'s default method,
# spelled out here), then write the filled values back over the column.
filled_marks = df['Marks'].interpolate(method='linear')
df['Marks'] = filled_marks
df

Unnamed: 0,Marks
0,85.0
1,87.5
2,90.0
3,92.5
4,95.0


Q9: Replace values in Marks less than 50 with 'Fail' using where()

In [9]:
# Raw marks; values below 50 are to be labelled instead of shown.
marks = [45, 67, 88, 32, 76]
data = {'Marks': marks}
df = pd.DataFrame(data)

# where() keeps a value wherever the mask is True and substitutes `other`
# everywhere else, so passing marks stay numeric and the rest become 'Fail'.
passed = df['Marks'].ge(50)
df['Result'] = df['Marks'].where(passed, other='Fail')
df

Unnamed: 0,Marks,Result
0,45,Fail
1,67,67
2,88,88
3,32,Fail
4,76,76


Q10: Split a multi-feature DataFrame into train-test sets

In [10]:
data = {
    'Age': [25, 30, 45, 35, 22],
    'Salary': [50000, 60000, 80000, 65000, 45000],
    'Purchased': [1, 0, 1, 0, 1]
}

df = pd.DataFrame(data)

# Two feature columns form the design matrix; Purchased is the target.
X = df[['Age', 'Salary']]
y = df['Purchased']

# test_size=0.2 requests a 20% hold-out; the fixed random_state makes the
# shuffle repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Sanity checks: the two partitions together account for every row, and each
# feature partition carries the same index labels as its target partition.
assert len(X_train) + len(X_test) == len(df)
assert X_train.index.equals(y_train.index)
assert X_test.index.equals(y_test.index)

# Final expression: show the partition sizes so the cell has a visible result.
pd.DataFrame(
    {'rows': [len(X_train), len(X_test)]},
    index=['train', 'test'],
)