### ***Important Interview Questions based on `numpy` and `pandas`***

In [1]:
import pandas as pd
import numpy as np

1. Given a pandas DataFrame with a column values containing hyphen-separated numbers (e.g., "1-1-6", "5-1-8", "1-3-1-4"), create a new column that counts how many numbers in each row are greater than 1. The solution should be vectorized (no explicit Python loops).

In [15]:
df = pd.DataFrame({'values':["1-1-6", "5-1-8", "1-3-1-4", "20-0-2-1"]})
df

Unnamed: 0,values
0,1-1-6
1,5-1-8
2,1-3-1-4
3,20-0-2-1


In [16]:
(df['values'].str.split('-', expand=True).astype('Int64') > 1).sum(axis=1)

0    1
1    2
2    2
3    2
dtype: Int64

In [17]:
df['values']

0       1-1-6
1       5-1-8
2     1-3-1-4
3    20-0-2-1
Name: values, dtype: object

In [18]:
def func(x):
    ls = x.split('-')
    cnt = 0
    for i in ls:
        if int(i) > 1:
            cnt+=1
    return cnt

df['values'].apply(func)

0    1
1    2
2    2
3    2
Name: values, dtype: int64

2. Replace all values < 5 with 0 using vectorization.

In [19]:
df = pd.DataFrame({'values':[12, 5, 2, 3, 1, 67]})
df

Unnamed: 0,values
0,12
1,5
2,2
3,3
4,1
5,67


In [20]:
df['values'].where( df['values']>=5, other=0)

0    12
1     5
2     0
3     0
4     0
5    67
Name: values, dtype: int64

3. Count non-null values per row.

In [32]:

df = pd.DataFrame({
    'A': [1, 4, np.nan],
    'B': [2, np.nan, 3],
    'C': [np.nan, 6, 5]
})

df

Unnamed: 0,A,B,C
0,1.0,2.0,
1,4.0,,6.0
2,,3.0,5.0


In [33]:
# counts all non null values
df.count()

A    2
B    2
C    2
dtype: int64

In [34]:
# counts all null values
df.isna().sum(axis=1)

0    1
1    1
2    1
dtype: int64

4. Return indices of values greater than the array mean.

In [35]:
df = pd.DataFrame({'values':[12, 5, 2, 3, 1, 67]})
df

Unnamed: 0,values
0,12
1,5
2,2
3,3
4,1
5,67


In [36]:
df.mean()

values    15.0
dtype: float64

In [37]:
df['values'] > df['values'].mean()


0    False
1    False
2    False
3    False
4    False
5     True
Name: values, dtype: bool

In [38]:
np.where(df['values'] > df['values'].mean())

(array([5]),)

In [39]:
# 5. Find the row-wise maximum ignoring NaNs.

In [41]:

df = pd.DataFrame({
    'A': [1, 4, np.nan],
    'B': [2, np.nan, 3],
    'C': [np.nan, 6, 5]
})

df.max()

A    4.0
B    3.0
C    6.0
dtype: float64

In [42]:
# 6. Replace all negative values with their absolute values.

df = pd.DataFrame({
    'A': [-1, 4, np.nan],
    'B': [2, np.nan, -3],
    'C': [np.nan, 6, 5]
})

In [49]:
df.abs()

Unnamed: 0,A,B,C
0,1.0,2.0,
1,4.0,,6.0
2,,3.0,5.0


In [67]:
# 7. Create a boolean column if any value in the row is negative.
df = pd.DataFrame({
    'A': [-1, 4, 3],
    'B': [2, 6, 3],
    'C': [-4, 6, 5]
})
df

Unnamed: 0,A,B,C
0,-1,2,-4
1,4,6,6
2,3,3,5


In [78]:
# 1st way:
def func(x):
    # print(type(x))
    for i in x:
        if i < 0:
            return True
    return False

# df.T.apply(func)
df.apply(func, axis=1)

0     True
1    False
2    False
dtype: bool

In [79]:
# 2nd way:
df.apply(lambda x: (x < 0).any(), axis=1)

0     True
1    False
2    False
dtype: bool

In [85]:
# 8. Replace NaNs in each column with median + 1.
df = pd.DataFrame({
    'A': [-1, 4, 3],
    'B': [2, np.nan, 3],
    'C': [np.nan, 6, 5]
})
df

Unnamed: 0,A,B,C
0,-1,2.0,
1,4,,6.0
2,3,3.0,5.0


In [91]:
# replacing null values with median+1
df.apply(lambda x: x.fillna(x.median()+1), axis=1)

Unnamed: 0,A,B,C
0,-1.0,2.0,1.5
1,4.0,6.0,6.0
2,3.0,3.0,5.0


In [111]:
# 9. Convert all string columns to uppercase

df = pd.DataFrame( {'names':['Jai', 'Mithun', 'harsh'],
                    'age':[45, 34, 52]})

In [112]:
df

Unnamed: 0,names,age
0,Jai,45
1,Mithun,34
2,harsh,52


In [100]:
df['names'].str.upper()

0       JAI
1    MITHUN
2     HARSH
Name: names, dtype: object

In [103]:
df['names'].dtype == object

True

In [120]:
# 1st way:
res = df.copy()
for col in df.columns:
    if df[col].dtype==object:
        res[col] = df[col].str.upper()
res

Unnamed: 0,names,age
0,JAI,45
1,MITHUN,34
2,HARSH,52


In [121]:
# 2nd way:
df.apply( lambda x: x.str.upper() if x.dtype==object else x)

Unnamed: 0,names,age
0,JAI,45
1,MITHUN,34
2,HARSH,52


In [131]:
# 10. Create a column that is "high" if the row sum > 15, else "low"

df = pd.DataFrame( {'values_1':[12, 6, 7, 10, 14, 9, 8, 15],
                   'values_2':[4, 10, 3, 12, 10, 5, 7, 4]})
df.T

Unnamed: 0,0,1,2,3,4,5,6,7
values_1,12,6,7,10,14,9,8,15
values_2,4,10,3,12,10,5,7,4


In [134]:
df.apply(lambda x: "High" if x.sum()>15 else "Low" , axis=1)

0    High
1    High
2     Low
3    High
4    High
5     Low
6     Low
7    High
dtype: object

In [136]:
# 11. Normalize each column using

df = pd.DataFrame( {'values_1':[12, 6, 7, 10, 14, 9, 8, 15],
                   'values_2':[404, 105, 305, 232, 415, 505, 630, 390]})
df.T

Unnamed: 0,0,1,2,3,4,5,6,7
values_1,12,6,7,10,14,9,8,15
values_2,404,105,305,232,415,505,630,390


In [141]:
# using vectorization applying changes on every cell of both columns
df.apply(lambda x: (x - x.mean())/x.std() )

Unnamed: 0,values_1,values_2
0,0.5733,0.190278
1,-1.26126,-1.659901
2,-0.9555,-0.422323
3,-0.03822,-0.874039
4,1.18482,0.258344
5,-0.34398,0.815254
6,-0.64974,1.58874
7,1.49058,0.103647


In [142]:
# 11. Count how many values in each row are even numbers.
df = pd.DataFrame( {'values_1':[12, 6, 7, 10, 14, 9, 8, 15],
                   'values_2':[404, 105, 305, 232, 415, 505, 630, 390]})
df.T

Unnamed: 0,0,1,2,3,4,5,6,7
values_1,12,6,7,10,14,9,8,15
values_2,404,105,305,232,415,505,630,390


In [148]:
# using boolean mask
(df % 2==0).sum(axis=1)

0    2
1    1
2    0
3    2
4    1
5    0
6    2
7    1
dtype: int64

In [150]:
# using lambda functin and apply()
df.apply(lambda x: (x%2==0).sum(), axis=1)

0    2
1    1
2    0
3    2
4    1
5    0
6    2
7    1
dtype: int64

In [151]:
# 12. For a DataFrame with numeric + string columns, apply a function only to numeric columns.


df = pd.DataFrame( {'names':['Jai', 'Mithun', 'harsh'],
                    'age':[45, 34, 52]})
df

Unnamed: 0,names,age
0,Jai,45
1,Mithun,34
2,harsh,52


In [164]:
df.apply(lambda x: x+25 if x.dtype!=object else x )

Unnamed: 0,names,age
0,Jai,70
1,Mithun,59
2,harsh,77
