[Reference](https://medium.com/ai-science/the-untold-usage-of-loc-in-pandas-the-missing-manual-for-advanced-data-manipulation-330316e0e22e)

In [2]:
# Import the required packages
import numpy as np
import pandas as pd
import seaborn as sns

# Load the Tips dataset from seaborn
df = sns.load_dataset('tips')
df

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.50,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3
240,27.18,2.00,Female,Yes,Sat,Dinner,2
241,22.67,2.00,Male,Yes,Sat,Dinner,2
242,17.82,1.75,Male,No,Sat,Dinner,2


In [3]:
# Select the first row of the dataframe
first_row = df.loc[0]
first_row

total_bill     16.99
tip             1.01
sex           Female
smoker            No
day              Sun
time          Dinner
size               2
Name: 0, dtype: object

In [4]:
# Select the first five rows and columns 'total_bill' and 'tip'
selected_data = df.loc[:4, ['total_bill', 'tip']]
selected_data

Unnamed: 0,total_bill,tip
0,16.99,1.01
1,10.34,1.66
2,21.01,3.5
3,23.68,3.31
4,24.59,3.61


In [5]:
# Filtering
dinner_data = df.loc[df['time'] == 'Dinner']
dinner_data

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.50,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3
240,27.18,2.00,Female,Yes,Sat,Dinner,2
241,22.67,2.00,Male,Yes,Sat,Dinner,2
242,17.82,1.75,Male,No,Sat,Dinner,2


In [6]:
mask = df['day'] == 'Sat'
saturday_tips = df.loc[mask]
saturday_tips

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
19,20.65,3.35,Male,No,Sat,Dinner,3
20,17.92,4.08,Male,No,Sat,Dinner,2
21,20.29,2.75,Female,No,Sat,Dinner,2
22,15.77,2.23,Female,No,Sat,Dinner,2
23,39.42,7.58,Male,No,Sat,Dinner,4
...,...,...,...,...,...,...,...
238,35.83,4.67,Female,No,Sat,Dinner,3
239,29.03,5.92,Male,No,Sat,Dinner,3
240,27.18,2.00,Female,Yes,Sat,Dinner,2
241,22.67,2.00,Male,Yes,Sat,Dinner,2


In [7]:
saturday_total_bill = df.loc[mask, 'total_bill']
saturday_total_bill

19     20.65
20     17.92
21     20.29
22     15.77
23     39.42
       ...  
238    35.83
239    29.03
240    27.18
241    22.67
242    17.82
Name: total_bill, Length: 87, dtype: float64

In [8]:
# assign a new value of 10 to the 'tip' column for the rows
# where the 'total_bill' is greater than 40
df.loc[df['total_bill'] > 40, 'tip'] = 10

df[df.total_bill > 40]

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
59,48.27,10.0,Male,No,Sat,Dinner,4
95,40.17,10.0,Male,Yes,Fri,Dinner,4
102,44.3,10.0,Female,Yes,Sat,Dinner,3
142,41.19,10.0,Male,No,Thur,Lunch,5
156,48.17,10.0,Male,No,Sun,Dinner,6
170,50.81,10.0,Male,Yes,Sat,Dinner,3
182,45.35,10.0,Male,Yes,Sun,Dinner,3
184,40.55,10.0,Male,Yes,Sun,Dinner,2
197,43.11,10.0,Female,Yes,Thur,Lunch,4
212,48.33,10.0,Male,No,Sat,Dinner,4


In [9]:
df.loc[df['day'].isin(['saturday', 'sunday']), 'weekend'] = True
df.loc[~df['day'].isin(['saturday', 'sunday']), 'weekend'] = False
df

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,weekend
0,16.99,1.01,Female,No,Sun,Dinner,2,False
1,10.34,1.66,Male,No,Sun,Dinner,3,False
2,21.01,3.50,Male,No,Sun,Dinner,3,False
3,23.68,3.31,Male,No,Sun,Dinner,2,False
4,24.59,3.61,Female,No,Sun,Dinner,4,False
...,...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3,False
240,27.18,2.00,Female,Yes,Sat,Dinner,2,False
241,22.67,2.00,Male,Yes,Sat,Dinner,2,False
242,17.82,1.75,Male,No,Sat,Dinner,2,False


In [10]:
df.loc[:, 'tip_percentage'] = df.loc[:, ['tip', 'total_bill']].apply(lambda x: x['tip'] / x['total_bill'] * 100, axis=1)

df

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,weekend,tip_percentage
0,16.99,1.01,Female,No,Sun,Dinner,2,False,5.944673
1,10.34,1.66,Male,No,Sun,Dinner,3,False,16.054159
2,21.01,3.50,Male,No,Sun,Dinner,3,False,16.658734
3,23.68,3.31,Male,No,Sun,Dinner,2,False,13.978041
4,24.59,3.61,Female,No,Sun,Dinner,4,False,14.680765
...,...,...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3,False,20.392697
240,27.18,2.00,Female,Yes,Sat,Dinner,2,False,7.358352
241,22.67,2.00,Male,Yes,Sat,Dinner,2,False,8.822232
242,17.82,1.75,Male,No,Sat,Dinner,2,False,9.820426


In [11]:
def func(x):
    # Your function logic here
    pass

mask = df['day'] == 'Sat'
df.loc[mask, 'new_column'] = df[mask].apply(func, axis=1)
df

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,weekend,tip_percentage,new_column
0,16.99,1.01,Female,No,Sun,Dinner,2,False,5.944673,
1,10.34,1.66,Male,No,Sun,Dinner,3,False,16.054159,
2,21.01,3.50,Male,No,Sun,Dinner,3,False,16.658734,
3,23.68,3.31,Male,No,Sun,Dinner,2,False,13.978041,
4,24.59,3.61,Female,No,Sun,Dinner,4,False,14.680765,
...,...,...,...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3,False,20.392697,
240,27.18,2.00,Female,Yes,Sat,Dinner,2,False,7.358352,
241,22.67,2.00,Male,Yes,Sat,Dinner,2,False,8.822232,
242,17.82,1.75,Male,No,Sat,Dinner,2,False,9.820426,


In [12]:
# Inserting null values for the next example
# Affecting 10 rows
mask = df.total_bill > 40
df.loc[mask, 'tip'] = np.nan

# To display the missing values
df[df.total_bill> 38]

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,weekend,tip_percentage,new_column
23,39.42,7.58,Male,No,Sat,Dinner,4,False,19.228818,
56,38.01,3.0,Male,Yes,Sat,Dinner,4,False,7.89266,
59,48.27,,Male,No,Sat,Dinner,4,False,20.716801,
95,40.17,,Male,Yes,Fri,Dinner,4,False,24.8942,
102,44.3,,Female,Yes,Sat,Dinner,3,False,22.573363,
112,38.07,4.0,Male,No,Sun,Dinner,3,False,10.506961,
142,41.19,,Male,No,Thur,Lunch,5,False,24.277737,
156,48.17,,Male,No,Sun,Dinner,6,False,20.759809,
170,50.81,,Male,Yes,Sat,Dinner,3,False,19.681165,
182,45.35,,Male,Yes,Sun,Dinner,3,False,22.050717,


In [13]:
mask = df.tip.isnull()
df.loc[mask, 'tip'] = df.groupby(['time', 'sex'])['tip'].transform('mean')

# To filled missing values
df[df.total_bill> 38]

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,weekend,tip_percentage,new_column
23,39.42,7.58,Male,No,Sat,Dinner,4,False,19.228818,
56,38.01,3.0,Male,Yes,Sat,Dinner,4,False,7.89266,
59,48.27,2.974359,Male,No,Sat,Dinner,4,False,20.716801,
95,40.17,2.974359,Male,Yes,Fri,Dinner,4,False,24.8942,
102,44.3,3.011961,Female,Yes,Sat,Dinner,3,False,22.573363,
112,38.07,4.0,Male,No,Sun,Dinner,3,False,10.506961,
142,41.19,2.815937,Male,No,Thur,Lunch,5,False,24.277737,
156,48.17,2.974359,Male,No,Sun,Dinner,6,False,20.759809,
170,50.81,2.974359,Male,Yes,Sat,Dinner,3,False,19.681165,
182,45.35,2.974359,Male,Yes,Sun,Dinner,3,False,22.050717,


In [14]:
# First we calculate the average tip by different time and gender
average_tip_by_time_sex = df.groupby(['time', 'sex'], as_index=False)['tip'].mean()
average_tip_by_time_sex

Unnamed: 0,time,sex,tip
0,Lunch,Male,2.815937
1,Lunch,Female,2.511765
2,Dinner,Male,2.974359
3,Dinner,Female,3.011961


In [15]:
# Create our function to fill up missing value
# based on the same `time` and `sex`
# For non-missing values, the original values are kept as it is
def get_avg_tip_by_time_sex(row):
    time = row['time']
    sex = row['sex']
    tip_value = row['tip']

    # if row['tip'] is a missing value
    if np.isnan(row['tip']):
        # find the record from the calculated average from the groupby result
        selected_rows = average_tip_by_time_sex[(average_tip_by_time_sex.time == time) & (average_tip_by_time_sex.sex == sex)]
        tip_value = selected_rows.tip.iloc[0]
    return tip_value


df.apply(get_avg_tip_by_time_sex, axis=1)

0      1.01
1      1.66
2      3.50
3      3.31
4      3.61
       ... 
239    5.92
240    2.00
241    2.00
242    1.75
243    3.00
Length: 244, dtype: float64

In [16]:
mask = df.tip.isnull()
df.loc[mask, 'tip'] = df.groupby(['time', 'sex'])['tip'].transform('mean')