# Pandas DataFrame
    A pandas DataFrame extends NumPy two-dimensional arrays by giving labels to the columns and, when explicit indices are provided, to the rows as well.
## Learning Objectives
    1. Creating DataFrames from Series objects
    2. Reading DataFrames from an external file...
    3. Setting Indexes
    4. Numeric Operations for Selecting, combining, and creating columns
    5. Concatenate dataframes
    6. How to iterate over a dataframe's items
    7. Drop and inplace

In [None]:
# imports, follow the conventions...
import numpy as np
import pandas as pd

## Creating a DataFrame from series 
    Note: This way of creating dataframes is not common!

In [3]:
# Two Series with the same labels in different orders: one is built from a
# list plus an explicit index, the other from a dict (its keys become the index).
# Each Series is named after the year it describes.
pop2020 = pd.Series(
    [99.9, 91.3, 99.4, 100, 83, 72.4, 84.5, 96.5, 84.8, 76.2],
    index=['Java', 'C', 'C++', 'Python', 'C#', 'PHP', 'JavaScript', 'Ruby', 'R', 'Matlab'],
    name='2020',
)
pop2021 = pd.Series(
    {"C": 99.9, "C#": 91.3, "C++": 99.4, "Java": 92, "JavaScript": 83,
     "Matlab": 53, "PHP": 83, "Python": 100, "R": 84.8, "Ruby": 76.2},
    name='2021',
)
# The DataFrame constructor aligns the Series on their index labels, and the
# dictionary keys become the column names.
programming_languages_popularity_df = pd.DataFrame({'2020': pop2020, '2021': pop2021})
print("keys of the above dictionary are used as column names")
programming_languages_popularity_df

keys of the above dictionary are used as column names


Unnamed: 0,2020,2021
C,91.3,99.9
C#,83.0,91.3
C++,99.4,99.4
Java,99.9,92.0
JavaScript,84.5,83.0
Matlab,76.2,53.0
PHP,72.4,83.0
Python,100.0,100.0
R,84.8,84.8
Ruby,96.5,76.2


# Reading DataFrames from an external file...
    Note: As far as I am aware, this is the most common way of creating dataframes.

In [22]:
# Relative path to the CSV file; the commented-out line below is an absolute-path alternative.
file_path = "files/programming_languages_popularity.csv"
# file_path = r"C:\Users\pooya\projects\PythonSchool\files\programming_languages_popularity.csv"
# index_col=0 makes the first column of the file (position zero) the row index.
popularity_df = pd.read_csv(file_path, index_col=0)
popularity_df

Unnamed: 0,2020,2021,2022
C,95.5,99.9,98.0
C#,95.5,91.3,90.0
C++,95.5,99.4,99.5
Java,95.0,92.0,91.0
JavaScript,84.5,83.0,83.0
Matlab,56.0,53.0,49.0
PHP,84.8,83.0,86.0
Python,100.0,100.0,100.0
R,74.3,84.8,85.0
Ruby,78.9,76.2,80.0


In [23]:
print("extracting array values: ")
# .values exposes the underlying data as a NumPy array (row and column labels are dropped).
popularity_df.values

extracting array values: 


array([[ 95.5,  99.9,  98. ],
       [ 95.5,  91.3,  90. ],
       [ 95.5,  99.4,  99.5],
       [ 95. ,  92. ,  91. ],
       [ 84.5,  83. ,  83. ],
       [ 56. ,  53. ,  49. ],
       [ 84.8,  83. ,  86. ],
       [100. , 100. , 100. ],
       [ 74.3,  84.8,  85. ],
       [ 78.9,  76.2,  80. ]])

In [24]:
print("extracting index values")
# .index holds the row labels.
popularity_df.index

extracting index values


Index(['C', 'C#', 'C++', 'Java', 'JavaScript', 'Matlab', 'PHP', 'Python', 'R',
       'Ruby'],
      dtype='object')

In [25]:
print("extracting columns")
# .columns holds the column labels.
popularity_df.columns

extracting columns


Index(['2020', '2021', '2022'], dtype='object')

In [26]:
print("extracting columns")
# keys() returns the same Index of column labels as the .columns attribute.
popularity_df.keys()

extracting columns


Index(['2020', '2021', '2022'], dtype='object')

In [27]:
print("Getting a column")
# Indexing with a single column label returns that column as a Series.
popularity_df['2020']

Getting a column


C              95.5
C#             95.5
C++            95.5
Java           95.0
JavaScript     84.5
Matlab         56.0
PHP            84.8
Python        100.0
R              74.3
Ruby           78.9
Name: 2020, dtype: float64

In [28]:
# iloc selects by integer position; the slice 0:2 returns the first two rows (the end is exclusive).
popularity_df.iloc[0: 2]

Unnamed: 0,2020,2021,2022
C,95.5,99.9,98.0
C#,95.5,91.3,90.0


In [32]:
# iloc[row_slice, column_slice]: the first five rows and the first two columns, by integer position.
print("The first slicing is for the indices(first dimension) and the second one is for the columns(second dimension)")
popularity_df.iloc[:5, :2]

The first slicing is for the indices(first dimension) and the second one is for the columns(second dimension)


Unnamed: 0,2020,2021
C,95.5,99.9
C#,95.5,91.3
C++,95.5,99.4
Java,95.0,92.0
JavaScript,84.5,83.0


In [33]:
print("Getting a series and then, getting its values using an integer index:")
# The row index holds string labels, so positional access must go through .iloc;
# plain [0] on a label-indexed Series is deprecated in recent pandas releases.
popularity_df['2020'].iloc[0]

Getting a series and then, getting its values using an integer index:


95.5

# Numeric Operations for Selecting, combining, and creating columns

In [35]:
# New column: the element-wise mean of the three yearly columns.
popularity_df['Average'] = (popularity_df['2020'] + popularity_df['2021'] + popularity_df['2022']) / 3
# head() shows the first 5 rows by default; use tail() to show rows from the end instead.
popularity_df.head() 

Unnamed: 0,2020,2021,2022,Average
C,95.5,99.9,98.0,97.8
C#,95.5,91.3,90.0,92.266667
C++,95.5,99.4,99.5,98.133333
Java,95.0,92.0,91.0,92.666667
JavaScript,84.5,83.0,83.0,83.5


In [36]:
print("Sorting a dataframe based on a column")
# ascending=False puts the largest '2020' values first; the sorted result is assigned back to the same name.
popularity_df = popularity_df.sort_values('2020', ascending=False)
popularity_df

Sorting a dataframe based on a column


Unnamed: 0,2020,2021,2022,Average
Python,100.0,100.0,100.0,100.0
C,95.5,99.9,98.0,97.8
C#,95.5,91.3,90.0,92.266667
C++,95.5,99.4,99.5,98.133333
Java,95.0,92.0,91.0,92.666667
PHP,84.8,83.0,86.0,84.6
JavaScript,84.5,83.0,83.0,83.5
Ruby,78.9,76.2,80.0,78.366667
R,74.3,84.8,85.0,81.366667
Matlab,56.0,53.0,49.0,52.666667


In [37]:
print("sorting based on more than one column with varying strategies of odering for each of the columns")
print("orders of the columns for sorting matters!")
# One boolean flag per sort key: '2020' ascending, with ties broken by '2021' descending.
popularity_df = popularity_df.sort_values(['2020', '2021'], ascending=[True, False])
popularity_df

sorting based on more than one column with varying strategies of odering for each of the columns
orders of the columns for sorting matters!


Unnamed: 0,2020,2021,2022,Average
Matlab,56.0,53.0,49.0,52.666667
R,74.3,84.8,85.0,81.366667
Ruby,78.9,76.2,80.0,78.366667
JavaScript,84.5,83.0,83.0,83.5
PHP,84.8,83.0,86.0,84.6
Java,95.0,92.0,91.0,92.666667
C,95.5,99.9,98.0,97.8
C++,95.5,99.4,99.5,98.133333
C#,95.5,91.3,90.0,92.266667
Python,100.0,100.0,100.0,100.0


In [38]:
# Smallest value in each column (one result per column).
popularity_df.min()

2020       56.000000
2021       53.000000
2022       49.000000
Average    52.666667
dtype: float64

In [39]:
# axis=0 averages down the rows, producing one mean per column.
popularity_df.mean(axis=0)

2020       86.000000
2021       86.260000
2022       86.150000
Average    86.136667
dtype: float64

In [40]:
# axis=1 averages across the columns, producing one mean per row.
popularity_df.mean(axis=1)

Matlab         52.666667
R              81.366667
Ruby           78.366667
JavaScript     83.500000
PHP            84.600000
Java           92.666667
C              97.800000
C++            98.133333
C#             92.266667
Python        100.000000
dtype: float64

In [41]:
# DataFrame.apply hands the function one column at a time (default axis=0);
# the arithmetic is element-wise, so every value v becomes v**2 + 10.
popularity_df.apply(lambda column: column ** 2 + 10)

Unnamed: 0,2020,2021,2022,Average
Matlab,3146.0,2819.0,2411.0,2783.777778
R,5530.49,7201.04,7235.0,6630.534444
Ruby,6235.21,5816.44,6410.0,6151.334444
JavaScript,7150.25,6899.0,6899.0,6982.25
PHP,7201.04,6899.0,7406.0,7167.16
Java,9035.0,8474.0,8291.0,8597.111111
C,9130.25,9990.01,9614.0,9574.84
C++,9130.25,9890.36,9910.25,9640.151111
C#,9130.25,8345.69,8110.0,8523.137778
Python,10010.0,10010.0,10010.0,10010.0


In [43]:
# info() prints a summary: the index range, each column's non-null count and dtype, and memory usage.
popularity_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 10 entries, Matlab to Python
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   2020     10 non-null     float64
 1   2021     10 non-null     float64
 2   2022     10 non-null     float64
 3   Average  10 non-null     float64
dtypes: float64(4)
memory usage: 700.0+ bytes


In [47]:
# Change the data type of every column to 32-bit floats; the converted frame is assigned back,
# and info() then reports float32 columns.
popularity_df = popularity_df.astype(np.float32)
popularity_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 10 entries, Matlab to Python
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   2020     10 non-null     float32
 1   2021     10 non-null     float32
 2   2022     10 non-null     float32
 3   Average  10 non-null     float32
dtypes: float32(4)
memory usage: 540.0+ bytes


In [48]:
# describe() reports per-column summary statistics: count, mean, std, min, quartiles, and max.
popularity_df.describe()

Unnamed: 0,2020,2021,2022,Average
count,10.0,10.0,10.0,10.0
mean,86.0,86.259995,86.150002,86.136673
std,13.493784,14.285439,14.824997,14.004139
min,56.0,53.0,49.0,52.666668
25%,80.300001,83.0,83.5,81.900002
50%,89.900002,88.050003,88.0,88.433334
75%,95.5,97.550001,96.25,96.516668
max,100.0,100.0,100.0,100.0


In [50]:
print("Manipulating the order of columns in a dataframe")
cols = popularity_df.columns.tolist()  # column labels as a plain list
# Move the last column to the front; slicing with [:-1] keeps this correct
# for any number of columns instead of assuming exactly four.
cols = [cols[-1], *cols[:-1]]
popularity_reordered_df = popularity_df[cols]  # selecting with a list builds another dataframe
popularity_reordered_df

Manipulating the order of columns in a dataframe


Unnamed: 0,Average,2020,2021,2022
Matlab,52.666668,56.0,53.0,49.0
R,81.366669,74.300003,84.800003,85.0
Ruby,78.366669,78.900002,76.199997,80.0
JavaScript,83.5,84.5,83.0,83.0
PHP,84.599998,84.800003,83.0,86.0
Java,92.666664,95.0,92.0,91.0
C,97.800003,95.5,99.900002,98.0
C++,98.133331,95.5,99.400002,99.5
C#,92.26667,95.5,91.300003,90.0
Python,100.0,100.0,100.0,100.0


## Concatenate dataframes

In [51]:
# Column-oriented input: every key becomes a column and its list supplies that column's values.
dummy_data_1 = dict(
    id=['1', '2', '3', '4', '5'],
    Feature_1=['A', 'C', 'E', 'G', 'I'],
    Feature_2=['B', 'D', 'F', 'H', 'J'],
)
df1 = pd.DataFrame(data=dummy_data_1)
df1

Unnamed: 0,id,Feature_1,Feature_2
0,1,A,B
1,2,C,D
2,3,E,F
3,4,G,H
4,5,I,J


In [52]:
# Second frame with the same three columns, holding the next five records.
dummy_data_2 = dict(
    id=['6', '7', '8', '9', '10'],
    Feature_1=['K', 'M', 'O', 'Q', 'S'],
    Feature_2=['L', 'N', 'P', 'R', 'T'],
)
df2 = pd.DataFrame(data=dummy_data_2)
df2

Unnamed: 0,id,Feature_1,Feature_2
0,6,K,L
1,7,M,N
2,8,O,P
3,9,Q,R
4,10,S,T


In [53]:
# A single extra column with ten values.
feature_3_values = [12, 13, 14, 15, 16, 17, 15, 12, 13, -1]
dummy_data_3 = {'Feature_3': feature_3_values}
df3 = pd.DataFrame(data=dummy_data_3)
df3

Unnamed: 0,Feature_3
0,12
1,13
2,14
3,15
4,16
5,17
6,15
7,12
8,13
9,-1


#### concatenate

In [74]:
# axis=0 stacks df2's rows below df1's; the original row labels are kept, so 0-4 appear twice.
df_row = pd.concat([df1, df2], axis=0)
df_row

Unnamed: 0,id,Feature_1,Feature_2
0,1,A,B
1,2,C,D
2,3,E,F
3,4,G,H
4,5,I,J
0,6,K,L
1,7,M,N
2,8,O,P
3,9,Q,R
4,10,S,T


In [82]:
# reset_index() moves the current row labels into a column named "index";
# set_index("index") then turns that column back into the row index.
df_row = df_row.reset_index().set_index("index")
# Blank out the index name so no "index" header is shown above the row labels.
df_row.index.name = ""
df_row

Unnamed: 0,id,Feature_1,Feature_2
,,,
0.0,1.0,A,B
1.0,2.0,C,D
2.0,3.0,E,F
3.0,4.0,G,H
4.0,5.0,I,J
0.0,6.0,K,L
1.0,7.0,M,N
2.0,8.0,O,P
3.0,9.0,Q,R


In [69]:
# ignore_index=True discards the original row labels and renumbers the result 0..n-1.
# Alternative on an already-concatenated frame: df_row = df_row.reset_index(drop=True)
df_row = pd.concat([df1, df2], axis=0, ignore_index=True)

df_row

Unnamed: 0,id,Feature_1,Feature_2
0,1,A,B
1,2,C,D
2,3,E,F
3,4,G,H
4,5,I,J
5,6,K,L
6,7,M,N
7,8,O,P
8,9,Q,R
9,10,S,T


In [70]:
# concat horizontally: axis=1 places df3's column next to df_row's columns, aligned on the row index

pd.concat([df_row, df3], axis=1)

Unnamed: 0,id,Feature_1,Feature_2,Feature_3
0,1,A,B,12
1,2,C,D,13
2,3,E,F,14
3,4,G,H,15
4,5,I,J,16
5,6,K,L,17
6,7,M,N,15
7,8,O,P,12
8,9,Q,R,13
9,10,S,T,-1


## How to iterate over a dataframe's items

In [83]:
# pandas was already imported at the top of the notebook; importing it again is harmless.
import pandas as pd
# Read the CSV file; no index_col is given, so the rows get a default 0, 1, 2, ... integer index.
tips_df = pd.read_csv('files/tips.csv')
tips_df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [84]:
# head(n) shows the first n rows (here 10) instead of the default 5.
tips_df.head(10)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
5,25.29,4.71,Male,No,Sun,Dinner,4
6,8.77,2.0,Male,No,Sun,Dinner,2
7,26.88,3.12,Male,No,Sun,Dinner,4
8,15.04,1.96,Male,No,Sun,Dinner,2
9,14.78,3.23,Male,No,Sun,Dinner,2


In [87]:
# tail(n) shows the last n rows (here 10).
tips_df.tail(10)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
234,15.53,3.0,Male,Yes,Sat,Dinner,2
235,10.07,1.25,Male,No,Sat,Dinner,2
236,12.6,1.0,Male,Yes,Sat,Dinner,2
237,32.83,1.17,Male,Yes,Sat,Dinner,2
238,35.83,4.67,Female,No,Sat,Dinner,3
239,29.03,5.92,Male,No,Sat,Dinner,3
240,27.18,2.0,Female,Yes,Sat,Dinner,2
241,22.67,2.0,Male,Yes,Sat,Dinner,2
242,17.82,1.75,Male,No,Sat,Dinner,2
243,18.78,3.0,Female,No,Thur,Dinner,2


In [88]:
# Iterating over a DataFrame directly yields its column labels, not its rows.
for column in tips_df:
    print(f"gets the column names: {column}")

gets the column names: total_bill
gets the column names: tip
gets the column names: sex
gets the column names: smoker
gets the column names: day
gets the column names: time
gets the column names: size


In [89]:
# Each element of the underlying NumPy array is one full row of the DataFrame.
for row in tips_df.values:
    print(f"gets whole the row: {row}")

gets whole the row: [16.99 1.01 'Female' 'No' 'Sun' 'Dinner' 2]
gets whole the row: [10.34 1.66 'Male' 'No' 'Sun' 'Dinner' 3]
gets whole the row: [21.01 3.5 'Male' 'No' 'Sun' 'Dinner' 3]
gets whole the row: [23.68 3.31 'Male' 'No' 'Sun' 'Dinner' 2]
gets whole the row: [24.59 3.61 'Female' 'No' 'Sun' 'Dinner' 4]
gets whole the row: [25.29 4.71 'Male' 'No' 'Sun' 'Dinner' 4]
gets whole the row: [8.77 2.0 'Male' 'No' 'Sun' 'Dinner' 2]
gets whole the row: [26.88 3.12 'Male' 'No' 'Sun' 'Dinner' 4]
gets whole the row: [15.04 1.96 'Male' 'No' 'Sun' 'Dinner' 2]
gets whole the row: [14.78 3.23 'Male' 'No' 'Sun' 'Dinner' 2]
gets whole the row: [10.27 1.71 'Male' 'No' 'Sun' 'Dinner' 2]
gets whole the row: [35.26 5.0 'Female' 'No' 'Sun' 'Dinner' 4]
gets whole the row: [15.42 1.57 'Male' 'No' 'Sun' 'Dinner' 2]
gets whole the row: [18.43 3.0 'Male' 'No' 'Sun' 'Dinner' 4]
gets whole the row: [14.83 3.02 'Female' 'No' 'Sun' 'Dinner' 2]
gets whole the row: [21.58 3.92 'Male' 'No' 'Sun' 'Dinner' 2]
gets 

In [91]:
# iterrows() yields (index label, row) pairs; individual fields are read from the row by column name.
for row_index, row in tips_df.iterrows():
#     print(f"gets whole the row: {row_index}, {row}")
    print(f"total bill for row: {row_index} is: {row['total_bill']}")

total bill for row: 0 is: 16.99
total bill for row: 1 is: 10.34
total bill for row: 2 is: 21.01
total bill for row: 3 is: 23.68
total bill for row: 4 is: 24.59
total bill for row: 5 is: 25.29
total bill for row: 6 is: 8.77
total bill for row: 7 is: 26.88
total bill for row: 8 is: 15.04
total bill for row: 9 is: 14.78
total bill for row: 10 is: 10.27
total bill for row: 11 is: 35.26
total bill for row: 12 is: 15.42
total bill for row: 13 is: 18.43
total bill for row: 14 is: 14.83
total bill for row: 15 is: 21.58
total bill for row: 16 is: 10.33
total bill for row: 17 is: 16.29
total bill for row: 18 is: 16.97
total bill for row: 19 is: 20.65
total bill for row: 20 is: 17.92
total bill for row: 21 is: 20.29
total bill for row: 22 is: 15.77
total bill for row: 23 is: 39.42
total bill for row: 24 is: 19.82
total bill for row: 25 is: 17.81
total bill for row: 26 is: 13.37
total bill for row: 27 is: 12.69
total bill for row: 28 is: 21.7
total bill for row: 29 is: 19.65
total bill for row: 30

In [93]:
# Selecting with a list of column labels returns a DataFrame holding just those columns.
inner_df = tips_df[['total_bill', "tip"]]
inner_df.head()

Unnamed: 0,total_bill,tip
0,16.99,1.01
1,10.34,1.66
2,21.01,3.5
3,23.68,3.31
4,24.59,3.61


In [94]:
# .values gives a two-dimensional NumPy array; [:5] keeps its first five rows.
inner_df.values[:5]

array([[16.99,  1.01],
       [10.34,  1.66],
       [21.01,  3.5 ],
       [23.68,  3.31],
       [24.59,  3.61]])

In [95]:
# Iterate over the rows of the two-column array, unpacking each row into its two values.
for total_bill, tip in tips_df[['total_bill', "tip"]].values:
    print(f"whole bill: {total_bill + tip}")

whole bill: 18.0
whole bill: 12.0
whole bill: 24.51
whole bill: 26.99
whole bill: 28.2
whole bill: 30.0
whole bill: 10.77
whole bill: 30.0
whole bill: 17.0
whole bill: 18.009999999999998
whole bill: 11.98
whole bill: 40.26
whole bill: 16.99
whole bill: 21.43
whole bill: 17.85
whole bill: 25.5
whole bill: 12.0
whole bill: 20.0
whole bill: 20.47
whole bill: 24.0
whole bill: 22.0
whole bill: 23.04
whole bill: 18.0
whole bill: 47.0
whole bill: 23.0
whole bill: 20.15
whole bill: 15.37
whole bill: 14.69
whole bill: 26.0
whole bill: 22.65
whole bill: 11.0
whole bill: 20.85
whole bill: 18.060000000000002
whole bill: 23.14
whole bill: 21.05
whole bill: 27.66
whole bill: 18.31
whole bill: 20.0
whole bill: 21.0
whole bill: 36.269999999999996
whole bill: 18.28
whole bill: 20.0
whole bill: 17.0
whole bill: 11.0
whole bill: 36.0
whole bill: 21.29
whole bill: 27.23
whole bill: 38.4
whole bill: 30.6
whole bill: 21.04
whole bill: 15.04
whole bill: 12.889999999999999
whole bill: 40.010000000000005
whole

#### Task
    Task: Save the total amount paid by customers in a new column

In [97]:
# Adding two columns is element-wise, so the per-row total needs no explicit Python loop.
total_paid = tips_df['total_bill'] + tips_df['tip']
tips_df['total_paid'] = total_paid
tips_df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,total_paid
0,16.99,1.01,Female,No,Sun,Dinner,2,18.0
1,10.34,1.66,Male,No,Sun,Dinner,3,12.0
2,21.01,3.5,Male,No,Sun,Dinner,3,24.51
3,23.68,3.31,Male,No,Sun,Dinner,2,26.99
4,24.59,3.61,Female,No,Sun,Dinner,4,28.2


In [None]:
# list comprehension usage!
# tips_df['total_paid'] = [total_bill + tip for total_bill, tip in tips_df[['total_bill', "tip"]].values]

#### Task: Drop
    Drop the previously created column
   

In [102]:
# drop() returns a new DataFrame without the column; the result is not assigned,
# so tips_df itself is left unchanged.
tips_df.drop("total_paid", axis=1)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.50,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3
240,27.18,2.00,Female,Yes,Sat,Dinner,2
241,22.67,2.00,Male,Yes,Sat,Dinner,2
242,17.82,1.75,Male,No,Sat,Dinner,2


In [104]:
# total_paid is not dropped?!
tips_df 

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,total_paid
0,16.99,1.01,Female,No,Sun,Dinner,2,18.00
1,10.34,1.66,Male,No,Sun,Dinner,3,12.00
2,21.01,3.50,Male,No,Sun,Dinner,3,24.51
3,23.68,3.31,Male,No,Sun,Dinner,2,26.99
4,24.59,3.61,Female,No,Sun,Dinner,4,28.20
...,...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3,34.95
240,27.18,2.00,Female,Yes,Sat,Dinner,2,29.18
241,22.67,2.00,Male,Yes,Sat,Dinner,2,24.67
242,17.82,1.75,Male,No,Sat,Dinner,2,19.57


### Inplace argument
    When the `inplace` argument is set to True, the method returns nothing (None) and the df itself is changed. Otherwise, a modified copy is returned and the original df is left unmodified.
    One can overwrite the original df with the returned modified df, or simply set the inplace argument to True. I, however, prefer the former one :)

In [105]:
tips_df.drop("total_paid", axis=1, inplace=True)
# or
# tips_df = tips_df.drop("total_paid", axis=1)

In [106]:
tips_df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [107]:
# The column was already removed above, so dropping it a second time raises KeyError.
tips_df.drop("total_paid", axis=1, inplace=True)

KeyError: "['total_paid'] not found in axis"

In [108]:
# How to check that a column exists in a df: `in` tests membership among the column labels.
"total_paid" in tips_df

False

In [109]:
"total_bill" in tips_df

True

In [110]:
# Drop a row/sample by its index label (axis=0); the result is a new DataFrame and tips_df is unchanged.
tips_df.drop(0, axis=0)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.50,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
5,25.29,4.71,Male,No,Sun,Dinner,4
...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3
240,27.18,2.00,Female,Yes,Sat,Dinner,2
241,22.67,2.00,Male,Yes,Sat,Dinner,2
242,17.82,1.75,Male,No,Sat,Dinner,2


In [111]:
# Drop several rows/samples at once by passing a list of index labels.
tips_df.drop([2, 5], axis=0)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
6,8.77,2.00,Male,No,Sun,Dinner,2
...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3
240,27.18,2.00,Female,Yes,Sat,Dinner,2
241,22.67,2.00,Male,Yes,Sat,Dinner,2
242,17.82,1.75,Male,No,Sat,Dinner,2


*:)*