# Pandas

### Used to manipulate data just like we did with numpy

- Pandas is mostly used for data analysis tasks in Python. NumPy is mostly used for working with Numerical values as it makes it easy to apply mathematical functions. Pandas library works well for numeric, alphabets, and heterogeneous types of data simultaneously.

In [48]:
import pandas as pd
iris = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data") # will take the path of the csv file to load data
iris
# Pandas treats the first row as the header

Unnamed: 0,5.1,3.5,1.4,0.2,Iris-setosa
0,4.9,3.0,1.4,0.2,Iris-setosa
1,4.7,3.2,1.3,0.2,Iris-setosa
2,4.6,3.1,1.5,0.2,Iris-setosa
3,5.0,3.6,1.4,0.2,Iris-setosa
4,5.4,3.9,1.7,0.4,Iris-setosa
...,...,...,...,...,...
144,6.7,3.0,5.2,2.3,Iris-virginica
145,6.3,2.5,5.0,1.9,Iris-virginica
146,6.5,3.0,5.2,2.0,Iris-virginica
147,6.2,3.4,5.4,2.3,Iris-virginica


In [49]:
print(type(iris)) # it's a pandas data frame
# data frame is a two dimensional table with rows and columns

<class 'pandas.core.frame.DataFrame'>


In [50]:
# df = iris -> any changes done in df will reflect in iris
df = iris.copy()
df.head() # A good way to look at some few initial entries

Unnamed: 0,5.1,3.5,1.4,0.2,Iris-setosa
0,4.9,3.0,1.4,0.2,Iris-setosa
1,4.7,3.2,1.3,0.2,Iris-setosa
2,4.6,3.1,1.5,0.2,Iris-setosa
3,5.0,3.6,1.4,0.2,Iris-setosa
4,5.4,3.9,1.7,0.4,Iris-setosa


In [51]:
df.head(3)

Unnamed: 0,5.1,3.5,1.4,0.2,Iris-setosa
0,4.9,3.0,1.4,0.2,Iris-setosa
1,4.7,3.2,1.3,0.2,Iris-setosa
2,4.6,3.1,1.5,0.2,Iris-setosa


In [52]:
# Changing column headers
df.columns = ['sl', 'sw', 'pl', 'pw', 'flower_type']

In [53]:
df.head(3)
# (0, 1, and 2) are labels for rows
# (sl, sw, etc...) are headers for columns

Unnamed: 0,sl,sw,pl,pw,flower_type
0,4.9,3.0,1.4,0.2,Iris-setosa
1,4.7,3.2,1.3,0.2,Iris-setosa
2,4.6,3.1,1.5,0.2,Iris-setosa


In [54]:
print(df.shape)
print(df.dtypes) # prints the corresponding data type for each row

(149, 5)
sl             float64
sw             float64
pl             float64
pw             float64
flower_type     object
dtype: object


In [55]:
df.describe()

Unnamed: 0,sl,sw,pl,pw
count,149.0,149.0,149.0,149.0
mean,5.848322,3.051007,3.774497,1.205369
std,0.828594,0.433499,1.759651,0.761292
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.4,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [56]:
# df.column_name outputs only that particular column
df.sl

0      4.9
1      4.7
2      4.6
3      5.0
4      5.4
      ... 
144    6.7
145    6.3
146    6.5
147    6.2
148    5.9
Name: sl, Length: 149, dtype: float64

In [57]:
# or
df['sl']

0      4.9
1      4.7
2      4.6
3      5.0
4      5.4
      ... 
144    6.7
145    6.3
146    6.5
147    6.2
148    5.9
Name: sl, Length: 149, dtype: float64

In [58]:
df.isnull()

Unnamed: 0,sl,sw,pl,pw,flower_type
0,False,False,False,False,False
1,False,False,False,False,False
2,False,False,False,False,False
3,False,False,False,False,False
4,False,False,False,False,False
...,...,...,...,...,...
144,False,False,False,False,False
145,False,False,False,False,False
146,False,False,False,False,False
147,False,False,False,False,False


In [59]:
df.isnull().sum() # Showing me how many null entries we have in each column

sl             0
sw             0
pl             0
pw             0
flower_type    0
dtype: int64

In [60]:
df.iloc[1:4, 2:3] # with iloc we can use slicing to access data

Unnamed: 0,pl
1,1.3
2,1.5
3,1.4


# Manipulating data in data-frame

In [61]:
a = df.drop(0) # removes the row which has the label '0'
a.head()

Unnamed: 0,sl,sw,pl,pw,flower_type
1,4.7,3.2,1.3,0.2,Iris-setosa
2,4.6,3.1,1.5,0.2,Iris-setosa
3,5.0,3.6,1.4,0.2,Iris-setosa
4,5.4,3.9,1.7,0.4,Iris-setosa
5,4.6,3.4,1.4,0.3,Iris-setosa


In [62]:
# dropping the column without making a copy
df.drop(0, inplace = True) # by default inplace is 'False'
df.head(3)

Unnamed: 0,sl,sw,pl,pw,flower_type
1,4.7,3.2,1.3,0.2,Iris-setosa
2,4.6,3.1,1.5,0.2,Iris-setosa
3,5.0,3.6,1.4,0.2,Iris-setosa


In [64]:
print(df.index) # outputs current labels 

RangeIndex(start=1, stop=149, step=1)


In [66]:
df.index[0], df.index[3] # returns the label of the 0th row and the 3rd row

(1, 4)

In [68]:
df.drop(df.index[0], inplace = True)
df.head()

Unnamed: 0,sl,sw,pl,pw,flower_type
2,4.6,3.1,1.5,0.2,Iris-setosa
3,5.0,3.6,1.4,0.2,Iris-setosa
4,5.4,3.9,1.7,0.4,Iris-setosa
5,4.6,3.4,1.4,0.3,Iris-setosa
6,5.0,3.4,1.5,0.2,Iris-setosa


In [69]:
df.drop(df.index[[0, 1]], inplace = True) # removing more than one row
df.head()

Unnamed: 0,sl,sw,pl,pw,flower_type
4,5.4,3.9,1.7,0.4,Iris-setosa
5,4.6,3.4,1.4,0.3,Iris-setosa
6,5.0,3.4,1.5,0.2,Iris-setosa
7,4.4,2.9,1.4,0.2,Iris-setosa
8,4.9,3.1,1.5,0.1,Iris-setosa


In [70]:
df.sl > 3 # returns all the rows who are satisfying this condition

4      True
5      True
6      True
7      True
8      True
       ... 
144    True
145    True
146    True
147    True
148    True
Name: sl, Length: 145, dtype: bool

In [71]:
df[df.sl > 5] # returns all those rows with values greater than 5

Unnamed: 0,sl,sw,pl,pw,flower_type
4,5.4,3.9,1.7,0.4,Iris-setosa
9,5.4,3.7,1.5,0.2,Iris-setosa
13,5.8,4.0,1.2,0.2,Iris-setosa
14,5.7,4.4,1.5,0.4,Iris-setosa
15,5.4,3.9,1.3,0.4,Iris-setosa
...,...,...,...,...,...
144,6.7,3.0,5.2,2.3,Iris-virginica
145,6.3,2.5,5.0,1.9,Iris-virginica
146,6.5,3.0,5.2,2.0,Iris-virginica
147,6.2,3.4,5.4,2.3,Iris-virginica


In [72]:
df[df.flower_type == 'Iris-setosa'].describe() # values for this kind of flower 
# Basically we can get lot of interesting insights on the data

Unnamed: 0,sl,sw,pl,pw
count,45.0,45.0,45.0,45.0
mean,5.022222,3.433333,1.471111,0.248889
std,0.362998,0.391384,0.180432,0.112052
min,4.3,2.3,1.0,0.1
25%,4.8,3.2,1.4,0.2
50%,5.0,3.4,1.5,0.2
75%,5.2,3.7,1.6,0.3
max,5.8,4.4,1.9,0.6


In [73]:
print(df.loc[8]) # prints the 8th labelled row, iloc does it by position

sl                     4.9
sw                     3.1
pl                     1.5
pw                     0.1
flower_type    Iris-setosa
Name: 8, dtype: object


In [74]:
df.loc[0] = [1,2,3,4,'Iris-setosa'] # adds a row with label 0

In [76]:
df.tail()

Unnamed: 0,sl,sw,pl,pw,flower_type
145,6.3,2.5,5.0,1.9,Iris-virginica
146,6.5,3.0,5.2,2.0,Iris-virginica
147,6.2,3.4,5.4,2.3,Iris-virginica
148,5.9,3.0,5.1,1.8,Iris-virginica
0,1.0,2.0,3.0,4.0,Iris-setosa


# Example Problems

In [None]:
Problem - Count of Flower

Problem Statement
Find and print count of each kind of flower (separated by space)?

Print the count as Integer Value.
Output Format
count1 count2 count3 .....

In [80]:
import pandas as pd
columns = ['sl','sw','pl','pw','flower_type']
iris = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data", names = columns) 
count = iris['flower_type'].value_counts()
for i in range(3):
    print(count[i], end = " ")

# or we could directly write for i in iris['flower_type'].value_counts():
# or for i in count(): print(i, end = " ")

50 50 50 

In [None]:
names : array-like in pandas.read_csv(), optional

List of column names to use. If the file contains a header row, then you should explicitly 
pass header=0 to override the column names. Duplicates in this list are not allowed.

Series.value_counts(normalize=False, sort=True, ascending=False, bins=None, dropna=True):
    Return a Series containing counts of unique values.

In [None]:
Problem - Iris Virginica

Problem Statement
Find the data of flower “Iris-virginica” type where petal-length > 1.5?

Print the all the feature values.
Output Format
feature1 feature2 feature3 feature4 feature5
feature1 feature2 feature3 feature4 feature5
feature1 feature2 feature3 feature4 feature5
feature1 feature2 feature3 feature4 feature5
. . .

In [98]:
# The warning message suggests that the line arr = virginica[df.pl > 1.5] might create a new DataFrame with a different index than the original DataFrame virginica. This can cause issues when trying to index or modify the DataFrame.

# To resolve this issue, you can reset the index of the virginica DataFrame after filtering it with df.pl > 1.5. Here's the updated code:

import pandas as pd
columns = ['sl','sw','pl','pw','flower_type']
iris = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data', names=columns)
iris = iris[iris.flower_type=='Iris-virginica']
iris = iris[iris.pl>1.5]
ans = iris.values
for i in range(iris.shape[0]):
    for j in range(iris.shape[1]):
        print(ans[i][j], end = " ")
    print()
    
# Alternative Code:

# import pandas as pd
# columns = ['sl','sw','pl','pw','flower_type']
# iris = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data', names=columns)
# iris = iris[iris.flower_type=='Iris-virginica']
# iris = iris[iris.pl>1.5]
# iris = iris.values
# for row in iris :
#   print(*row)

6.3 3.3 6.0 2.5 Iris-virginica 
5.8 2.7 5.1 1.9 Iris-virginica 
7.1 3.0 5.9 2.1 Iris-virginica 
6.3 2.9 5.6 1.8 Iris-virginica 
6.5 3.0 5.8 2.2 Iris-virginica 
7.6 3.0 6.6 2.1 Iris-virginica 
4.9 2.5 4.5 1.7 Iris-virginica 
7.3 2.9 6.3 1.8 Iris-virginica 
6.7 2.5 5.8 1.8 Iris-virginica 
7.2 3.6 6.1 2.5 Iris-virginica 
6.5 3.2 5.1 2.0 Iris-virginica 
6.4 2.7 5.3 1.9 Iris-virginica 
6.8 3.0 5.5 2.1 Iris-virginica 
5.7 2.5 5.0 2.0 Iris-virginica 
5.8 2.8 5.1 2.4 Iris-virginica 
6.4 3.2 5.3 2.3 Iris-virginica 
6.5 3.0 5.5 1.8 Iris-virginica 
7.7 3.8 6.7 2.2 Iris-virginica 
7.7 2.6 6.9 2.3 Iris-virginica 
6.0 2.2 5.0 1.5 Iris-virginica 
6.9 3.2 5.7 2.3 Iris-virginica 
5.6 2.8 4.9 2.0 Iris-virginica 
7.7 2.8 6.7 2.0 Iris-virginica 
6.3 2.7 4.9 1.8 Iris-virginica 
6.7 3.3 5.7 2.1 Iris-virginica 
7.2 3.2 6.0 1.8 Iris-virginica 
6.2 2.8 4.8 1.8 Iris-virginica 
6.1 3.0 4.9 1.8 Iris-virginica 
6.4 2.8 5.6 2.1 Iris-virginica 
7.2 3.0 5.8 1.6 Iris-virginica 
7.4 2.8 6.1 1.9 Iris-virginica 
7.9 3.8 

In [None]:
In Pandas, the shape attribute of a DataFrame returns a tuple representing the dimensions of the data frame. 
The first element of the tuple is the number of rows, and the second element is the number of columns.

Therefore, in the line for i in range(df.shape[0]):, df.shape[0] is used to iterate over the number of
rows in the df data frame.

Similarly, in the line for j in range(df.shape[1]):, df.shape[1] is used to iterate over the number of 
columns in the df data frame.

In [None]:
Problem - Iris Values

Problem Statement
Find and print the minimum, maximum and average value of the feature for each kind of flower ?

Print the value with two decimal places.
Note: Order for flower is Iris-setosa, Iris-versicolor and Iris-virginica.
Output Format
minSL minSW minPL minPW Iris-setosa
maxSL maxSW maxPL maxPW Iris-setosa
avgSL avgSW avgPL avgPW Iris-setosa
. . .
. . .
. . .

In [110]:
import pandas as pd

columns = ['sl', 'sw', 'pl', 'pw', 'flower_type']
iris = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data', names = columns)

species1 = 'Iris-setosa'
species2 = 'Iris-versicolor'
species3 = 'Iris-virginica'

class1 = iris[iris.flower_type == species1].describe()
class2 = iris[iris.flower_type == species2].describe()
class3 = iris[iris.flower_type == species3].describe()

for i in class1.iloc[[3]].values[0]:
    print(format(i, '.2f'), end = " ")
print(species1)

for i in class2.iloc[[7]].values[0]:
    print(format(i, '.2f'), end = " ")
print(species1)

for i in class3.iloc[[1]].values[0]:
    print(format(i, '.2f'), end = " ")
print(species1)

for i in class3.iloc[[3]].values[0]:
    print(format(i, '.2f'), end = " ")
print(species2)

for i in class3.iloc[[7]].values[0]:
    print(format(i, '.2f'), end = " ")
print(species2)

for i in class3.iloc[[1]].values[0]:
    print(format(i, '.2f'), end = " ")
print(species2)

for i in class3.iloc[[3]].values[0]:
    print(format(i, '.2f'), end = " ")
print(species3)

for i in class3.iloc[[7]].values[0]:
    print(format(i, '.2f'), end = " ")
print(species3)

for i in class3.iloc[[1]].values[0]:
    print(format(i, '.2f'), end = " ")
print(species3)

4.30 2.30 1.00 0.10 Iris-setosa
7.00 3.40 5.10 1.80 Iris-setosa
6.59 2.97 5.55 2.03 Iris-setosa
4.90 2.20 4.50 1.40 Iris-versicolor
7.90 3.80 6.90 2.50 Iris-versicolor
6.59 2.97 5.55 2.03 Iris-versicolor
4.90 2.20 4.50 1.40 Iris-virginica
7.90 3.80 6.90 2.50 Iris-virginica
6.59 2.97 5.55 2.03 Iris-virginica


In [104]:
# Alternative way of solving the above problem: 

import pandas as pd
columns = ['sl','sw','pl','pw','flower_type']
iris = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data', names=columns)

c = iris[iris.flower_type=='Iris-setosa']
print('%.2f'%min(c['sl']),'%.2f'%min(c['sw']),'%.2f'%min(c['pl']),'%.2f'%min(c['pw']),c.iloc[0,4])
print('%.2f'%max(c['sl']),'%.2f'%max(c['sw']),'%.2f'%max(c['pl']),'%.2f'%max(c['pw']),c.iloc[0,4])
print('%.2f'%(c['sl'].mean()),'%.2f'%(c['sw'].mean()),'%.2f'%(c['pl'].mean()),'%.2f'%(c['pw'].mean()),c.iloc[0,4])

c1 = iris[iris.flower_type=='Iris-versicolor']
print('%.2f'%min(c1['sl']),'%.2f'%min(c1['sw']),'%.2f'%min(c1['pl']),'%.2f'%min(c1['pw']),c1.iloc[0,4])
print('%.2f'%max(c1['sl']),'%.2f'%max(c1['sw']),'%.2f'%max(c1['pl']),'%.2f'%max(c1['pw']),c1.iloc[0,4])
print('%.2f'%(c1['sl'].mean()),'%.2f'%(c1['sw'].mean()),'%.2f'%(c1['pl'].mean()),'%.2f'%(c1['pw'].mean()),c1.iloc[0,4])

c2 = iris[iris.flower_type=='Iris-virginica']
print('%.2f'%min(c2['sl']),'%.2f'%min(c2['sw']),'%.2f'%min(c2['pl']),'%.2f'%min(c2['pw']),c2.iloc[0,4])
print('%.2f'%max(c2['sl']),'%.2f'%max(c2['sw']),'%.2f'%max(c2['pl']),'%.2f'%max(c2['pw']),c2.iloc[0,4])
print('%.2f'%(c2['sl'].mean()),'%.2f'%(c2['sw'].mean()),'%.2f'%(c2['pl'].mean()),'%.2f'%(c2['pw'].mean()),c2.iloc[0,4])

4.30 2.30 1.00 0.10 Iris-setosa
5.80 4.40 1.90 0.60 Iris-setosa
5.01 3.42 1.46 0.24 Iris-setosa
4.90 2.00 3.00 1.00 Iris-versicolor
7.00 3.40 5.10 1.80 Iris-versicolor
5.94 2.77 4.26 1.33 Iris-versicolor
4.90 2.20 4.50 1.40 Iris-virginica
7.90 3.80 6.90 2.50 Iris-virginica
6.59 2.97 5.55 2.03 Iris-virginica


# Handling NAN

In [113]:
df['diff_pl_pw'] = df['pl'] - df['pw']
df.tail()

Unnamed: 0,sl,sw,pl,pw,flower_type,diff_pl_pw
145,6.7,3.0,5.2,2.3,Iris-virginica,2.9
146,6.3,2.5,5.0,1.9,Iris-virginica,3.1
147,6.5,3.0,5.2,2.0,Iris-virginica,3.2
148,6.2,3.4,5.4,2.3,Iris-virginica,3.1
149,5.9,3.0,5.1,1.8,Iris-virginica,3.3


In [115]:
# creating custom NAN values
import numpy as np

df.iloc[2:4, 1:3] = np.nan
df.head()

Unnamed: 0,sl,sw,pl,pw,flower_type,diff_pl_pw
0,5.1,3.5,1.4,0.2,Iris-setosa,1.2
1,4.9,3.0,1.4,0.2,Iris-setosa,1.2
2,4.7,,,0.2,Iris-setosa,1.1
3,4.6,,,0.2,Iris-setosa,1.3
4,5.0,3.6,1.4,0.2,Iris-setosa,1.2


In [116]:
df.describe()

Unnamed: 0,sl,sw,pl,pw,diff_pl_pw
count,150.0,148.0,148.0,150.0,150.0
mean,5.843333,3.052703,3.790541,1.198667,2.56
std,0.828066,0.436349,1.754618,0.763161,1.050152
min,4.3,2.0,1.0,0.1,0.8
25%,5.1,2.8,1.6,0.3,1.4
50%,5.8,3.0,4.4,1.3,2.9
75%,6.4,3.3,5.1,1.8,3.3
max,7.9,4.4,6.9,2.5,4.7


In [119]:
df.dropna(inplace = True) # deletes rows with NaN values

In [120]:
df.head()

Unnamed: 0,sl,sw,pl,pw,flower_type,diff_pl_pw
0,5.1,3.5,1.4,0.2,Iris-setosa,1.2
1,4.9,3.0,1.4,0.2,Iris-setosa,1.2
4,5.0,3.6,1.4,0.2,Iris-setosa,1.2
5,5.4,3.9,1.7,0.4,Iris-setosa,1.3
6,4.6,3.4,1.4,0.3,Iris-setosa,1.1


In [121]:
df.reset_index(drop = True, inplace = True) # resets all the indices
df.head()

Unnamed: 0,sl,sw,pl,pw,flower_type,diff_pl_pw
0,5.1,3.5,1.4,0.2,Iris-setosa,1.2
1,4.9,3.0,1.4,0.2,Iris-setosa,1.2
2,5.0,3.6,1.4,0.2,Iris-setosa,1.2
3,5.4,3.9,1.7,0.4,Iris-setosa,1.3
4,4.6,3.4,1.4,0.3,Iris-setosa,1.1


In [122]:
df.iloc[2:4, 1:3] = np.nan
df.head()

Unnamed: 0,sl,sw,pl,pw,flower_type,diff_pl_pw
0,5.1,3.5,1.4,0.2,Iris-setosa,1.2
1,4.9,3.0,1.4,0.2,Iris-setosa,1.2
2,5.0,,,0.2,Iris-setosa,1.2
3,5.4,,,0.4,Iris-setosa,1.3
4,4.6,3.4,1.4,0.3,Iris-setosa,1.1


In [126]:
df.sw.fillna(df.sw.mean(), inplace = True)
df.pl.fillna(df.pl.mean(), inplace = True)

In [127]:
df.head()

Unnamed: 0,sl,sw,pl,pw,flower_type,diff_pl_pw
0,5.1,3.5,1.4,0.2,Iris-setosa,1.2
1,4.9,3.0,1.4,0.2,Iris-setosa,1.2
2,5.0,3.043151,3.821233,0.2,Iris-setosa,1.2
3,5.4,3.043151,3.821233,0.4,Iris-setosa,1.3
4,4.6,3.4,1.4,0.3,Iris-setosa,1.1


In [130]:
# Another Example
a = df[df.flower_type == 'Iris-setosa']
a.pl.mean()

1.561301369863014

# Handling Strings In Data

In [131]:
df['Gender'] = 'Female'
df.head()

Unnamed: 0,sl,sw,pl,pw,flower_type,diff_pl_pw,Gender
0,5.1,3.5,1.4,0.2,Iris-setosa,1.2,Female
1,4.9,3.0,1.4,0.2,Iris-setosa,1.2,Female
2,5.0,3.043151,3.821233,0.2,Iris-setosa,1.2,Female
3,5.4,3.043151,3.821233,0.4,Iris-setosa,1.3,Female
4,4.6,3.4,1.4,0.3,Iris-setosa,1.1,Female


In [132]:
df.iloc[0:10, 6] = 'Male'
df.head()

Unnamed: 0,sl,sw,pl,pw,flower_type,diff_pl_pw,Gender
0,5.1,3.5,1.4,0.2,Iris-setosa,1.2,Male
1,4.9,3.0,1.4,0.2,Iris-setosa,1.2,Male
2,5.0,3.043151,3.821233,0.2,Iris-setosa,1.2,Male
3,5.4,3.043151,3.821233,0.4,Iris-setosa,1.3,Male
4,4.6,3.4,1.4,0.3,Iris-setosa,1.1,Male


In [133]:
df.tail()

Unnamed: 0,sl,sw,pl,pw,flower_type,diff_pl_pw,Gender
143,6.7,3.0,5.2,2.3,Iris-virginica,2.9,Female
144,6.3,2.5,5.0,1.9,Iris-virginica,3.1,Female
145,6.5,3.0,5.2,2.0,Iris-virginica,3.2,Female
146,6.2,3.4,5.4,2.3,Iris-virginica,3.1,Female
147,5.9,3.0,5.1,1.8,Iris-virginica,3.3,Female


In [135]:
def f(s):
    if s == 'Male':
        return 0
    else:
        return 1

df['sex'] = df.Gender.apply(f)
print(df.head())
print(df.tail())

    sl        sw        pl   pw  flower_type  diff_pl_pw Gender  sex
0  5.1  3.500000  1.400000  0.2  Iris-setosa         1.2   Male    0
1  4.9  3.000000  1.400000  0.2  Iris-setosa         1.2   Male    0
2  5.0  3.043151  3.821233  0.2  Iris-setosa         1.2   Male    0
3  5.4  3.043151  3.821233  0.4  Iris-setosa         1.3   Male    0
4  4.6  3.400000  1.400000  0.3  Iris-setosa         1.1   Male    0
      sl   sw   pl   pw     flower_type  diff_pl_pw  Gender  sex
143  6.7  3.0  5.2  2.3  Iris-virginica         2.9  Female    1
144  6.3  2.5  5.0  1.9  Iris-virginica         3.1  Female    1
145  6.5  3.0  5.2  2.0  Iris-virginica         3.2  Female    1
146  6.2  3.4  5.4  2.3  Iris-virginica         3.1  Female    1
147  5.9  3.0  5.1  1.8  Iris-virginica         3.3  Female    1


In [136]:
del df['Gender']
df.head()

Unnamed: 0,sl,sw,pl,pw,flower_type,diff_pl_pw,sex
0,5.1,3.5,1.4,0.2,Iris-setosa,1.2,0
1,4.9,3.0,1.4,0.2,Iris-setosa,1.2,0
2,5.0,3.043151,3.821233,0.2,Iris-setosa,1.2,0
3,5.4,3.043151,3.821233,0.4,Iris-setosa,1.3,0
4,4.6,3.4,1.4,0.3,Iris-setosa,1.1,0
