#### Description of the dataset:

https://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.names

### Load libraries

In [1]:
import os
import pandas as pd
import numpy as np

### Load dataframe abalone

In [2]:
# Comma-separated file with no header row, so columns get positional integer labels
DATA_FILE = 'abalone.txt'
abalone = pd.read_table(DATA_FILE, header=None, sep=',')

### Print first five rows of the dataframe

In [3]:
# Preview the first five rows (columns are still labelled 0..8 at this point)
abalone.head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
1,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
2,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
3,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
4,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7


### Create column names for a dataframe

In [4]:
# Nine descriptive labels, assigned positionally to the nine columns
column_names = [
    'Sex',
    'Length',
    'Diameter',
    'Height',
    'Whole weight',
    'Shucked weight',
    'Viscera weight',
    'Shell weight',
    'Rings',
]
abalone.columns = column_names

### Print first five rows of a dataframe

In [5]:
# Preview the first five rows with the new descriptive column labels
abalone.head(n=5)

Unnamed: 0,Sex,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings
0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
1,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
2,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
3,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
4,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7


### Replace spaces in column names with underscores

In [6]:
# Swap every space in a column label for an underscore so labels work as attributes
abalone.columns = [label.replace(' ', '_') for label in abalone.columns]

### Print first five rows of a dataframe

In [7]:
# Preview the first five rows with underscore-separated column labels
abalone.head(n=5)

Unnamed: 0,Sex,Length,Diameter,Height,Whole_weight,Shucked_weight,Viscera_weight,Shell_weight,Rings
0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
1,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
2,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
3,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
4,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7


### Print the shape of the dataframe

In [8]:
# (number of rows, number of columns)
n_rows, n_cols = abalone.shape
(n_rows, n_cols)

(4177, 9)

### Check for missing cell values

In [9]:
# Flag missing cells, then count them per column
is_missing = abalone.isnull()
is_missing.sum()

Sex               0
Length            0
Diameter          0
Height            0
Whole_weight      0
Shucked_weight    0
Viscera_weight    0
Shell_weight      0
Rings             0
dtype: int64

### See the type of data

In [10]:
# Class of the object that holds the loaded data
abalone_type = type(abalone)
abalone_type

pandas.core.frame.DataFrame

#### How do I slice a column from the dataframe, abalone?

In [11]:
# Select the single column 'Sex' and tally how often each value occurs
abalone['Sex'].value_counts()

M    1528
I    1342
F    1307
Name: Sex, dtype: int64

#### How do I slice multiple columns from a dataframe?

In [12]:
# A list of labels selects several columns at once; show the first five rows
wanted_columns = ["Sex", "Height"]
abalone[wanted_columns].head()

Unnamed: 0,Sex,Height
0,M,0.095
1,M,0.09
2,F,0.135
3,M,0.125
4,I,0.08


#### How do I slice a row from a dataframe?

In [13]:
# Row with index label 0, returned as a Series keyed by column name
abalone.loc[0]

Sex                    M
Length             0.455
Diameter           0.365
Height             0.095
Whole_weight       0.514
Shucked_weight    0.2245
Viscera_weight     0.101
Shell_weight        0.15
Rings                 15
Name: 0, dtype: object

#### How do I slice multiple rows in a dataframe?

In [14]:
# Label-based slice: .loc includes both endpoints, so rows 3, 4 and 5 are returned
abalone.loc[3:5]

Unnamed: 0,Sex,Length,Diameter,Height,Whole_weight,Shucked_weight,Viscera_weight,Shell_weight,Rings
3,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
4,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7
5,I,0.425,0.3,0.095,0.3515,0.141,0.0775,0.12,8


#### How do I drop a column in a dataframe?

In [15]:
# Returns a new frame without 'Sex'; abalone itself is left untouched
abalone.drop('Sex', axis='columns').head()

Unnamed: 0,Length,Diameter,Height,Whole_weight,Shucked_weight,Viscera_weight,Shell_weight,Rings
0,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
1,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
2,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
3,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
4,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7


#### How do I drop multiple columns in a dataframe?

In [16]:
# Returns a new frame without the listed columns; abalone itself is left untouched
unwanted_columns = ["Length", "Diameter", "Rings"]
abalone.drop(unwanted_columns, axis='columns').head()

Unnamed: 0,Sex,Height,Whole_weight,Shucked_weight,Viscera_weight,Shell_weight
0,M,0.095,0.514,0.2245,0.101,0.15
1,M,0.09,0.2255,0.0995,0.0485,0.07
2,F,0.135,0.677,0.2565,0.1415,0.21
3,M,0.125,0.516,0.2155,0.114,0.155
4,I,0.08,0.205,0.0895,0.0395,0.055


#### How do I drop a row from a dataframe?

In [17]:
# Returns a new frame without the row labelled 3; abalone itself is left untouched
abalone.drop(3, axis='index').head()

Unnamed: 0,Sex,Length,Diameter,Height,Whole_weight,Shucked_weight,Viscera_weight,Shell_weight,Rings
0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
1,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
2,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
4,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7
5,I,0.425,0.3,0.095,0.3515,0.141,0.0775,0.12,8


#### How do I drop multiple rows from a dataframe?

In [18]:
# Returns a new frame without rows labelled 1, 3 and 5; abalone itself is left untouched
unwanted_rows = [1, 3, 5]
abalone.drop(unwanted_rows, axis='index').head()

Unnamed: 0,Sex,Length,Diameter,Height,Whole_weight,Shucked_weight,Viscera_weight,Shell_weight,Rings
0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
2,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
4,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7
6,F,0.53,0.415,0.15,0.7775,0.237,0.1415,0.33,20
7,F,0.545,0.425,0.125,0.768,0.294,0.1495,0.26,16


#### How do I slice only male's information from the dataframe, abalone?

In [19]:
# Boolean mask: True on rows whose Sex is 'M'
is_male = abalone.Sex == 'M'
abalone.loc[is_male].head()

Unnamed: 0,Sex,Length,Diameter,Height,Whole_weight,Shucked_weight,Viscera_weight,Shell_weight,Rings
0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
1,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
3,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
8,M,0.475,0.37,0.125,0.5095,0.2165,0.1125,0.165,9
11,M,0.43,0.35,0.11,0.406,0.1675,0.081,0.135,10


#### How do I slice the male abalone with 15 rings?

In [20]:
# Combine two row masks with &: Sex is 'M' and Rings equals 15
is_male = abalone.Sex == 'M'
has_15_rings = abalone.Rings == 15
abalone.loc[is_male & has_15_rings].head()

Unnamed: 0,Sex,Length,Diameter,Height,Whole_weight,Shucked_weight,Viscera_weight,Shell_weight,Rings
0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
28,M,0.605,0.475,0.18,0.9365,0.394,0.219,0.295,15
90,M,0.565,0.425,0.135,0.8115,0.341,0.1675,0.255,15
94,M,0.695,0.56,0.19,1.494,0.588,0.3425,0.485,15
101,M,0.55,0.435,0.145,0.843,0.328,0.1915,0.255,15


#### How do I categorize abalone's age into young, middle, and old?

In [21]:
rings = abalone.Rings

# Split the rows into three ring-count bands; .copy() makes each band an
# independent frame so adding a column does not touch abalone
young = abalone.loc[rings < 8].copy()
middle = abalone.loc[(rings >= 8) & (rings <= 11)].copy()
old = abalone.loc[rings > 11].copy()

# Tag every row of each band with its label
for band, label in ((young, 'young'), (middle, 'middle'), (old, 'old')):
    band['Age'] = label

# Stack the bands back together; rows come out grouped by band, so sort by
# the original index only for display
dataframes = [young, middle, old]
abalone_copy = pd.concat(dataframes)

abalone_copy.sort_index().head()

Unnamed: 0,Sex,Length,Diameter,Height,Whole_weight,Shucked_weight,Viscera_weight,Shell_weight,Rings,Age
0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15,old
1,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7,young
2,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9,middle
3,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10,middle
4,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7,young


In [22]:
# Label every abalone by ring count:
#   fewer than 8 rings -> 'young', more than 11 rings -> 'old', otherwise 'middle'.
# np.where evaluates the whole column at once instead of looping in Python.
rings = abalone.Rings
age_labels = np.where(rings < 8, 'young', np.where(rings > 11, 'old', 'middle'))

# Attach the labels as a new column on a copy of abalone. The array is
# assigned positionally, so the result does not depend on abalone's index
# matching a freshly numbered frame the way an axis=1 concat would.
df1 = abalone.copy()
df1['Age'] = age_labels
df1.head()

Unnamed: 0,Sex,Length,Diameter,Height,Whole_weight,Shucked_weight,Viscera_weight,Shell_weight,Rings,Age
0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15,old
1,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7,young
2,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9,middle
3,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10,middle
4,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7,young


#### How do I create a binary variable from the existing variable of a dataframe?

In [23]:
# Two-level label from Shell_weight: below 0.20 -> 'light', otherwise 'heavy'.
# np.where evaluates the whole column at once instead of looping in Python.
shell_labels = np.where(abalone.Shell_weight < 0.20, 'light', 'heavy')

# Attach the labels as a new column on a copy of abalone. The array is
# assigned positionally, so the result does not depend on abalone's index
# matching a freshly numbered frame the way an axis=1 concat would.
df2 = abalone.copy()
df2['ShellWeight'] = shell_labels
df2.head()

Unnamed: 0,Sex,Length,Diameter,Height,Whole_weight,Shucked_weight,Viscera_weight,Shell_weight,Rings,ShellWeight
0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15,light
1,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7,light
2,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9,heavy
3,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10,light
4,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7,light


In [24]:
# 0/1 indicator from Shell_weight: below 0.20 -> 0, otherwise 1.
# np.where evaluates the whole column at once instead of looping in Python.
shell_flags = np.where(abalone.Shell_weight < 0.20, 0, 1)

# Attach the flags as a new column on a copy of abalone. The array is
# assigned positionally, so the result does not depend on abalone's index
# matching a freshly numbered frame the way an axis=1 concat would.
df2 = abalone.copy()
df2['ShellWeight'] = shell_flags
df2.head()

Unnamed: 0,Sex,Length,Diameter,Height,Whole_weight,Shucked_weight,Viscera_weight,Shell_weight,Rings,ShellWeight
0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15,0
1,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7,0
2,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9,1
3,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10,0
4,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7,0


#### How do I replace an existing cell value with a different value?

In [25]:
# Mask is True only where the index label is 0; overwrite 'Sex' on those rows (in place)
label_is_zero = df2.index == 0
df2.loc[label_is_zero, "Sex"] = 'male'
df2.head(n=3)

Unnamed: 0,Sex,Length,Diameter,Height,Whole_weight,Shucked_weight,Viscera_weight,Shell_weight,Rings,ShellWeight
0,male,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15,0
1,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7,0
2,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9,1


#### How do I see the intersection of the three variables of a dataframe?

In [26]:
# Rows must satisfy all three conditions at once
is_male = abalone.Sex == 'M'
is_shorter = abalone.Length < 0.455
has_15_rings = abalone.Rings == 15
abalone[is_male & is_shorter & has_15_rings]

Unnamed: 0,Sex,Length,Diameter,Height,Whole_weight,Shucked_weight,Viscera_weight,Shell_weight,Rings
542,M,0.42,0.325,0.115,0.2885,0.1,0.057,0.1135,15
3900,M,0.445,0.345,0.14,0.476,0.2055,0.1015,0.1085,15


#### How do I see the intersection of the four variables in a dataframe?

In [27]:
# Rows must satisfy all four conditions at once
is_male = abalone.Sex == 'M'
# NOTE(review): exact float comparison; it only matches values stored
# identically to the literal 0.420 -- consider a tolerance if Length is ever computed
length_matches = abalone.Length == 0.420
is_wide_enough = abalone.Diameter >= 0.325
has_15_rings = abalone.Rings == 15
abalone[is_male & length_matches & is_wide_enough & has_15_rings]

Unnamed: 0,Sex,Length,Diameter,Height,Whole_weight,Shucked_weight,Viscera_weight,Shell_weight,Rings
542,M,0.42,0.325,0.115,0.2885,0.1,0.057,0.1135,15


#### How do I slice a fraction of a dataframe randomly?

In [28]:
# Draw 60% of the rows at random; the fixed random_state makes the draw repeatable
abalone_frac = abalone.sample(random_state=98, frac=0.60)
frac_rows, frac_cols = abalone_frac.shape
(frac_rows, frac_cols)

(2506, 9)

In [29]:
# Shape of the full frame, for comparison with the sampled fraction
n_rows, n_cols = abalone.shape
(n_rows, n_cols)

(4177, 9)

In [30]:
# Draw exactly five rows at random; the fixed random_state makes the draw repeatable
abalone_sample = abalone.sample(random_state=75, n=5)
abalone_sample

Unnamed: 0,Sex,Length,Diameter,Height,Whole_weight,Shucked_weight,Viscera_weight,Shell_weight,Rings
78,F,0.555,0.425,0.14,0.788,0.282,0.1595,0.285,11
1756,F,0.725,0.565,0.21,2.1425,1.03,0.487,0.503,14
1636,M,0.575,0.47,0.185,0.985,0.3745,0.2175,0.355,10
2363,M,0.57,0.475,0.195,1.0295,0.4635,0.1905,0.305,18
2039,I,0.285,0.215,0.07,0.1075,0.051,0.0225,0.027,6


#### How do I replace cell values of a column with NaN?

In [31]:
# Blank out Rings wherever it equals 11.
# An integer column cannot hold NaN, so the column has to become float.
# Building the float column explicitly avoids assigning NaN in place into an
# integer column, which relies on silent upcasting (newer pandas versions
# flag that as an incompatible-dtype assignment).
abalone_sample["Rings"] = np.where(
    abalone_sample.Rings == 11, np.nan, abalone_sample.Rings
)
abalone_sample

Unnamed: 0,Sex,Length,Diameter,Height,Whole_weight,Shucked_weight,Viscera_weight,Shell_weight,Rings
78,F,0.555,0.425,0.14,0.788,0.282,0.1595,0.285,
1756,F,0.725,0.565,0.21,2.1425,1.03,0.487,0.503,14.0
1636,M,0.575,0.47,0.185,0.985,0.3745,0.2175,0.355,10.0
2363,M,0.57,0.475,0.195,1.0295,0.4635,0.1905,0.305,18.0
2039,I,0.285,0.215,0.07,0.1075,0.051,0.0225,0.027,6.0


#### How do I replace NaN cell values with something else?

In [32]:
# Fill missing Rings values with 11 and display the result; abalone_sample
# itself is not modified because the returned frame is not assigned.
# A dict limits the fill to the named column instead of writing 11 into any
# NaN in any column, and no axis argument is needed for this kind of fill.
abalone_sample.fillna(value={"Rings": 11})

Unnamed: 0,Sex,Length,Diameter,Height,Whole_weight,Shucked_weight,Viscera_weight,Shell_weight,Rings
78,F,0.555,0.425,0.14,0.788,0.282,0.1595,0.285,11.0
1756,F,0.725,0.565,0.21,2.1425,1.03,0.487,0.503,14.0
1636,M,0.575,0.47,0.185,0.985,0.3745,0.2175,0.355,10.0
2363,M,0.57,0.475,0.195,1.0295,0.4635,0.1905,0.305,18.0
2039,I,0.285,0.215,0.07,0.1075,0.051,0.0225,0.027,6.0


#### How do I concatenate two dataframes?

In [33]:
# First five and last five rows, kept as two separate frames
head, tail = abalone.head(), abalone.tail()

In [34]:
# Stack the two frames row-wise (axis=0 and join='outer' are pd.concat's defaults);
# the original index labels of each piece are kept
concat = pd.concat([head, tail])
concat

Unnamed: 0,Sex,Length,Diameter,Height,Whole_weight,Shucked_weight,Viscera_weight,Shell_weight,Rings
0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
1,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
2,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
3,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
4,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7
4172,F,0.565,0.45,0.165,0.887,0.37,0.239,0.249,11
4173,M,0.59,0.44,0.135,0.966,0.439,0.2145,0.2605,10
4174,M,0.6,0.475,0.205,1.176,0.5255,0.2875,0.308,9
4175,F,0.625,0.485,0.15,1.0945,0.531,0.261,0.296,10
4176,M,0.71,0.555,0.195,1.9485,0.9455,0.3765,0.495,12


#### How do I change a column name of a dataframe?

In [35]:
# Rename exactly the column labelled 'Sex' to 'Gender'.
# rename matches whole labels, so a label that merely contains the text
# 'Sex' would be left alone (a string replace over all labels would rewrite it).
concat = concat.rename(columns={'Sex': 'Gender'})
concat.head()

Unnamed: 0,Gender,Length,Diameter,Height,Whole_weight,Shucked_weight,Viscera_weight,Shell_weight,Rings
0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
1,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
2,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
3,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
4,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7


#### How do I replace underscores of the column labels with spaces?

In [36]:
# Swap every underscore in a column label for a space
concat.columns = [label.replace("_", " ") for label in concat.columns]
concat.head()

Unnamed: 0,Gender,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings
0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
1,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
2,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
3,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
4,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7


#### How do I replace spaces in the column labels with underscores?

In [37]:
# Swap every space in a column label for an underscore
concat.columns = [label.replace(" ", "_") for label in concat.columns]
concat.head()

Unnamed: 0,Gender,Length,Diameter,Height,Whole_weight,Shucked_weight,Viscera_weight,Shell_weight,Rings
0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
1,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
2,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
3,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
4,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7


#### How do I create a new column from the existing columns of a dataframe? 

In [38]:
# Element-wise string concatenation of two text columns, e.g. 'M' and 'old' -> 'M, old'
gender_age = df1['Sex'] + ", " + df1['Age']
df1['Gender_Age'] = gender_age
df1.head()

Unnamed: 0,Sex,Length,Diameter,Height,Whole_weight,Shucked_weight,Viscera_weight,Shell_weight,Rings,Age,Gender_Age
0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15,old,"M, old"
1,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7,young,"M, young"
2,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9,middle,"F, middle"
3,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10,middle,"M, middle"
4,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7,young,"I, young"


In [39]:
# Element-wise sum of two numeric columns, stored as a new column
length_plus_rings = df1['Length'] + df1['Rings']
df1['Length_Rings'] = length_plus_rings
df1.head()

Unnamed: 0,Sex,Length,Diameter,Height,Whole_weight,Shucked_weight,Viscera_weight,Shell_weight,Rings,Age,Gender_Age,Length_Rings
0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15,old,"M, old",15.455
1,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7,young,"M, young",7.35
2,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9,middle,"F, middle",9.53
3,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10,middle,"M, middle",10.44
4,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7,young,"I, young",7.33
