## Import pandas as pd

In [1]:
import pandas as pd

## Read csv format

In [5]:
df0 = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data",
names = ["Sex"," Length"," Diameter","Height","Whole weight","Shucked weight","Viscera weight","Shell weight","Rings"] )

## Dataframe

In [3]:
df0.head()

Unnamed: 0,Sex,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings
0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
1,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
2,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
3,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
4,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7


## This displays the dimensions of our dataset and allows us to see the column and row count


In [306]:
shape = df0.shape

In [307]:
print(shape)

(4177, 9)


## Viewing the ‘tail end’ of the dataset.

In [308]:
df0.tail()

Unnamed: 0,Sex,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings
4172,F,0.565,0.45,0.165,0.887,0.37,0.239,0.249,11
4173,M,0.59,0.44,0.135,0.966,0.439,0.2145,0.2605,10
4174,M,0.6,0.475,0.205,1.176,0.5255,0.2875,0.308,9
4175,F,0.625,0.485,0.15,1.0945,0.531,0.261,0.296,10
4176,M,0.71,0.555,0.195,1.9485,0.9455,0.3765,0.495,12


## describe all numerical features 

In [309]:
df0[[" Length"," Diameter","Height","Whole weight","Shucked weight","Viscera weight",
                          "Shell weight","Rings"]].describe() 
#df.describe() This displays a set of descriptive statistics for our dataset 

Unnamed: 0,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings
count,4177.0,4177.0,4177.0,4177.0,4177.0,4177.0,4177.0,4177.0
mean,0.523992,0.407881,0.139516,0.828742,0.359367,0.180594,0.238831,9.933684
std,0.120093,0.09924,0.041827,0.490389,0.221963,0.109614,0.139203,3.224169
min,0.075,0.055,0.0,0.002,0.001,0.0005,0.0015,1.0
25%,0.45,0.35,0.115,0.4415,0.186,0.0935,0.13,8.0
50%,0.545,0.425,0.14,0.7995,0.336,0.171,0.234,9.0
75%,0.615,0.48,0.165,1.153,0.502,0.253,0.329,11.0
max,0.815,0.65,1.13,2.8255,1.488,0.76,1.005,29.0


## Describe all categorical features

In [310]:
df0[['Sex']].describe()

Unnamed: 0,Sex
count,4177
unique,3
top,M
freq,1528


# Mean for all numerical columns

In [6]:
df0[[" Length"," Diameter","Height","Whole weight","Shucked weight","Viscera weight",
                          "Shell weight","Rings"]].mean()
# We use the mean() function if we wish to take the mean or average of a particular column within our dataset.

 Length           0.523992
 Diameter         0.407881
Height            0.139516
Whole weight      0.828742
Shucked weight    0.359367
Viscera weight    0.180594
Shell weight      0.238831
Rings             9.933684
dtype: float64

## Median for all numerical columns

In [8]:
df0[[" Length"," Diameter","Height","Whole weight","Shucked weight","Viscera weight",
                          "Shell weight","Rings"]].median()
#The median() method returns a Series with the median value of each column

 Length           0.5450
 Diameter         0.4250
Height            0.1400
Whole weight      0.7995
Shucked weight    0.3360
Viscera weight    0.1710
Shell weight      0.2340
Rings             9.0000
dtype: float64

## Mode for all numerical columns

In [313]:
df0[[" Length"," Diameter","Height","Whole weight","Shucked weight","Viscera weight",
                          "Shell weight","Rings"]].mode()
#The mode() method returns the mode value of each column.

Unnamed: 0,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings
0,0.55,0.45,0.15,0.2225,0.175,0.1715,0.275,9.0
1,0.625,,,,,,,


## Standard deviation for all numerical columns


In [314]:
df0[[" Length"," Diameter","Height","Whole weight","Shucked weight","Viscera weight",
                          "Shell weight","Rings"]].std()
# The Pandas std() is defined as a function for calculating the standard deviation

 Length           0.120093
 Diameter         0.099240
Height            0.041827
Whole weight      0.490389
Shucked weight    0.221963
Viscera weight    0.109614
Shell weight      0.139203
Rings             3.224169
dtype: float64

## Sum for all numerical columns

In [9]:
df0[[" Length"," Diameter","Height","Whole weight","Shucked weight","Viscera weight",
                          "Shell weight","Rings"]].sum()
#We can use the sum() function to find the sum of all the values over the index axis

 Length            2188.7150
 Diameter          1703.7200
Height              582.7600
Whole weight       3461.6560
Shucked weight     1501.0780
Viscera weight      754.3395
Shell weight        997.5965
Rings             41493.0000
dtype: float64

## Count for all columns

In [316]:
df0[["Sex"," Length"," Diameter","Height","Whole weight","Shucked weight","Viscera weight",
                          "Shell weight","Rings"]].count()
# The count() method counts the number of not empty values for each row, or column

Sex               4177
 Length           4177
 Diameter         4177
Height            4177
Whole weight      4177
Shucked weight    4177
Viscera weight    4177
Shell weight      4177
Rings             4177
dtype: int64

## Value_counts for all categorical columns

In [317]:
df0[['Sex']].value_counts() #value_counts() function returns object containing counts of unique values

Sex
M      1528
I      1342
F      1307
dtype: int64

## Subset your DataFrame based on one condition

In [318]:
df0[df0.Rings<7] 

Unnamed: 0,Sex,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings
42,I,0.240,0.175,0.045,0.0700,0.0315,0.0235,0.0200,5
43,I,0.205,0.150,0.055,0.0420,0.0255,0.0150,0.0120,5
44,I,0.210,0.150,0.050,0.0420,0.0175,0.0125,0.0150,4
48,I,0.325,0.245,0.070,0.1610,0.0755,0.0255,0.0450,6
58,I,0.245,0.190,0.060,0.0860,0.0420,0.0140,0.0250,4
...,...,...,...,...,...,...,...,...,...
4119,I,0.300,0.215,0.050,0.1185,0.0480,0.0225,0.0420,4
4149,I,0.280,0.215,0.070,0.1240,0.0630,0.0215,0.0300,6
4151,I,0.350,0.250,0.075,0.1695,0.0835,0.0355,0.0410,6
4154,I,0.435,0.330,0.095,0.3930,0.2190,0.0750,0.0885,6


## Subset your DataFrame based on two different conditions

In [319]:
df0[(df0.Sex == 'F') & (df0.Rings<7)]

Unnamed: 0,Sex,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings
514,F,0.275,0.195,0.07,0.08,0.031,0.0215,0.025,5
519,F,0.345,0.25,0.09,0.203,0.078,0.059,0.055,6
521,F,0.36,0.27,0.09,0.1885,0.0845,0.0385,0.055,5
860,F,0.595,0.475,0.16,1.1405,0.547,0.231,0.271,6
931,F,0.445,0.335,0.11,0.4355,0.2025,0.1095,0.1195,6
944,F,0.465,0.35,0.125,0.482,0.23,0.106,0.1095,6
948,F,0.475,0.36,0.12,0.5915,0.3245,0.11,0.127,6
1039,F,0.66,0.475,0.18,1.3695,0.641,0.294,0.335,6
1105,F,0.51,0.4,0.125,0.545,0.261,0.115,0.1385,6
1584,F,0.515,0.375,0.11,0.6065,0.3005,0.131,0.15,6


## Subset your DataFrame based on three different conditions

In [320]:
df0[(df0.Sex == 'F') & (df0.Rings<7) & (df0.Height<0.090)]

Unnamed: 0,Sex,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings
514,F,0.275,0.195,0.07,0.08,0.031,0.0215,0.025,5
2123,F,0.29,0.21,0.075,0.275,0.113,0.0675,0.035,6
2167,F,0.37,0.275,0.085,0.2405,0.104,0.0535,0.07,5
2206,F,0.29,0.225,0.075,0.14,0.0515,0.0235,0.04,5
3159,F,0.34,0.255,0.085,0.204,0.097,0.021,0.05,6
3161,F,0.335,0.22,0.07,0.17,0.076,0.0365,0.05,6


## Use loc to slice your DataFrame for the 2 columns that returned the largest sum

In [321]:
df0.loc[:,["Height","Rings"]]

Unnamed: 0,Height,Rings
0,0.095,15
1,0.090,7
2,0.135,9
3,0.125,10
4,0.080,7
...,...,...
4172,0.165,11
4173,0.135,10
4174,0.205,9
4175,0.150,10


## Use iloc to slice your DataFrame for the 2 columns that returned the largest mean

In [333]:
df0.iloc[:,[4,-1]]

Unnamed: 0,Whole weight,Rings
0,0.5140,15
1,0.2255,7
2,0.6770,9
3,0.5160,10
4,0.2050,7
...,...,...
4172,0.8870,11
4173,0.9660,10
4174,1.1760,9
4175,1.0945,10


## Return the first n rows

In [335]:
df0.head(3) 

Unnamed: 0,Sex,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings
0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
1,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
2,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9


## Randomly select n rows

In [339]:
df0.sample()  

Unnamed: 0,Sex,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings
3044,I,0.575,0.445,0.16,0.9175,0.45,0.1935,0.24,9
