# Series Object in Pandas

In [5]:
import pandas as pd
data = [1,2,3,4]
series1 = pd.Series(data)

In [6]:
series1

0    1
1    2
2    3
3    4
dtype: int64

In [7]:
type(series1)

pandas.core.series.Series

# Changing the Index of a Series object

In [9]:
series1 = pd.Series(data, index = ['a','b','c','d'])

In [10]:
series1

a    1
b    2
c    3
d    4
dtype: int64

# How to create a DataFrame

In [20]:
# creating a dataframe using a list
import pandas as pd
data = [1,2,3,4,5]
df = pd.DataFrame(data)

In [19]:
df.rename(columns={0:'Number'})

Unnamed: 0,Number
0,1
1,2
2,3
3,4
4,5


In [64]:
# creating a dataframe using a dictionary
dictionary = {'fruits':['apples','banana','mangoes'], 'count':[10,20,15]}
df = pd.DataFrame(dictionary)
df

Unnamed: 0,count,fruits
0,10,apples
1,20,banana
2,15,mangoes


In [252]:
#creating a dataframe using a series
series = pd.Series([6,12], index=['a','b'])
df = pd.DataFrame(series)
df.rename(columns = {0:'Number'})

Unnamed: 0,Number
a,6
b,12


In [32]:
#creating a Dataframe using a numpy array

In [66]:
import numpy as np
numpyarray = np.array([[50000,60000],['john','James'],[123,12323]])
df = pd.DataFrame({'name':numpyarray[1], 'salary':numpyarray[0],'Pnumber':numpyarray[2]})
df

Unnamed: 0,Pnumber,name,salary
0,123,john,50000
1,12323,James,60000


In [67]:
numpyarray2 = np.array([[10000,20000,30000,40000],['Akmal','seema','uma','shubham']])
df = pd.DataFrame({'name':numpyarray2[1],'salary':numpyarray2[0]})
df

Unnamed: 0,name,salary
0,Akmal,10000
1,seema,20000
2,uma,30000
3,shubham,40000


# How to perform Merge Operation?

In [45]:
import pandas as pd
player = ['Player1','Player2','Player3']
point = [8,6,9]
title = ['Game1', 'Game2', 'Game3']
df1 = pd.DataFrame({'Player':player, 'Points':point, 'Title':title})
df1

Unnamed: 0,Player,Points,Title
0,Player1,8,Game1
1,Player2,6,Game2
2,Player3,9,Game3


In [52]:
player = ['Player1', 'Player5', 'Player6']
power = ['Punch', 'Kick', 'Elbow']
title = ['Game1', 'Game5', 'Game6']
df2 = pd.DataFrame({'Player':player, 'Power':power, 'Title':title})
df2

Unnamed: 0,Player,Power,Title
0,Player1,Punch,Game1
1,Player5,Kick,Game5
2,Player6,Elbow,Game6


In [54]:
# Inner Merge
df1.merge(df2, on='Player', how='inner')

Unnamed: 0,Player,Points,Title_x,Power,Title_y
0,Player1,8,Game1,Punch,Game1


In [55]:
# By default it is inner merge
df1.merge(df2)

Unnamed: 0,Player,Points,Title,Power
0,Player1,8,Game1,Punch


In [59]:
# Left merge
df1.merge(df2, on = 'Player', how= 'left')

Unnamed: 0,Player,Points,Title_x,Power,Title_y
0,Player1,8,Game1,Punch,Game1
1,Player2,6,Game2,,
2,Player3,9,Game3,,


In [60]:
# Right merge
df1.merge(df2, on = 'Player', how= 'right')

Unnamed: 0,Player,Points,Title_x,Power,Title_y
0,Player1,8.0,Game1,Punch,Game1
1,Player5,,,Kick,Game5
2,Player6,,,Elbow,Game6


In [61]:
# Outer merge
df1.merge(df2, on = 'Player', how= 'outer')

Unnamed: 0,Player,Points,Title_x,Power,Title_y
0,Player1,8.0,Game1,Punch,Game1
1,Player2,6.0,Game2,,
2,Player3,9.0,Game3,,
3,Player5,,,Kick,Game5
4,Player6,,,Elbow,Game6


In [72]:
# How to perform Join Operation in pandas
player = ['Player1','Player2','Player3']
point = [8,6,9]
title = ['Game1', 'Game2', 'Game3']
df3 = pd.DataFrame({'Player':player, 'Points':point, 'Title':title}, 
                   index = ['L1', 'L2', 'L3'])
df3

Unnamed: 0,Player,Points,Title
L1,Player1,8,Game1
L2,Player2,6,Game2
L3,Player3,9,Game3


In [70]:
player = ['Player1', 'Player5', 'Player6']
power = ['Punch', 'Kick', 'Elbow']
title = ['Game1', 'Game5', 'Game6']
df4 = pd.DataFrame({'Players':player, 'Power':power, 'Titles':title}, 
                  index = ['L2', 'L3', 'L4'])
df4

Unnamed: 0,Players,Power,Titles
L2,Player1,Punch,Game1
L3,Player5,Kick,Game5
L4,Player6,Elbow,Game6


In [75]:
# Inner join
df3.join(df4, how='inner')

Unnamed: 0,Player,Points,Title,Players,Power,Titles
L2,Player2,6,Game2,Player1,Punch,Game1
L3,Player3,9,Game3,Player5,Kick,Game5


In [76]:
# left join
df3.join(df4, how='left')

Unnamed: 0,Player,Points,Title,Players,Power,Titles
L1,Player1,8,Game1,,,
L2,Player2,6,Game2,Player1,Punch,Game1
L3,Player3,9,Game3,Player5,Kick,Game5


In [77]:
# right join
df3.join(df4, how='right')

Unnamed: 0,Player,Points,Title,Players,Power,Titles
L2,Player2,6.0,Game2,Player1,Punch,Game1
L3,Player3,9.0,Game3,Player5,Kick,Game5
L4,,,,Player6,Elbow,Game6


In [82]:
# Outer join
df3.join(df4, how='outer')

Unnamed: 0,Player,Points,Title,Players,Power,Titles
L1,Player1,8.0,Game1,,,
L2,Player2,6.0,Game2,Player1,Punch,Game1
L3,Player3,9.0,Game3,Player5,Kick,Game5
L4,,,,Player6,Elbow,Game6


# How to Concatenate two DataFrame in Pandas

In [84]:
pd.concat([df3,df4])

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.


Unnamed: 0,Player,Players,Points,Power,Title,Titles
L1,Player1,,8.0,,Game1,
L2,Player2,,6.0,,Game2,
L3,Player3,,9.0,,Game3,
L2,,Player1,,Punch,,Game1
L3,,Player5,,Kick,,Game5
L4,,Player6,,Elbow,,Game6


# Importing Data set

In [85]:
#import pandas library
import pandas as pd
#read dataset and store into a dataframe
cars = pd.read_csv('/home/akmal/Documents/mtcars.csv')
#print
cars

Unnamed: 0,model,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
0,Mazda RX4,21.0,6,160.0,110,3.9,2.62,16.46,0,1,4,4
1,Mazda RX4 Wag,21.0,6,160.0,110,3.9,2.875,17.02,0,1,4,4
2,Datsun 710,22.8,4,108.0,93,3.85,2.32,18.61,1,1,4,1
3,Hornet 4 Drive,21.4,6,258.0,110,3.08,3.215,19.44,1,0,3,1
4,Hornet Sportabout,18.7,8,360.0,175,3.15,3.44,17.02,0,0,3,2
5,Valiant,18.1,6,225.0,105,2.76,3.46,20.22,1,0,3,1
6,Duster 360,14.3,8,360.0,245,3.21,3.57,15.84,0,0,3,4
7,Merc 240D,24.4,4,146.7,62,3.69,3.19,20.0,1,0,4,2
8,Merc 230,22.8,4,140.8,95,3.92,3.15,22.9,1,0,4,2
9,Merc 280,19.2,6,167.6,123,3.92,3.44,18.3,1,0,4,4


# Analyzind Data set

In [86]:
type(cars)

pandas.core.frame.DataFrame

In [105]:
cars.count()

model    32
mpg      32
cyl      32
disp     32
hp       32
drat     32
wt       32
qsec     32
vs       32
am       32
gear     32
carb     32
dtype: int64

In [91]:
#To get starting 5 rows
cars.head()

Unnamed: 0,model,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
0,Mazda RX4,21.0,6,160.0,110,3.9,2.62,16.46,0,1,4,4
1,Mazda RX4 Wag,21.0,6,160.0,110,3.9,2.875,17.02,0,1,4,4
2,Datsun 710,22.8,4,108.0,93,3.85,2.32,18.61,1,1,4,1
3,Hornet 4 Drive,21.4,6,258.0,110,3.08,3.215,19.44,1,0,3,1
4,Hornet Sportabout,18.7,8,360.0,175,3.15,3.44,17.02,0,0,3,2


In [92]:
# This will give you starting 10 rows
cars.head(10)

Unnamed: 0,model,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
0,Mazda RX4,21.0,6,160.0,110,3.9,2.62,16.46,0,1,4,4
1,Mazda RX4 Wag,21.0,6,160.0,110,3.9,2.875,17.02,0,1,4,4
2,Datsun 710,22.8,4,108.0,93,3.85,2.32,18.61,1,1,4,1
3,Hornet 4 Drive,21.4,6,258.0,110,3.08,3.215,19.44,1,0,3,1
4,Hornet Sportabout,18.7,8,360.0,175,3.15,3.44,17.02,0,0,3,2
5,Valiant,18.1,6,225.0,105,2.76,3.46,20.22,1,0,3,1
6,Duster 360,14.3,8,360.0,245,3.21,3.57,15.84,0,0,3,4
7,Merc 240D,24.4,4,146.7,62,3.69,3.19,20.0,1,0,4,2
8,Merc 230,22.8,4,140.8,95,3.92,3.15,22.9,1,0,4,2
9,Merc 280,19.2,6,167.6,123,3.92,3.44,18.3,1,0,4,4


In [93]:
#this will give you last 5 values
cars.tail()

Unnamed: 0,model,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
27,Lotus Europa,30.4,4,95.1,113,3.77,1.513,16.9,1,1,5,2
28,Ford Pantera L,15.8,8,351.0,264,4.22,3.17,14.5,0,1,5,4
29,Ferrari Dino,19.7,6,145.0,175,3.62,2.77,15.5,0,1,5,6
30,Maserati Bora,15.0,8,301.0,335,3.54,3.57,14.6,0,1,5,8
31,Volvo 142E,21.4,4,121.0,109,4.11,2.78,18.6,1,1,4,2


In [95]:
#this will give you last 10 values
cars.tail(10)

Unnamed: 0,model,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
22,AMC Javelin,15.2,8,304.0,150,3.15,3.435,17.3,0,0,3,2
23,Camaro Z28,13.3,8,350.0,245,3.73,3.84,15.41,0,0,3,4
24,Pontiac Firebird,19.2,8,400.0,175,3.08,3.845,17.05,0,0,3,2
25,Fiat X1-9,27.3,4,79.0,66,4.08,1.935,18.9,1,1,4,1
26,Porsche 914-2,26.0,4,120.3,91,4.43,2.14,16.7,0,1,5,2
27,Lotus Europa,30.4,4,95.1,113,3.77,1.513,16.9,1,1,5,2
28,Ford Pantera L,15.8,8,351.0,264,4.22,3.17,14.5,0,1,5,4
29,Ferrari Dino,19.7,6,145.0,175,3.62,2.77,15.5,0,1,5,6
30,Maserati Bora,15.0,8,301.0,335,3.54,3.57,14.6,0,1,5,8
31,Volvo 142E,21.4,4,121.0,109,4.11,2.78,18.6,1,1,4,2


In [96]:
#This will give number of rows and columns
cars.shape

(32, 12)

In [98]:
#print a consise summary of the columns
cars.info(null_counts=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32 entries, 0 to 31
Data columns (total 12 columns):
model    32 non-null object
mpg      32 non-null float64
cyl      32 non-null int64
disp     32 non-null float64
hp       32 non-null int64
drat     32 non-null float64
wt       32 non-null float64
qsec     32 non-null float64
vs       32 non-null int64
am       32 non-null int64
gear     32 non-null int64
carb     32 non-null int64
dtypes: float64(5), int64(6), object(1)
memory usage: 3.1+ KB


In [99]:
#to calculate mean
cars.mean()

mpg      20.090625
cyl       6.187500
disp    230.721875
hp      146.687500
drat      3.596563
wt        3.217250
qsec     17.848750
vs        0.437500
am        0.406250
gear      3.687500
carb      2.812500
dtype: float64

In [100]:
#To calculate median
cars.median()

mpg      19.200
cyl       6.000
disp    196.300
hp      123.000
drat      3.695
wt        3.325
qsec     17.710
vs        0.000
am        0.000
gear      4.000
carb      2.000
dtype: float64

In [102]:
# To get Standard deviation
cars.std()

mpg       6.026948
cyl       1.785922
disp    123.938694
hp       68.562868
drat      0.534679
wt        0.978457
qsec      1.786943
vs        0.504016
am        0.498991
gear      0.737804
carb      1.615200
dtype: float64

In [103]:
# To get maximum value
cars.max()

model    Volvo 142E
mpg            33.9
cyl               8
disp            472
hp              335
drat           4.93
wt            5.424
qsec           22.9
vs                1
am                1
gear              5
carb              8
dtype: object

In [104]:
#to get minimum value
cars.min()

model    AMC Javelin
mpg             10.4
cyl                4
disp            71.1
hp                52
drat            2.76
wt             1.513
qsec            14.5
vs                 0
am                 0
gear               3
carb               1
dtype: object

In [106]:
#To count rows
cars.count()

model    32
mpg      32
cyl      32
disp     32
hp       32
drat     32
wt       32
qsec     32
vs       32
am       32
gear     32
carb     32
dtype: int64

In [107]:
# To describe dataframe
cars.describe()

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
count,32.0,32.0,32.0,32.0,32.0,32.0,32.0,32.0,32.0,32.0,32.0
mean,20.090625,6.1875,230.721875,146.6875,3.596563,3.21725,17.84875,0.4375,0.40625,3.6875,2.8125
std,6.026948,1.785922,123.938694,68.562868,0.534679,0.978457,1.786943,0.504016,0.498991,0.737804,1.6152
min,10.4,4.0,71.1,52.0,2.76,1.513,14.5,0.0,0.0,3.0,1.0
25%,15.425,4.0,120.825,96.5,3.08,2.58125,16.8925,0.0,0.0,3.0,2.0
50%,19.2,6.0,196.3,123.0,3.695,3.325,17.71,0.0,0.0,4.0,2.0
75%,22.8,8.0,326.0,180.0,3.92,3.61,18.9,1.0,1.0,4.0,4.0
max,33.9,8.0,472.0,335.0,4.93,5.424,22.9,1.0,1.0,5.0,8.0


# Data Cleansing

In [254]:
#rename  column
cars = cars.rename(columns={'unnamed: 1':'model'})
cars

Unnamed: 0,model,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
0,Mazda RX4,21.0,6,160.0,110,3.9,2.62,16.46,0,2,4,4
1,Mazda RX4 Wag,21.0,6,160.0,110,3.9,2.875,17.02,0,2,4,4
2,Datsun 710,22.8,4,108.0,93,3.85,2.32,18.61,1,2,4,1
3,Hornet 4 Drive,21.4,6,258.0,110,3.08,3.215,19.44,1,2,3,1
4,Hornet Sportabout,18.7,8,360.0,175,3.15,3.44,17.02,0,2,3,2
5,Valiant,18.1,6,225.0,105,2.76,3.46,20.22,1,2,3,1
6,Duster 360,14.3,8,360.0,245,3.21,3.57,15.84,0,2,3,4
7,Merc 240D,24.4,4,146.7,62,3.69,3.19,20.0,1,2,4,2
8,Merc 230,22.8,4,140.8,95,3.92,3.15,22.9,1,2,4,2
9,Merc 280,19.2,6,167.6,123,3.92,3.44,18.3,1,2,4,4


In [120]:
#Fill the null with mean of the column
cars.qsec = cars.qsec.fillna(cars.mean())
cars

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
0,21.0,6,160.0,110,3.9,2.62,16.46,0,1,4,4
1,21.0,6,160.0,110,3.9,2.875,17.02,0,1,4,4
2,22.8,4,108.0,93,3.85,2.32,18.61,1,1,4,1
3,21.4,6,258.0,110,3.08,3.215,19.44,1,0,3,1
4,18.7,8,360.0,175,3.15,3.44,17.02,0,0,3,2
5,18.1,6,225.0,105,2.76,3.46,20.22,1,0,3,1
6,14.3,8,360.0,245,3.21,3.57,15.84,0,0,3,4
7,24.4,4,146.7,62,3.69,3.19,20.0,1,0,4,2
8,22.8,4,140.8,95,3.92,3.15,22.9,1,0,4,2
9,19.2,6,167.6,123,3.92,3.44,18.3,1,0,4,4


Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
0,21.0,6,160.0,110,3.9,2.62,16.46,0,1,4,4
1,21.0,6,160.0,110,3.9,2.875,17.02,0,1,4,4
2,22.8,4,108.0,93,3.85,2.32,18.61,1,1,4,1
3,21.4,6,258.0,110,3.08,3.215,19.44,1,0,3,1
4,18.7,8,360.0,175,3.15,3.44,17.02,0,0,3,2
5,18.1,6,225.0,105,2.76,3.46,20.22,1,0,3,1
6,14.3,8,360.0,245,3.21,3.57,15.84,0,0,3,4
7,24.4,4,146.7,62,3.69,3.19,20.0,1,0,4,2
8,22.8,4,140.8,95,3.92,3.15,22.9,1,0,4,2
9,19.2,6,167.6,123,3.92,3.44,18.3,1,0,4,4


In [121]:
cars = pd.read_csv('/home/akmal/Documents/mtcars.csv')

In [122]:
cars

Unnamed: 0,model,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
0,Mazda RX4,21.0,6,160.0,110,3.9,2.62,16.46,0,1,4,4
1,Mazda RX4 Wag,21.0,6,160.0,110,3.9,2.875,17.02,0,1,4,4
2,Datsun 710,22.8,4,108.0,93,3.85,2.32,18.61,1,1,4,1
3,Hornet 4 Drive,21.4,6,258.0,110,3.08,3.215,19.44,1,0,3,1
4,Hornet Sportabout,18.7,8,360.0,175,3.15,3.44,17.02,0,0,3,2
5,Valiant,18.1,6,225.0,105,2.76,3.46,20.22,1,0,3,1
6,Duster 360,14.3,8,360.0,245,3.21,3.57,15.84,0,0,3,4
7,Merc 240D,24.4,4,146.7,62,3.69,3.19,20.0,1,0,4,2
8,Merc 230,22.8,4,140.8,95,3.92,3.15,22.9,1,0,4,2
9,Merc 280,19.2,6,167.6,123,3.92,3.44,18.3,1,0,4,4


In [123]:
cars.qsec = cars.qsec.fillna(cars.mean())
cars

Unnamed: 0,model,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
0,Mazda RX4,21.0,6,160.0,110,3.9,2.62,16.46,0,1,4,4
1,Mazda RX4 Wag,21.0,6,160.0,110,3.9,2.875,17.02,0,1,4,4
2,Datsun 710,22.8,4,108.0,93,3.85,2.32,18.61,1,1,4,1
3,Hornet 4 Drive,21.4,6,258.0,110,3.08,3.215,19.44,1,0,3,1
4,Hornet Sportabout,18.7,8,360.0,175,3.15,3.44,17.02,0,0,3,2
5,Valiant,18.1,6,225.0,105,2.76,3.46,20.22,1,0,3,1
6,Duster 360,14.3,8,360.0,245,3.21,3.57,15.84,0,0,3,4
7,Merc 240D,24.4,4,146.7,62,3.69,3.19,20.0,1,0,4,2
8,Merc 230,22.8,4,140.8,95,3.92,3.15,22.9,1,0,4,2
9,Merc 280,19.2,6,167.6,123,3.92,3.44,18.3,1,0,4,4


In [124]:
# finding correlation matrix
df = cars[['mpg','cyl','disp','hp','drat', 'wt', 'qsec', 'vs', 'am', 'gear', 'carb']].corr()

In [127]:
df

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
mpg,1.0,-0.852162,-0.847551,-0.776168,0.681172,-0.867659,0.418684,0.664039,0.599832,0.480285,-0.550925
cyl,-0.852162,1.0,0.902033,0.832447,-0.699938,0.782496,-0.591242,-0.810812,-0.522607,-0.492687,0.526988
disp,-0.847551,0.902033,1.0,0.790949,-0.710214,0.88798,-0.433698,-0.710416,-0.591227,-0.555569,0.394977
hp,-0.776168,0.832447,0.790949,1.0,-0.448759,0.658748,-0.708223,-0.723097,-0.243204,-0.125704,0.749812
drat,0.681172,-0.699938,-0.710214,-0.448759,1.0,-0.712441,0.091205,0.440278,0.712711,0.69961,-0.09079
wt,-0.867659,0.782496,0.88798,0.658748,-0.712441,1.0,-0.174716,-0.554916,-0.692495,-0.583287,0.427606
qsec,0.418684,-0.591242,-0.433698,-0.708223,0.091205,-0.174716,1.0,0.744535,-0.229861,-0.212682,-0.656249
vs,0.664039,-0.810812,-0.710416,-0.723097,0.440278,-0.554916,0.744535,1.0,0.168345,0.206023,-0.569607
am,0.599832,-0.522607,-0.591227,-0.243204,0.712711,-0.692495,-0.229861,0.168345,1.0,0.794059,0.057534
gear,0.480285,-0.492687,-0.555569,-0.125704,0.69961,-0.583287,-0.212682,0.206023,0.794059,1.0,0.274073


In [129]:
#changing Data type
#changing mpg from string to float
cars.mpg = cars.mpg.astype(float)
#see the change
cars.info(null_counts=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32 entries, 0 to 31
Data columns (total 12 columns):
model    32 non-null object
mpg      32 non-null float64
cyl      32 non-null int64
disp     32 non-null float64
hp       32 non-null int64
drat     32 non-null float64
wt       32 non-null float64
qsec     32 non-null float64
vs       32 non-null int64
am       32 non-null int64
gear     32 non-null int64
carb     32 non-null int64
dtypes: float64(5), int64(6), object(1)
memory usage: 3.1+ KB


In [130]:
cars.model

0               Mazda RX4
1           Mazda RX4 Wag
2              Datsun 710
3          Hornet 4 Drive
4       Hornet Sportabout
5                 Valiant
6              Duster 360
7               Merc 240D
8                Merc 230
9                Merc 280
10              Merc 280C
11             Merc 450SE
12             Merc 450SL
13            Merc 450SLC
14     Cadillac Fleetwood
15    Lincoln Continental
16      Chrysler Imperial
17               Fiat 128
18            Honda Civic
19         Toyota Corolla
20          Toyota Corona
21       Dodge Challenger
22            AMC Javelin
23             Camaro Z28
24       Pontiac Firebird
25              Fiat X1-9
26          Porsche 914-2
27           Lotus Europa
28         Ford Pantera L
29           Ferrari Dino
30          Maserati Bora
31             Volvo 142E
Name: model, dtype: object

In [137]:
#Again finding correlation including mpg
df = cars[['mpg','cyl','disp','hp','drat', 'wt', 'qsec', 'vs', 'am', 'gear', 'carb']].corr()

In [138]:
df

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
mpg,1.0,-0.852162,-0.847551,-0.776168,0.681172,-0.867659,0.418684,0.664039,0.599832,0.480285,-0.550925
cyl,-0.852162,1.0,0.902033,0.832447,-0.699938,0.782496,-0.591242,-0.810812,-0.522607,-0.492687,0.526988
disp,-0.847551,0.902033,1.0,0.790949,-0.710214,0.88798,-0.433698,-0.710416,-0.591227,-0.555569,0.394977
hp,-0.776168,0.832447,0.790949,1.0,-0.448759,0.658748,-0.708223,-0.723097,-0.243204,-0.125704,0.749812
drat,0.681172,-0.699938,-0.710214,-0.448759,1.0,-0.712441,0.091205,0.440278,0.712711,0.69961,-0.09079
wt,-0.867659,0.782496,0.88798,0.658748,-0.712441,1.0,-0.174716,-0.554916,-0.692495,-0.583287,0.427606
qsec,0.418684,-0.591242,-0.433698,-0.708223,0.091205,-0.174716,1.0,0.744535,-0.229861,-0.212682,-0.656249
vs,0.664039,-0.810812,-0.710416,-0.723097,0.440278,-0.554916,0.744535,1.0,0.168345,0.206023,-0.569607
am,0.599832,-0.522607,-0.591227,-0.243204,0.712711,-0.692495,-0.229861,0.168345,1.0,0.794059,0.057534
gear,0.480285,-0.492687,-0.555569,-0.125704,0.69961,-0.583287,-0.212682,0.206023,0.794059,1.0,0.274073


# Manipulating the Dataset

In [139]:
#viewing only one specified column using iloc
cars.iloc[:,4]

0     110
1     110
2      93
3     110
4     175
5     105
6     245
7      62
8      95
9     123
10    123
11    180
12    180
13    180
14    205
15    215
16    230
17     66
18     52
19     65
20     97
21    150
22    150
23    245
24    175
25     66
26     91
27    113
28    264
29    175
30    335
31    109
Name: hp, dtype: int64

In [146]:
#To view first five records of specified column (hp)
cars.iloc[0:5,4]

0    110
1    110
2     93
3    110
4    175
Name: hp, dtype: int64

In [147]:
#To view all the rows and all the columns
cars.iloc[:,:]

Unnamed: 0,model,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
0,Mazda RX4,21.0,6,160.0,110,3.9,2.62,16.46,0,1,4,4
1,Mazda RX4 Wag,21.0,6,160.0,110,3.9,2.875,17.02,0,1,4,4
2,Datsun 710,22.8,4,108.0,93,3.85,2.32,18.61,1,1,4,1
3,Hornet 4 Drive,21.4,6,258.0,110,3.08,3.215,19.44,1,0,3,1
4,Hornet Sportabout,18.7,8,360.0,175,3.15,3.44,17.02,0,0,3,2
5,Valiant,18.1,6,225.0,105,2.76,3.46,20.22,1,0,3,1
6,Duster 360,14.3,8,360.0,245,3.21,3.57,15.84,0,0,3,4
7,Merc 240D,24.4,4,146.7,62,3.69,3.19,20.0,1,0,4,2
8,Merc 230,22.8,4,140.8,95,3.92,3.15,22.9,1,0,4,2
9,Merc 280,19.2,6,167.6,123,3.92,3.44,18.3,1,0,4,4


In [158]:
#for attributes from hp to carb see all the records from index 6
cars.iloc[6:,4:]

Unnamed: 0,hp,drat,wt,qsec,vs,am,gear,carb
6,245,3.21,3.57,15.84,0,0,3,4
7,62,3.69,3.19,20.0,1,0,4,2
8,95,3.92,3.15,22.9,1,0,4,2
9,123,3.92,3.44,18.3,1,0,4,4
10,123,3.92,3.44,18.9,1,0,4,4
11,180,3.07,4.07,17.4,0,0,3,3
12,180,3.07,3.73,17.6,0,0,3,3
13,180,3.07,3.78,18.0,0,0,3,3
14,205,2.93,5.25,17.98,0,0,3,4
15,215,3.0,5.424,17.82,0,0,3,4


In [168]:
#To locate all the rows from first column
cars.iloc[:,0]

0               Mazda RX4
1           Mazda RX4 Wag
2              Datsun 710
3          Hornet 4 Drive
4       Hornet Sportabout
5                 Valiant
6              Duster 360
7               Merc 240D
8                Merc 230
9                Merc 280
10              Merc 280C
11             Merc 450SE
12             Merc 450SL
13            Merc 450SLC
14     Cadillac Fleetwood
15    Lincoln Continental
16      Chrysler Imperial
17               Fiat 128
18            Honda Civic
19         Toyota Corolla
20          Toyota Corona
21       Dodge Challenger
22            AMC Javelin
23             Camaro Z28
24       Pontiac Firebird
25              Fiat X1-9
26          Porsche 914-2
27           Lotus Europa
28         Ford Pantera L
29           Ferrari Dino
30          Maserati Bora
31             Volvo 142E
Name: model, dtype: object

In [173]:
#See all the record of mpg column
cars.loc[:,"mpg"]

0     21.0
1     21.0
2     22.8
3     21.4
4     18.7
5     18.1
6     14.3
7     24.4
8     22.8
9     19.2
10    17.8
11    16.4
12    17.3
13    15.2
14    10.4
15    10.4
16    14.7
17    32.4
18    30.4
19    33.9
20    21.5
21    15.5
22    15.2
23    13.3
24    19.2
25    27.3
26    26.0
27    30.4
28    15.8
29    19.7
30    15.0
31    21.4
Name: mpg, dtype: float64

In [182]:
#to select dsired columns
df = cars[['model','mpg','hp']]
#to print starting 10 rows
df.iloc[:10]

Unnamed: 0,model,mpg,hp
0,Mazda RX4,21.0,110
1,Mazda RX4 Wag,21.0,110
2,Datsun 710,22.8,93
3,Hornet 4 Drive,21.4,110
4,Hornet Sportabout,18.7,175
5,Valiant,18.1,105
6,Duster 360,14.3,245
7,Merc 240D,24.4,62
8,Merc 230,22.8,95
9,Merc 280,19.2,123


In [186]:
#display the record from index 0 to index 6 from mpg column
cars.loc[:6,"mpg"]

0    21.0
1    21.0
2    22.8
3    21.4
4    18.7
5    18.1
6    14.3
Name: mpg, dtype: float64

In [190]:
#See the first 7 records from mpg to qsec column
cars.loc[:6,"mpg":"qsec"]

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec
0,21.0,6,160.0,110,3.9,2.62,16.46
1,21.0,6,160.0,110,3.9,2.875,17.02
2,22.8,4,108.0,93,3.85,2.32,18.61
3,21.4,6,258.0,110,3.08,3.215,19.44
4,18.7,8,360.0,175,3.15,3.44,17.02
5,18.1,6,225.0,105,2.76,3.46,20.22
6,14.3,8,360.0,245,3.21,3.57,15.84


In [219]:
#see value 1 to column 'am'
cars['am'] = 1
cars

Unnamed: 0,model,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
0,Mazda RX4,21.0,6,160.0,110,3.9,2.62,16.46,0,1,4,4
1,Mazda RX4 Wag,21.0,6,160.0,110,3.9,2.875,17.02,0,1,4,4
2,Datsun 710,22.8,4,108.0,93,3.85,2.32,18.61,1,1,4,1
3,Hornet 4 Drive,21.4,6,258.0,110,3.08,3.215,19.44,1,1,3,1
4,Hornet Sportabout,18.7,8,360.0,175,3.15,3.44,17.02,0,1,3,2
5,Valiant,18.1,6,225.0,105,2.76,3.46,20.22,1,1,3,1
6,Duster 360,14.3,8,360.0,245,3.21,3.57,15.84,0,1,3,4
7,Merc 240D,24.4,4,146.7,62,3.69,3.19,20.0,1,1,4,2
8,Merc 230,22.8,4,140.8,95,3.92,3.15,22.9,1,1,4,2
9,Merc 280,19.2,6,167.6,123,3.92,3.44,18.3,1,1,4,4


In [223]:
# double up recorsd in 'am' using lambda fxn
f = lambda x: x*2
cars['am'] = cars['am'].apply(f)

In [224]:
cars

Unnamed: 0,model,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
0,Mazda RX4,21.0,6,160.0,110,3.9,2.62,16.46,0,2,4,4
1,Mazda RX4 Wag,21.0,6,160.0,110,3.9,2.875,17.02,0,2,4,4
2,Datsun 710,22.8,4,108.0,93,3.85,2.32,18.61,1,2,4,1
3,Hornet 4 Drive,21.4,6,258.0,110,3.08,3.215,19.44,1,2,3,1
4,Hornet Sportabout,18.7,8,360.0,175,3.15,3.44,17.02,0,2,3,2
5,Valiant,18.1,6,225.0,105,2.76,3.46,20.22,1,2,3,1
6,Duster 360,14.3,8,360.0,245,3.21,3.57,15.84,0,2,3,4
7,Merc 240D,24.4,4,146.7,62,3.69,3.19,20.0,1,2,4,2
8,Merc 230,22.8,4,140.8,95,3.92,3.15,22.9,1,2,4,2
9,Merc 280,19.2,6,167.6,123,3.92,3.44,18.3,1,2,4,4


In [225]:
#sorting cyl column ascending order
cars.sort_values(by='cyl')

Unnamed: 0,model,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
31,Volvo 142E,21.4,4,121.0,109,4.11,2.78,18.6,1,2,4,2
2,Datsun 710,22.8,4,108.0,93,3.85,2.32,18.61,1,2,4,1
27,Lotus Europa,30.4,4,95.1,113,3.77,1.513,16.9,1,2,5,2
26,Porsche 914-2,26.0,4,120.3,91,4.43,2.14,16.7,0,2,5,2
25,Fiat X1-9,27.3,4,79.0,66,4.08,1.935,18.9,1,2,4,1
20,Toyota Corona,21.5,4,120.1,97,3.7,2.465,20.01,1,2,3,1
7,Merc 240D,24.4,4,146.7,62,3.69,3.19,20.0,1,2,4,2
8,Merc 230,22.8,4,140.8,95,3.92,3.15,22.9,1,2,4,2
19,Toyota Corolla,33.9,4,71.1,65,4.22,1.835,19.9,1,2,4,1
18,Honda Civic,30.4,4,75.7,52,4.93,1.615,18.52,1,2,4,2


In [230]:
#sorting cyl column descending order
cars.sort_values(by='cyl', ascending = False)

Unnamed: 0,model,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
16,Chrysler Imperial,14.7,8,440.0,230,3.23,5.345,17.42,0,2,3,4
30,Maserati Bora,15.0,8,301.0,335,3.54,3.57,14.6,0,2,5,8
4,Hornet Sportabout,18.7,8,360.0,175,3.15,3.44,17.02,0,2,3,2
28,Ford Pantera L,15.8,8,351.0,264,4.22,3.17,14.5,0,2,5,4
6,Duster 360,14.3,8,360.0,245,3.21,3.57,15.84,0,2,3,4
24,Pontiac Firebird,19.2,8,400.0,175,3.08,3.845,17.05,0,2,3,2
23,Camaro Z28,13.3,8,350.0,245,3.73,3.84,15.41,0,2,3,4
22,AMC Javelin,15.2,8,304.0,150,3.15,3.435,17.3,0,2,3,2
21,Dodge Challenger,15.5,8,318.0,150,2.76,3.52,16.87,0,2,3,2
11,Merc 450SE,16.4,8,275.8,180,3.07,4.07,17.4,0,2,3,3


In [243]:
#filter records with more than 6 cylinder
df = cars['cyl'] > 6

In [244]:
df

0     False
1     False
2     False
3     False
4      True
5     False
6      True
7     False
8     False
9     False
10    False
11     True
12     True
13     True
14     True
15     True
16     True
17    False
18    False
19    False
20    False
21     True
22     True
23     True
24     True
25    False
26    False
27    False
28     True
29    False
30     True
31    False
Name: cyl, dtype: bool

In [245]:
#filter records with more than 6 cylinder
filter1 = cars['cyl'] > 6
#apply filter to Dataframe
filtered_new = cars[filter1]
# filtered dataframe
filtered_new

Unnamed: 0,model,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
4,Hornet Sportabout,18.7,8,360.0,175,3.15,3.44,17.02,0,2,3,2
6,Duster 360,14.3,8,360.0,245,3.21,3.57,15.84,0,2,3,4
11,Merc 450SE,16.4,8,275.8,180,3.07,4.07,17.4,0,2,3,3
12,Merc 450SL,17.3,8,275.8,180,3.07,3.73,17.6,0,2,3,3
13,Merc 450SLC,15.2,8,275.8,180,3.07,3.78,18.0,0,2,3,3
14,Cadillac Fleetwood,10.4,8,472.0,205,2.93,5.25,17.98,0,2,3,4
15,Lincoln Continental,10.4,8,460.0,215,3.0,5.424,17.82,0,2,3,4
16,Chrysler Imperial,14.7,8,440.0,230,3.23,5.345,17.42,0,2,3,4
21,Dodge Challenger,15.5,8,318.0,150,2.76,3.52,16.87,0,2,3,2
22,AMC Javelin,15.2,8,304.0,150,3.15,3.435,17.3,0,2,3,2


In [251]:
#filter records more than 6 cyl and hp more than 300
filter2 = (cars['cyl'] > 6) & (cars['hp'] > 300)
#apply filter to dataframe
filtered_review = cars[filter2]
filtered_review

Unnamed: 0,model,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
30,Maserati Bora,15.0,8,301.0,335,3.54,3.57,14.6,0,2,5,8


# Introduction to Machine Learning