# Intro to Pandas & Numpy

By Likith Kumar Dundigalla

In [1]:
import numpy as np

# 1-D array (vector): the single float literal makes every element a float.
oneDim = np.array([1.0, 2, 3, 4, 5])
print(oneDim)
print(f"#Dimensions = {oneDim.ndim}")
print(f"Dimension = {oneDim.shape}")
print(f"Size = {oneDim.size}")
print(f"Array type = {oneDim.dtype} \n")

# 2-D array (matrix) built from a list of equal-length rows.
twoDim = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
print(twoDim)
print(f"#Dimensions = {twoDim.ndim}")
print(f"Dimension = {twoDim.shape}")
print(f"Size = {twoDim.size}")
print(f"Array type = {twoDim.dtype} \n")

# ndarray built from a list of tuples; the tuples mix ints, strings and floats,
# so every element ends up stored as a string.
arrFromTuple = np.array([(1, 'a', 3.0), (2, 'b', 3.5)])
print(arrFromTuple)
print(f"#Dimensions = {arrFromTuple.ndim}")
print(f"Dimension = {arrFromTuple.shape}")
print(f"Size = {arrFromTuple.size}")

[1. 2. 3. 4. 5.]
#Dimensions = 1
Dimension = (5,)
Size = 5
Array type = float64 

[[1 2]
 [3 4]
 [5 6]
 [7 8]]
#Dimensions = 2
Dimension = (4, 2)
Size = 8
Array type = int32 

[['1' 'a' '3.0']
 ['2' 'b' '3.5']]
#Dimensions = 2
Dimension = (2, 3)
Size = 6


In [2]:
# Each heading and its array are emitted by a single print call; sep='\n' puts
# the array on the line after the heading.
print('Array of random numbers from a uniform distribution',
      np.random.rand(5),                  # 5 draws, uniform on [0, 1)
      sep='\n')

print('\nArray of random numbers from a normal distribution',
      np.random.randn(5),                 # 5 draws from the standard normal
      sep='\n')

print('\nArray of integers between -10 and 10, with step size of 2',
      np.arange(-10, 10, 2),              # like range(), but yields an ndarray; stop is excluded
      sep='\n')

print('\n2-dimensional array of integers from 0 to 11',
      np.arange(12).reshape((3, 4)),      # 12 consecutive integers laid out as 3 rows x 4 columns
      sep='\n')

print('\nArray of values between 0 and 1, split into 10 equally spaced values',
      np.linspace(0, 1, num=10),          # both endpoints are included
      sep='\n')

print('\nArray of values from 10^-3 to 10^3',
      np.logspace(-3, 3, num=7),          # 7 powers of ten: 10^-3, 10^-2, ..., 10^3
      sep='\n')

Array of random numbers from a uniform distribution
[0.73747132 0.24197332 0.48844208 0.5852318  0.84791823]

Array of random numbers from a normal distribution
[ 0.4205086  -0.04211675 -0.05755466 -0.40638565  1.45489487]

Array of integers between -10 and 10, with step size of 2
[-10  -8  -6  -4  -2   0   2   4   6   8]

2-dimensional array of integers from 0 to 11
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]

Array of values between 0 and 1, split into 10 equally spaced values
[0.         0.11111111 0.22222222 0.33333333 0.44444444 0.55555556
 0.66666667 0.77777778 0.88888889 1.        ]

Array of values from 10^-3 to 10^3
[1.e-03 1.e-02 1.e-01 1.e+00 1.e+01 1.e+02 1.e+03]


In [3]:
# The shape is passed as a (rows, columns) tuple; all three helpers return float arrays.
print(f'A 2 x 3 matrix of zeros\n{np.zeros((2, 3))}')

print(f'\nA 3 x 2 matrix of ones\n{np.ones((3, 2))}')

# np.eye(n) puts ones on the main diagonal and zeros elsewhere.
print(f'\nA 3 x 3 identity matrix\n{np.eye(3)}')

A 2 x 3 matrix of zeros
[[0. 0. 0.]
 [0. 0. 0.]]

A 3 x 2 matrix of ones
[[1. 1.]
 [1. 1.]
 [1. 1.]]

A 3 x 3 identity matrix
[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]


In [4]:
# Arithmetic between an array and a scalar is applied to every element.
x = np.array([1, 2, 3, 4, 5])

print(f'x = {x}')
print(f'x + 1 = {x + 1}')        # addition
print(f'x - 1 = {x - 1}')        # subtraction
print(f'x * 2 = {x * 2}')        # multiplication
print(f'x // 2 = {x // 2}')      # floor (integer) division
print(f'x ** 2 = {x ** 2}')      # square
print(f'x % 2 = {x % 2}')        # remainder
print(f'1 / x = {1 / x}')        # true division: result is float

x = [1 2 3 4 5]
x + 1 = [2 3 4 5 6]
x - 1 = [0 1 2 3 4]
x * 2 = [ 2  4  6  8 10]
x // 2 = [0 1 1 2 2]
x ** 2 = [ 1  4  9 16 25]
x % 2 = [1 0 1 0 1]
1 / x = [1.         0.5        0.33333333 0.25       0.2       ]


In [5]:
# Arithmetic between two equally shaped arrays pairs the elements up position by position.
x = np.array([2, 4, 6, 8, 10])
y = np.array([1, 2, 3, 4, 5])

# sep=' = ' joins each label to its value, giving "label = value".
print('x', x, sep=' = ')
print('y', y, sep=' = ')
print('x + y', x + y, sep=' = ')       # element-wise addition
print('x - y', x - y, sep=' = ')       # element-wise subtraction
print('x * y', x * y, sep=' = ')       # element-wise multiplication
print('x / y', x / y, sep=' = ')       # element-wise true division (float result)
print('x // y', x // y, sep=' = ')     # element-wise floor division
print('x ** y', x ** y, sep=' = ')     # element-wise exponentiation

x = [ 2  4  6  8 10]
y = [1 2 3 4 5]
x + y = [ 3  6  9 12 15]
x - y = [1 2 3 4 5]
x * y = [ 2  8 18 32 50]
x / y = [2. 2. 2. 2. 2.]
x // y = [2 2 2 2 2]
x ** y = [     2     16    216   4096 100000]


In [6]:
x = np.arange(-5, 5)
print(f'Before: x = {x}')

# A basic slice is a view: y shares its data with x rather than owning a copy.
y = x[3:5]
print(f'        y = {y}')
y.fill(1000)                # writing through the view also changes x[3] and x[4]
print(f'After : y = {y}')
print(f'        x = {x} \n')

# An explicit copy owns its data, so later writes stay local to z.
z = np.copy(x[3:5])
print(f'Before: x = {x}')
print(f'        z = {z}')
z.fill(500)                 # x is left untouched
print(f'After : z = {z}')
print(f'        x = {x}')

Before: x = [-5 -4 -3 -2 -1  0  1  2  3  4]
        y = [-2 -1]
After : y = [1000 1000]
        x = [  -5   -4   -3 1000 1000    0    1    2    3    4] 

Before: x = [  -5   -4   -3 1000 1000    0    1    2    3    4]
        z = [1000 1000]
After : z = [500 500]
        x = [  -5   -4   -3 1000 1000    0    1    2    3    4]


In [7]:
# Nested Python list: only one index (or slice) can be applied at a time.
my2dlist = [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]
print('my2dlist', my2dlist, sep=' = ')
print('my2dlist[2]', my2dlist[2], sep=' = ')            # the third sublist
# my2dlist[:] is just a shallow copy of the outer list, so [2] again yields the
# third sublist -- not the third element of every sublist.
print('my2dlist[:][2]', my2dlist[:][2], sep=' = ')
# print('my2dlist[:,2] =', my2dlist[:,2])      # not valid for lists: raises TypeError (list indices cannot be a tuple)

# The same data as a 2-D ndarray supports row/column indexing in one bracket.
my2darr = np.array(my2dlist)
print('\nmy2darr =\n', my2darr)

print('my2darr[2][:]', my2darr[2][:], sep=' = ')      # third row
print('my2darr[2,:]', my2darr[2, :], sep=' = ')       # third row
print('my2darr[:][2]', my2darr[:][2], sep=' = ')      # third row again (behaves like the nested list)
print('my2darr[:,2]', my2darr[:, 2], sep=' = ')       # third column
print('my2darr[:2,2:] =\n', my2darr[:2, 2:])          # first two rows, last two columns

my2dlist = [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]
my2dlist[2] = [9, 10, 11, 12]
my2dlist[:][2] = [9, 10, 11, 12]

my2darr =
 [[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]
my2darr[2][:] = [ 9 10 11 12]
my2darr[2,:] = [ 9 10 11 12]
my2darr[:][2] = [ 9 10 11 12]
my2darr[:,2] = [ 3  7 11]
my2darr[:2,2:] =
 [[3 4]
 [7 8]]


In [8]:
# Integers 1..12 arranged as 3 rows x 4 columns.
my2darr = np.arange(1, 13).reshape((3, 4))
print(f'my2darr =\n {my2darr}')

# A boolean mask with the array's own shape selects the matching elements as a flat 1-D array.
divBy3 = my2darr[my2darr % 3 == 0]
print(f'\nmy2darr[my2darr % 3 == 0] = {divBy3}')

# Row slice 2: keeps the result 2-D; the column mask is computed from the last row only.
divBy3LastRow = my2darr[2:, my2darr[2] % 3 == 0]
print(f'my2darr[2:, my2darr[2,:] % 3 == 0] = {divBy3LastRow}')

my2darr =
 [[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]

my2darr[my2darr % 3 == 0] = [ 3  6  9 12]
my2darr[2:, my2darr[2,:] % 3 == 0] = [[ 9 12]]


In [9]:
# Integers 1..12 arranged as 4 rows x 3 columns.
my2darr = np.arange(1, 13).reshape((4, 3))
print(f'my2darr =\n {my2darr}')

# Indexing the row axis with a list of positions returns those rows in the listed order.
indices = [2, 1, 0, 3]
print(f'indices = {indices} \n')
print(f'my2darr[indices,:] =\n {my2darr[indices, :]}')

# Two equally long index lists are paired up: element k of the result is
# my2darr[rowIndex[k], columnIndex[k]].
rowIndex = [0, 0, 1, 2, 3]
print(f'\nrowIndex = {rowIndex}')
columnIndex = [0, 2, 0, 1, 2]
print(f'columnIndex = {columnIndex} \n')
print(f'my2darr[rowIndex,columnIndex] = {my2darr[rowIndex, columnIndex]}')

my2darr =
 [[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]
indices = [2, 1, 0, 3] 

my2darr[indices,:] =
 [[ 7  8  9]
 [ 4  5  6]
 [ 1  2  3]
 [10 11 12]]

rowIndex = [0, 0, 1, 2, 3]
columnIndex = [0, 2, 0, 1, 2] 

my2darr[rowIndex,columnIndex] = [ 1  3  4  8 12]


In [10]:
# Element-wise math functions applied to a float vector.
y = np.array([-1.4, 0.4, -3.2, 2.5, 3.4])
print(f'y = {y} \n')

print('np.abs(y)', np.abs(y), sep=' = ')                # absolute values
print('np.sqrt(abs(y))', np.sqrt(abs(y)), sep=' = ')    # square root of each absolute value
print('np.sign(y)', np.sign(y), sep=' = ')              # -1 / 0 / +1 per element
print('np.exp(y)', np.exp(y), sep=' = ')                # e raised to each element
print('np.sort(y)', np.sort(y), sep=' = ')              # sorted copy; y itself is not reordered

y = [-1.4  0.4 -3.2  2.5  3.4] 

np.abs(y) = [1.4 0.4 3.2 2.5 3.4]
np.sqrt(abs(y)) = [1.18321596 0.63245553 1.78885438 1.58113883 1.84390889]
np.sign(y) = [-1.  1. -1.  1.  1.]
np.exp(y) = [ 0.24659696  1.4918247   0.0407622  12.18249396 29.96410005]
np.sort(y) = [-3.2 -1.4  0.4  2.5  3.4]


In [11]:
# Function forms of the element-wise binary operators.
x = np.arange(-2, 3)          # integers -2 .. 2
y = np.random.randn(5)        # 5 standard-normal draws (differ on every run)
print(f'x = {x}')
print(f'y = {y} \n')

print(f'np.add(x,y) = {np.add(x, y)}')                # same as x + y
print(f'np.subtract(x,y) = {np.subtract(x, y)}')      # same as x - y
print(f'np.multiply(x,y) = {np.multiply(x, y)}')      # same as x * y
print(f'np.divide(x,y) = {np.divide(x, y)}')          # same as x / y
print(f'np.maximum(x,y) = {np.maximum(x, y)}')        # larger of the two values at each position

x = [-2 -1  0  1  2]
y = [ 0.40235946 -0.79856307 -0.54550781 -1.37567688 -0.32575864] 

np.add(x,y) = [-1.59764054 -1.79856307 -0.54550781 -0.37567688  1.67424136]
np.subtract(x,y) = [-2.40235946 -0.20143693  0.54550781  2.37567688  2.32575864]
np.multiply(x,y) = [-0.80471891  0.79856307 -0.         -1.37567688 -0.65151727]
np.divide(x,y) = [-4.97067976  1.25224924 -0.         -0.72691488 -6.1395149 ]
np.maximum(x,y) = [ 0.40235946 -0.79856307  0.          1.          2.        ]


In [12]:
# Summary statistics that reduce the whole vector to a single number.
y = np.array([-3.2, -1.4, 0.4, 2.5, 3.4])
print(f'y = {y} \n')

print("Min =", y.min())               # smallest element
print("Max =", y.max())               # largest element
print("Average =", y.mean())          # arithmetic mean
print("Std deviation =", y.std())     # standard deviation
print("Sum =", y.sum())               # total of all elements

y = [-3.2 -1.4  0.4  2.5  3.4] 

Min = -3.2
Max = 3.4
Average = 0.34000000000000014
Std deviation = 2.432776191925595
Sum = 1.7000000000000006


In [13]:
# Random 2 x 3 matrix and its transpose (values differ on every run).
X = np.random.randn(2, 3)
print(f'X =\n {X} \n')
print(f'Transpose of X, X.T =\n {X.T} \n')

# Random vector whose length matches the number of columns of X.
y = np.random.randn(3)
print(f'y = {y} \n')

print('Matrix-vector multiplication')
print(f'X.dot(y) =\n {X.dot(y)} \n')            # (2 x 3) times (3,) gives a length-2 vector

print('Matrix-matrix product')
print(f'X.dot(X.T) = {X.dot(X.T)}')             # (2 x 3) times (3 x 2) gives 2 x 2
print(f'\nX.T.dot(X) =\n {X.T.dot(X)}')         # (3 x 2) times (2 x 3) gives 3 x 3

X =
 [[-0.10841947 -0.59577281  0.03851293]
 [-1.72598437 -1.33973567 -1.47069437]] 

Transpose of X, X.T =
 [[-0.10841947 -1.72598437]
 [-0.59577281 -1.33973567]
 [ 0.03851293 -1.47069437]] 

y = [-1.45469376  0.39753233  0.48558191] 

Matrix-vector multiplication
X.dot(y) =
 [-0.06042064  1.26404788] 

Matrix-matrix product
X.dot(X.T) = [[0.36818327 0.92866765]
 [0.92866765 6.93685564]]

X.T.dot(X) =
 [[2.99077684 2.3769562  2.53421995]
 [2.3769562  2.14983691 1.94739674]
 [2.53421995 1.94739674 2.16442517]]


In [14]:
# All quantities are computed first and reported afterwards, in the same order as before.
X = np.random.randn(5, 3)     # random 5 x 3 matrix (values differ on every run)
C = X.T.dot(X)                # C = X^T * X is a square 3 x 3 matrix
invC = np.linalg.inv(C)       # matrix inverse of C
detC = np.linalg.det(C)       # determinant of C
S, U = np.linalg.eig(C)       # S: eigenvalues, U: eigenvectors stored as columns

print(f'X =\n {X} \n')
print(f'C = X.T.dot(X) =\n {C} \n')
print(f'Inverse of C = np.linalg.inv(C)\n {invC} \n')
print('Determinant of C = np.linalg.det(C) =', detC)
print(f'Eigenvalues of C =\n {S}')
print(f'Eigenvectors of C =\n {U}')

X =
 [[ 0.61745543 -0.69587863 -1.10802969]
 [ 0.43993903 -0.56962891 -0.15709282]
 [ 0.81989724  1.07915254  1.34859515]
 [ 0.03973966  0.61602322  0.1817617 ]
 [ 0.80854029 -0.05075367  0.21238007]] 

C = X.T.dot(X) =
 [[1.90234568 0.18796232 0.53138021]
 [0.18796232 2.35535491 2.41706904]
 [0.53138021 2.41706904 3.14925944]] 

Inverse of C = np.linalg.inv(C)
 [[ 0.58273597  0.25613163 -0.29490812]
 [ 0.25613163  2.11159921 -1.66387827]
 [-0.29490812 -1.66387827  1.64432848]] 

Determinant of C = np.linalg.det(C) = 2.703455810722617
Eigenvalues of C =
 [5.28398951 1.84578072 0.27718979]
Eigenvectors of C =
 [[ 0.15425712  0.97985136 -0.12687019]
 [ 0.63478252 -0.19668502 -0.74723902]
 [ 0.75713664 -0.03473195  0.65233258]]


In [15]:
from pandas import Series

# A Series built from a plain list gets a default integer index 0..n-1.
s = Series(data=[3.1, 2.4, -1.7, 0.2, -2.9, 4.5])
print(f'Series, s =\n {s} \n')

print(f's.values = {s.values}')     # the underlying values as an ndarray
print(f's.index = {s.index}')       # the index (row labels)
print(f's.dtype = {s.dtype}')       # element type

Series, s =
 0    3.1
1    2.4
2   -1.7
3    0.2
4   -2.9
5    4.5
dtype: float64 

s.values = [ 3.1  2.4 -1.7  0.2 -2.9  4.5]
s.index = RangeIndex(start=0, stop=6, step=1)
s.dtype = float64


In [16]:
import numpy as np

# A Series can wrap a NumPy array directly; the values here are 6 random draws.
s2 = Series(data=np.random.randn(6))
print(f'Series s2 =\n {s2} \n')
print(f's2.values = {s2.values}')   # the underlying values as an ndarray
print(f's2.index = {s2.index}')     # default integer index
print(f's2.dtype = {s2.dtype}')     # element type

Series s2 =
 0   -1.513467
1   -0.648586
2   -1.373930
3   -0.928484
4   -0.759209
5   -0.659676
dtype: float64 

s2.values = [-1.51346695 -0.64858596 -1.37393004 -0.92848423 -0.75920934 -0.65967607]
s2.index = RangeIndex(start=0, stop=6, step=1)
s2.dtype = float64


In [17]:
# Series with explicit string labels 'Jan 1' .. 'Jan 6' instead of the default integer index.
s3 = Series(
    data=[1.2, 2.5, -2.2, 3.1, -0.8, -3.2],
    index=[f'Jan {day}' for day in range(1, 7)],
)
print(f'Series s3 =\n {s3} \n')
print(f's3.values = {s3.values}')   # the underlying values as an ndarray
print(f's3.index = {s3.index}')     # the string labels
print(f's3.dtype = {s3.dtype}')     # element type of the values

Series s3 =
 Jan 1    1.2
Jan 2    2.5
Jan 3   -2.2
Jan 4    3.1
Jan 5   -0.8
Jan 6   -3.2
dtype: float64 

s3.values = [ 1.2  2.5 -2.2  3.1 -0.8 -3.2]
s3.index = Index(['Jan 1', 'Jan 2', 'Jan 3', 'Jan 4', 'Jan 5', 'Jan 6'], dtype='object')
s3.dtype = float64


In [18]:
# State abbreviation -> capital city.
capitals = {
    'MI': 'Lansing',
    'CA': 'Sacramento',
    'TX': 'Austin',
    'MN': 'St Paul',
}

# Building a Series from a dict uses the keys as the index and the values as the data.
s4 = Series(data=capitals)
print(f'Series s4 =\n {s4} \n')
print(f's4.values = {s4.values}')   # the capital names
print(f's4.index= {s4.index}')      # the state abbreviations
print(f's4.dtype = {s4.dtype}')     # element type of the values

Series s4 =
 MI       Lansing
CA    Sacramento
TX        Austin
MN       St Paul
dtype: object 

s4.values = ['Lansing' 'Sacramento' 'Austin' 'St Paul']
s4.index= Index(['MI', 'CA', 'TX', 'MN'], dtype='object')
s4.dtype = object


In [19]:
# Series of six values labelled 'Jan 1' .. 'Jan 6'.
s3 = Series([1.2,2.5,-2.2,3.1,-0.8,-3.2], 
            index = ['Jan 1','Jan 2','Jan 3','Jan 4','Jan 5','Jan 6',])
print('s3 =\n', s3, '\n')

# Accessing elements of a Series

# Positional access goes through .iloc. With a non-integer index, s3[2] only works via
# the deprecated fallback in which Series.__getitem__ treats an integer key as a position.
print('s3.iloc[2]=', s3.iloc[2])          # third element, selected by position
print('s3[\'Jan 3\']=', s3['Jan 3'])      # the same element, selected by its index label

print('\ns3[1:3]=')             # integer slice with []: positions 1 and 2 (end excluded)
print(s3[1:3])
print('\ns3.iloc[1:3]=')        # the same slice, explicitly positional
print(s3.iloc[1:3])

s3 =
 Jan 1    1.2
Jan 2    2.5
Jan 3   -2.2
Jan 4    3.1
Jan 5   -0.8
Jan 6   -3.2
dtype: float64 

s3[2]= -2.2
s3['Jan 3']= -2.2

s3[1:3]=
Jan 2    2.5
Jan 3   -2.2
dtype: float64

s3.iloc([1:3])=
Jan 2    2.5
Jan 3   -2.2
dtype: float64


In [20]:
# Assigning to a label that is not yet in the index appends a new entry (here a missing value).
s3.loc['Jan 7'] = np.nan
print(f'Series s3 =\n {s3} \n')

print(f'Shape of s3 = {s3.shape}')     # dimensions of the Series, as a 1-tuple
print(f'Size of s3 = {s3.size}')       # number of elements, NaN included
print('Count of s3 =', s3.count())     # number of non-null elements

Series s3 =
 Jan 1    1.2
Jan 2    2.5
Jan 3   -2.2
Jan 4    3.1
Jan 5   -0.8
Jan 6   -3.2
Jan 7    NaN
dtype: float64 

Shape of s3 = (7,)
Size of s3 = 7
Count of s3 = 6


In [21]:
print(s3[s3.gt(0)])   # boolean filter: keep only the strictly positive elements (zero and NaN are dropped)

Jan 1    1.2
Jan 2    2.5
Jan 4    3.1
dtype: float64


In [22]:
# Scalar arithmetic is applied to every element; the index labels are kept.
print(f's3 + 4 =\n {s3 + 4} \n')
print(f's3 / 4 =\n {s3 / 4}')

s3 + 4 =
 Jan 1    5.2
Jan 2    6.5
Jan 3    1.8
Jan 4    7.1
Jan 5    3.2
Jan 6    0.8
Jan 7    NaN
dtype: float64 

s3 / 4 =
 Jan 1    0.300
Jan 2    0.625
Jan 3   -0.550
Jan 4    0.775
Jan 5   -0.200
Jan 6   -0.800
Jan 7      NaN
dtype: float64


In [23]:
# NumPy math functions accept a numeric Series and return a Series with the same index.
print(f'np.log(s3 + 4) =\n {np.log(s3 + 4)} \n')    # natural logarithm of each shifted value
print(f'np.exp(s3 - 4) =\n {np.exp(s3 - 4)} \n')    # exponential of each shifted value

np.log(s3 + 4) =
 Jan 1    1.648659
Jan 2    1.871802
Jan 3    0.587787
Jan 4    1.960095
Jan 5    1.163151
Jan 6   -0.223144
Jan 7         NaN
dtype: float64 

np.exp(s3 - 4) =
 Jan 1    0.060810
Jan 2    0.223130
Jan 3    0.002029
Jan 4    0.406570
Jan 5    0.008230
Jan 6    0.000747
Jan 7         NaN
dtype: float64 



In [24]:
# Categorical-style data containing repeats and one missing value.
colors = Series(data=['red', 'blue', 'blue', 'yellow', 'red', 'green', 'blue', np.nan])
print(f'colors =\n {colors} \n')

# value_counts() tallies how often each distinct value occurs; the NaN entry is not counted.
print(f'colors.value_counts() =\n {colors.value_counts()}')

colors =
 0       red
1      blue
2      blue
3    yellow
4       red
5     green
6      blue
7       NaN
dtype: object 

colors.value_counts() =
 blue      3
red       2
yellow    1
green     1
dtype: int64


In [25]:
from pandas import DataFrame

# Column-oriented dictionary: each key becomes a column, each list holds that column's values.
cars = {
    'make': ['Ford', 'Honda', 'Toyota', 'Tesla'],
    'model': ['Taurus', 'Accord', 'Camry', 'Model S'],
    'MSRP': [27595, 23570, 23495, 68000],
}
carData = DataFrame(data=cars)       # rows get the default integer index 0..3
carData                              # last expression of the cell: rendered as a table

Unnamed: 0,make,model,MSRP
0,Ford,Taurus,27595
1,Honda,Accord,23570
2,Toyota,Camry,23495
3,Tesla,Model S,68000


In [26]:
print(f'carData.index = {carData.index}')         # row labels
print(f'carData.columns = {carData.columns}')     # column labels

carData.index = RangeIndex(start=0, stop=4, step=1)
carData.columns = Index(['make', 'model', 'MSRP'], dtype='object')


In [27]:
# Same car data, but with row labels 1..4 and two extra columns appended in one chain.
carData2 = DataFrame(cars, index=[1, 2, 3, 4]).assign(
    year=2018,    # a scalar is repeated for every row
    dealership=['Courtesy Ford', 'Capital Honda', 'Spartan Toyota', 'N/A'],    # one value per row
)
carData2          # last expression of the cell: rendered as a table

Unnamed: 0,make,model,MSRP,year,dealership
1,Ford,Taurus,27595,2018,Courtesy Ford
2,Honda,Accord,23570,2018,Capital Honda
3,Toyota,Camry,23495,2018,Spartan Toyota
4,Tesla,Model S,68000,2018,


In [28]:
# One tuple per row: (year, temp, precip).
tuplelist = [
    (2011, 45.1, 32.4),
    (2012, 42.4, 34.5),
    (2013, 47.2, 39.2),
    (2014, 44.2, 31.4),
    (2015, 39.9, 29.8),
    (2016, 41.5, 36.7),
]
columnNames = ['year', 'temp', 'precip']

# Tuples carry no field names, so the column labels are supplied separately.
weatherData = DataFrame(data=tuplelist, columns=columnNames)
weatherData

Unnamed: 0,year,temp,precip
0,2011,45.1,32.4
1,2012,42.4,34.5
2,2013,47.2,39.2
3,2014,44.2,31.4
4,2015,39.9,29.8
5,2016,41.5,36.7


In [29]:
import numpy as np

# 5 x 3 matrix of standard-normal draws (values differ on every run).
npdata = np.random.randn(5, 3)
columnNames = [f'x{i}' for i in range(1, 4)]    # 'x1', 'x2', 'x3'

# Wrap the matrix in a DataFrame, one named column per matrix column.
data = DataFrame(data=npdata, columns=columnNames)
data

Unnamed: 0,x1,x2,x3
0,-1.89397,0.993616,2.046428
1,0.8178,1.392261,0.518061
2,-0.085934,0.024937,-1.123828
3,-0.975187,-0.111832,0.881997
4,1.38234,0.376212,-0.514129


In [30]:
# Selecting a single column by name yields a Series.

# The column and its type are printed on separate lines by one call.
print(data['x2'], type(data['x2']), sep='\n')

0    0.993616
1    1.392261
2    0.024937
3   -0.111832
4    0.376212
Name: x2, dtype: float64
<class 'pandas.core.series.Series'>


In [31]:
# Selecting a single row by position also yields a Series, indexed by the column names.

# .iloc[2] is the row at position 2, i.e. the 3rd row.
print('Row 3 of data table:', data.iloc[2], type(data.iloc[2]), sep='\n')

# The car table's columns hold different types, so its row comes back with dtype object.
print('\nRow 3 of car data table:', carData2.iloc[2], sep='\n')

Row 3 of data table:
x1   -0.085934
x2    0.024937
x3   -1.123828
Name: 2, dtype: float64
<class 'pandas.core.series.Series'>

Row 3 of car data table:
make                  Toyota
model                  Camry
MSRP                   23495
year                    2018
dealership    Spartan Toyota
Name: 3, dtype: object


In [32]:
# Element access on carData2, whose row labels are 1..4 -- so labels and positions differ.

print(f'carData2 =\n {carData2}')

# .iloc is purely positional: position 1 is the second row, position 2 the third column.
print(f'\ncarData2.iloc[1,2] = {carData2.iloc[1, 2]}')
# .loc is label-based: label 1 is the FIRST row of carData2, read at the column named 'model'.
print("carData2.loc[1,'model'] =", carData2.loc[1, 'model'])

# Slice access: .iloc slices exclude the end position, so this is rows 1-2 and
# columns 1-2 counted by position.

print('\ncarData2.iloc[1:3,1:3]=', carData2.iloc[1:3, 1:3], sep='\n')

carData2 =
      make    model   MSRP  year      dealership
1    Ford   Taurus  27595  2018   Courtesy Ford
2   Honda   Accord  23570  2018   Capital Honda
3  Toyota    Camry  23495  2018  Spartan Toyota
4   Tesla  Model S  68000  2018             N/A

carData2.iloc[1,2] = 23570
carData2.loc[1,'model'] = Taurus

carData2.iloc[1:3,1:3]=
    model   MSRP
2  Accord  23570
3   Camry  23495


In [33]:
print(f'carData2 =\n {carData2} \n')

print(f'carData2.shape = {carData2.shape}')   # (number of rows, number of columns)
print(f'carData2.size = {carData2.size}')     # total number of cells: rows * columns

carData2 =
      make    model   MSRP  year      dealership
1    Ford   Taurus  27595  2018   Courtesy Ford
2   Honda   Accord  23570  2018   Capital Honda
3  Toyota    Camry  23495  2018  Spartan Toyota
4   Tesla  Model S  68000  2018             N/A 

carData2.shape = (4, 5)
carData2.size = 20


In [34]:
# Row selection with a boolean condition on one column.

print(f'carData2 =\n {carData2} \n')

# The comparison yields one True/False per row; indexing with it keeps only the True rows.
print('carData2[carData2.MSRP > 25000] =', carData2[carData2.MSRP > 25000], sep='\n')

carData2 =
      make    model   MSRP  year      dealership
1    Ford   Taurus  27595  2018   Courtesy Ford
2   Honda   Accord  23570  2018   Capital Honda
3  Toyota    Camry  23495  2018  Spartan Toyota
4   Tesla  Model S  68000  2018             N/A 

carData2[carData2.MSRP > 25000] =
    make    model   MSRP  year     dealership
1   Ford   Taurus  27595  2018  Courtesy Ford
4  Tesla  Model S  68000  2018            N/A
