- Series

In [2]:
from pandas import Series

# Build a Series from a plain Python list. No index is given, so pandas
# attaches the default integer index 0..5.
list_values = [3.1, 2.4, -1.7, 0.2, -2.9, 4.5]
s = Series(list_values)

print(s)
print('Values=', s.values)     # the stored data
print('Index=', s.index)       # the row labels

0    3.1
1    2.4
2   -1.7
3    0.2
4   -2.9
5    4.5
dtype: float64
Values= [ 3.1  2.4 -1.7  0.2 -2.9  4.5]
Index= RangeIndex(start=0, stop=6, step=1)


In [3]:
import numpy as np

# Wrap a NumPy ndarray of six random draws in a Series.
# No seed is set in this cell, so the values differ from run to run.
random_draws = np.random.randn(6)
s2 = Series(random_draws)

print(s2)
print('Values=', s2.values)   # the stored data
print('Index=', s2.index)     # default integer row labels

0    0.764151
1    0.217684
2   -0.045940
3   -0.029408
4    0.567084
5    1.321337
dtype: float64
Values= [ 0.76415051  0.21768395 -0.04593966 -0.02940802  0.5670841   1.32133652]
Index= RangeIndex(start=0, stop=6, step=1)


In [4]:
# Attach explicit string labels instead of the default integer index.
day_labels = ['Jan 1', 'Jan 2', 'Jan 3', 'Jan 4', 'Jan 5', 'Jan 6']
s3 = Series([1.2, 2.5, -2.2, 3.1, -0.8, -3.2], index=day_labels)

print(s3)
print('Values=', s3.values)   # the stored data
print('Index=', s3.index)     # the custom row labels

Jan 1    1.2
Jan 2    2.5
Jan 3   -2.2
Jan 4    3.1
Jan 5   -0.8
Jan 6   -3.2
dtype: float64
Values= [ 1.2  2.5 -2.2  3.1 -0.8 -3.2]
Index= Index(['Jan 1', 'Jan 2', 'Jan 3', 'Jan 4', 'Jan 5', 'Jan 6'], dtype='object')


In [5]:
# A dictionary maps directly onto a Series: keys become the index,
# values become the data.
capitals = {
    'MI': 'Lansing',
    'CA': 'Sacramento',
    'TX': 'Austin',
    'MN': 'St Paul',
}
s4 = Series(capitals)

print(s4)
print('Values=', s4.values)   # the dictionary values
print('Index=', s4.index)     # the dictionary keys

MI       Lansing
CA    Sacramento
TX        Austin
MN       St Paul
dtype: object
Values= ['Lansing' 'Sacramento' 'Austin' 'St Paul']
Index= Index(['MI', 'CA', 'TX', 'MN'], dtype='object')


In [6]:
# Re-create the labelled Series so this cell can be run on its own.
s3 = Series([1.2, 2.5, -2.2, 3.1, -0.8, -3.2],
            index=['Jan 1', 'Jan 2', 'Jan 3', 'Jan 4', 'Jan 5', 'Jan 6'])
print(s3)

# Accessing elements of a Series
#
# Use .iloc for position-based access. On a Series whose labels are not
# integers, s3[2] silently falls back to "third element by position"; pandas
# has deprecated that fallback (it raises a FutureWarning) and intends to
# treat the integer as a label instead.

print('\ns3.iloc[2]=', s3.iloc[2])        # third element, selected by position
print("s3['Jan 3']=", s3['Jan 3'])        # the same element, selected by label

print('\ns3[1:3]=')             # integer slices with [] are positional
print(s3[1:3])
print('s3.iloc[1:3]=')          # the explicit positional spelling of the same slice
print(s3.iloc[1:3])

Jan 1    1.2
Jan 2    2.5
Jan 3   -2.2
Jan 4    3.1
Jan 5   -0.8
Jan 6   -3.2
dtype: float64

s3[2]= -2.2
s3['Jan 3']= -2.2

s3[1:3]=
Jan 2    2.5
Jan 3   -2.2
dtype: float64
s3.iloc([1:3])=
Jan 2    2.5
Jan 3   -2.2
dtype: float64


In [7]:
series_shape = s3.shape    # tuple of axis lengths (a single entry for a Series)
series_size = s3.size      # total number of elements
print('shape =', series_shape)
print('size =', series_size)

shape = (6,)
size = 6


In [8]:
# Boolean-mask selection: keep only the entries whose value is positive.
is_positive = s3 > 0
print(s3[is_positive])

Jan 1    1.2
Jan 2    2.5
Jan 4    3.1
dtype: float64


In [9]:
# The positive-value filter again, this time spelled with .loc and a boolean mask.
print(s3.loc[s3 > 0])

Jan 1    1.2
Jan 2    2.5
Jan 4    3.1
dtype: float64


In [10]:
# NumPy math functions accept a numeric Series directly. Shifting by 4 first
# makes every value positive (the smallest entry is -3.2), so the log is defined.
shifted = s3 + 4
print(np.log(shifted))

Jan 1    1.648659
Jan 2    1.871802
Jan 3    0.587787
Jan 4    1.960095
Jan 5    1.163151
Jan 6   -0.223144
dtype: float64


- DataFrame

In [11]:
from pandas import DataFrame

# Each dictionary key becomes a column; each list supplies that column's rows.
cars = {
    'make': ['Ford', 'Honda', 'Toyota', 'Tesla'],
    'model': ['Taurus', 'Accord', 'Camry', 'Model S'],
    'MSRP': [27595, 23570, 23495, 68000],
}
carData = DataFrame(data=cars)   # build the table from the dictionary
carData                          # last expression in the cell: render the table

Unnamed: 0,make,model,MSRP
0,Ford,Taurus,27595
1,Honda,Accord,23570
2,Toyota,Camry,23495
3,Tesla,Model S,68000


In [12]:
row_labels, column_labels = carData.index, carData.columns
print(row_labels)        # labels along the rows
print(column_labels)     # labels along the columns

RangeIndex(start=0, stop=4, step=1)
Index(['make', 'model', 'MSRP'], dtype='object')


In [13]:
# Same car table, but with row labels 1..4 instead of the default 0..3,
# plus two extra columns added in a single chained step.
carData2 = DataFrame(cars, index=[1, 2, 3, 4]).assign(
    year=2018,   # a scalar is broadcast to every row
    dealership=['Courtesy Ford', 'Capital Honda', 'Spartan Toyota', 'N/A'],
)
carData2         # render the table

Unnamed: 0,make,model,MSRP,year,dealership
1,Ford,Taurus,27595,2018,Courtesy Ford
2,Honda,Accord,23570,2018,Capital Honda
3,Toyota,Camry,23495,2018,Spartan Toyota
4,Tesla,Model S,68000,2018,


In [14]:
# Each tuple is one row; the column names are supplied separately.
columnNames = ['year', 'temp', 'precip']
tuplelist = [
    (2011, 45.1, 32.4),
    (2012, 42.4, 34.5),
    (2013, 47.2, 39.2),
    (2014, 44.2, 31.4),
    (2015, 39.9, 29.8),
    (2016, 41.5, 36.7),
]
weatherData = DataFrame(data=tuplelist, columns=columnNames)
weatherData   # render the table

Unnamed: 0,year,temp,precip
0,2011,45.1,32.4
1,2012,42.4,34.5
2,2013,47.2,39.2
3,2014,44.2,31.4
4,2015,39.9,29.8
5,2016,41.5,36.7


In [15]:
import numpy as np

# Turn a 5 x 3 matrix of random draws into a table with named columns.
# No seed is set in this cell, so the values differ from run to run.
columnNames = ['x1', 'x2', 'x3']
npdata = np.random.randn(5, 3)
data = DataFrame(data=npdata, columns=columnNames)
data   # render the table

Unnamed: 0,x1,x2,x3
0,0.697552,-0.240138,-0.753481
1,1.496075,-1.434707,0.766356
2,-1.043324,0.302267,0.376156
3,0.431778,-0.745907,-0.882314
4,2.894505,-0.549415,-0.444557


In [16]:
# Selecting a single column by name yields a Series, not a DataFrame.

x2_column = data['x2']
print(x2_column)
print(type(x2_column))

0   -0.240138
1   -1.434707
2    0.302267
3   -0.745907
4   -0.549415
Name: x2, dtype: float64
<class 'pandas.core.series.Series'>


In [17]:
# Selecting a single row by position also yields a Series,
# indexed by the column names.

third_row = data.iloc[2]      # position 2 is the 3rd row
print('Row 3 of data table:')
print(third_row)
print(type(third_row))

third_car = carData2.iloc[2]  # mixed column types, so the row has dtype object
print('\nRow 3 of car data table:')
print(third_car)

Row 3 of data table:
x1   -1.043324
x2    0.302267
x3    0.376156
Name: 2, dtype: float64
<class 'pandas.core.series.Series'>

Row 3 of car data table:
make                  Toyota
model                  Camry
MSRP                   23495
year                    2018
dealership    Spartan Toyota
Name: 3, dtype: object


In [18]:
# accessing a specific element of the DataFrame
#
# .iloc selects by integer position (0-based); .loc selects by label.
# carData2 was built with row labels 1..4, so label 1 is the FIRST row,
# while position 1 is the SECOND row.

print(carData2.iloc[1,2])      # by position: second row, third column
print(carData2.loc[1,'model']) # by label: row labelled 1 (the first row), column named 'model'

# accessing a slice of the DataFrame
# (.iloc slices exclude the end position: rows 1-2 and columns 1-2 are returned)

print('carData2.iloc[1:3,1:3]=')
print(carData2.iloc[1:3,1:3])

23570
Taurus
carData2.iloc[1:3,1:3]=
    model   MSRP
2  Accord  23570
3   Camry  23495


In [19]:
table_shape = carData2.shape   # (number of rows, number of columns)
table_size = carData2.size     # total number of cells
print('carData2.shape =', table_shape)
print('carData2.size =', table_size)

carData2.shape = (4, 5)
carData2.size = 20


In [28]:
# selection and filtering
#
# A boolean mask built from a column comparison keeps only the rows where
# the condition holds (here: cars priced above 25000).

print('carData2[carData2.MSRP > 25000]=')
print(carData2[carData2.MSRP > 25000])

carData2[carData2.MSRP > 25000]a
    make    model   MSRP  year     dealership
1   Ford   Taurus  27595  2018  Courtesy Ford
4  Tesla  Model S  68000  2018            N/A


- Arithmetic Operations

In [29]:
# Show the original table first so the results below can be compared to it.
print(data)

print('Data transpose operation:')
transposed = data.T          # rows become columns and vice versa
print(transposed)

print('Addition:')
plus_four = data.add(4)      # the scalar is added to every element
print(plus_four)

print('Multiplication:')
times_ten = data.mul(10)     # the scalar multiplies every element
print(times_ten)

         x1        x2        x3
0  0.697552 -0.240138 -0.753481
1  1.496075 -1.434707  0.766356
2 -1.043324  0.302267  0.376156
3  0.431778 -0.745907 -0.882314
4  2.894505 -0.549415 -0.444557
Data transpose operation:
           0         1         2         3         4
x1  0.697552  1.496075 -1.043324  0.431778  2.894505
x2 -0.240138 -1.434707  0.302267 -0.745907 -0.549415
x3 -0.753481  0.766356  0.376156 -0.882314 -0.444557
Addition:
         x1        x2        x3
0  4.697552  3.759862  3.246519
1  5.496075  2.565293  4.766356
2  2.956676  4.302267  4.376156
3  4.431778  3.254093  3.117686
4  6.894505  3.450585  3.555443
Multiplication:
          x1         x2        x3
0   6.975517  -2.401383 -7.534807
1  14.960750 -14.347069  7.663562
2 -10.433236   3.022666  3.761558
3   4.317782  -7.459071 -8.823139
4  28.945051  -5.494152 -4.445568


In [30]:
print('data =')
print(data)

# A second random 5 x 3 table with the same column names, so the two frames
# line up on both row and column labels.
columnNames = ['x1', 'x2', 'x3']
data2 = DataFrame(data=np.random.randn(5, 3), columns=columnNames)
print('\ndata2 =')
print(data2)

# Arithmetic between two DataFrames is element-wise.
elementwise_sum = data + data2
print('\ndata + data2 = ')
print(elementwise_sum)

elementwise_product = data * data2
print('\ndata * data2 = ')
print(elementwise_product)

data =
         x1        x2        x3
0  0.697552 -0.240138 -0.753481
1  1.496075 -1.434707  0.766356
2 -1.043324  0.302267  0.376156
3  0.431778 -0.745907 -0.882314
4  2.894505 -0.549415 -0.444557

data2 =
         x1        x2        x3
0 -0.819767 -0.622894  0.088763
1  0.246542  0.642555  1.947827
2 -1.157181  0.962443 -0.534328
3 -0.233810 -0.583044 -1.673141
4 -1.322579  0.605855 -0.716271

data + data2 = 
         x1        x2        x3
0 -0.122215 -0.863032 -0.664718
1  1.742617 -0.792152  2.714184
2 -2.200505  1.264709 -0.158173
3  0.197968 -1.328951 -2.555455
4  1.571926  0.056440 -1.160827

data * data2 = 
         x1        x2        x3
0 -0.571830  0.149581 -0.066881
1  0.368845 -0.921878  1.492730
2  1.207314  0.290914 -0.200991
3 -0.100954  0.434897  1.476235
4 -3.828211 -0.332866  0.318423


In [31]:
def f(x):
    """Return the spread of x: its largest value minus its smallest."""
    return x.max() - x.min()


# Element-wise operation: same shape as the input.
print(data.abs())

# Reductions collapse one axis. The default works down each column;
# axis=1 works across each row.
print('\nMaximum value per column:')
column_max = data.max()
print(column_max)

print('\nMinimum value per row:')
row_min = data.min(axis=1)
print(row_min)

print('\nSum of values per column:')
column_sum = data.sum()
print(column_sum)

print('\nAverage value per row:')
row_mean = data.mean(axis=1)
print(row_mean)

# apply() runs a custom function on every column (default) or row (axis=1).
print('\nCalculate max - min per column')
print(data.apply(f))

print('\nCalculate max - min per row')
print(data.apply(f, axis=1))

         x1        x2        x3
0  0.697552  0.240138  0.753481
1  1.496075  1.434707  0.766356
2  1.043324  0.302267  0.376156
3  0.431778  0.745907  0.882314
4  2.894505  0.549415  0.444557

Maximum value per column:
x1    2.894505
x2    0.302267
x3    0.766356
dtype: float64

Minimum value per row:
0   -0.753481
1   -1.434707
2   -1.043324
3   -0.882314
4   -0.549415
dtype: float64

Sum of values per column:
x1    4.476586
x2   -2.667901
x3   -0.937839
dtype: float64

Average value per row:
0   -0.098689
1    0.275908
2   -0.121634
3   -0.398814
4    0.633511
dtype: float64

Calculate max - min per column
x1    3.937829
x2    1.736974
x3    1.648670
dtype: float64

Calculate max - min per row
0    1.451032
1    2.930782
2    1.419479
3    1.314092
4    3.443920
dtype: float64
