## Common Attributes for Working with DataFrames

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the lending data from CSV, using the 'LoanID' column as the row index.
data = pd.read_csv('Lending company.csv', index_col = 'LoanID')
# Work on a copy; presumably so the freshly loaded `data` frame stays as read from disk.
lending_co_data = data.copy()
# Bare last expression: the notebook renders the frame as the cell output.
lending_co_data

Unnamed: 0_level_0,StringID,Product,CustomerGender,Location,Region,TotalPrice,StartDate,Deposit,DailyRate,TotalDaysYr,AmtPaid36,AmtPaid60,AmtPaid360,LoanStatus
LoanID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,LoanID_1,Product B,Female,Location 3,Region 2,17600.0,04/07/2018,2200,45,365,3221,4166,14621,Active
2,LoanID_2,Product D,Female,Location 6,Region 6,,02/01/2019,2200,45,365,3161,4096,16041,Active
3,LoanID_3,Product B,Male,Location 8,Region 3,16600.0,08/12/2016,1000,45,365,2260,3205,16340,
4,LoanID_4,Product A,Male,Location 26,Region 2,17600.0,,2200,45,365,3141,4166,16321,Active
5,LoanID_5,Product B,Female,Location 34,Region 3,21250.0,28/10/2017,2200,55,365,3570,4745,14720,Active
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1039,LoanID_1039,Product B,Male,Location 73,Region 6,17300.0,29/12/2018,2200,45,365,3251,4743,16617,Finished Payment
1040,LoanID_1040,Product A,Male,Location 82,Region 1,,28/03/2018,2200,45,365,4090,5582,16617,Finished Payment
1041,LoanID_1041,Product A,NotSpecified,Location 11,Region 4,17300.0,26/04/2018,2200,45,365,4051,5143,16617,Finished Payment
1042,LoanID_1042,Product B,Female,Location 26,Region 6,16300.0,25/10/2016,1000,45,365,1930,3462,15617,Finished Payment


In [3]:
# Row labels of the frame: the 'LoanID' values chosen via index_col when loading.
lending_co_data.index

Int64Index([   1,    2,    3,    4,    5,    6,    7,    8,    9,   10,
            ...
            1034, 1035, 1036, 1037, 1038, 1039, 1040, 1041, 1042, 1043],
           dtype='int64', name='LoanID', length=1043)

In [4]:
# Concrete class of the row index object.
# NOTE: the class name depends on the pandas version; Int64Index was removed in
# pandas 2.0, where this reports a plain Index with dtype int64 instead.
type(lending_co_data.index)

pandas.core.indexes.numeric.Int64Index

In [5]:
# Column labels of the frame (the index column 'LoanID' is not among them).
lending_co_data.columns

Index(['StringID', 'Product', 'CustomerGender', 'Location', 'Region',
       'TotalPrice', 'StartDate', 'Deposit', 'DailyRate', 'TotalDaysYr',
       'AmtPaid36', 'AmtPaid60', 'AmtPaid360', 'LoanStatus'],
      dtype='object')

In [6]:
# The column labels are held in an Index object too, just like the row labels.
type(lending_co_data.columns)

pandas.core.indexes.base.Index

In [7]:
# Both axes at once: a list with the row index first and the column index second.
lending_co_data.axes

[Int64Index([   1,    2,    3,    4,    5,    6,    7,    8,    9,   10,
             ...
             1034, 1035, 1036, 1037, 1038, 1039, 1040, 1041, 1042, 1043],
            dtype='int64', name='LoanID', length=1043),
 Index(['StringID', 'Product', 'CustomerGender', 'Location', 'Region',
        'TotalPrice', 'StartDate', 'Deposit', 'DailyRate', 'TotalDaysYr',
        'AmtPaid36', 'AmtPaid60', 'AmtPaid360', 'LoanStatus'],
       dtype='object')]

In [8]:
# Data type of every column, returned as a Series keyed by column name.
# In this data the text-like columns show up as 'object'; note that StartDate is
# also loaded as object (date strings), not as a datetime type.
lending_co_data.dtypes

StringID           object
Product            object
CustomerGender     object
Location           object
Region             object
TotalPrice        float64
StartDate          object
Deposit             int64
DailyRate           int64
TotalDaysYr         int64
AmtPaid36           int64
AmtPaid60           int64
AmtPaid360          int64
LoanStatus         object
dtype: object

In [9]:
# Underlying data as a NumPy array. The columns have mixed types, so the array
# comes back with dtype=object.
# NOTE: the pandas documentation recommends DataFrame.to_numpy() over .values.
lending_co_data.values

array([['LoanID_1', 'Product B', 'Female', ..., 4166, 14621, 'Active'],
       ['LoanID_2', 'Product D', 'Female', ..., 4096, 16041, 'Active'],
       ['LoanID_3', 'Product B', 'Male', ..., 3205, 16340, nan],
       ...,
       ['LoanID_1041', 'Product A', 'NotSpecified', ..., 5143, 16617,
        'Finished Payment'],
       ['LoanID_1042', 'Product B', 'Female', ..., 3462, 15617,
        'Finished Payment'],
       ['LoanID_1043', 'Product A', 'NotSpecified', ..., 4743, 16617,
        'Finished Payment']], dtype=object)

In [10]:
# Confirm that .values hands back a NumPy ndarray rather than a pandas object.
type(lending_co_data.values)

numpy.ndarray

In [11]:
# Method counterpart of .values; it yields the same object-dtype array here.
lending_co_data.to_numpy()

array([['LoanID_1', 'Product B', 'Female', ..., 4166, 14621, 'Active'],
       ['LoanID_2', 'Product D', 'Female', ..., 4096, 16041, 'Active'],
       ['LoanID_3', 'Product B', 'Male', ..., 3205, 16340, nan],
       ...,
       ['LoanID_1041', 'Product A', 'NotSpecified', ..., 5143, 16617,
        'Finished Payment'],
       ['LoanID_1042', 'Product B', 'Female', ..., 3462, 15617,
        'Finished Payment'],
       ['LoanID_1043', 'Product A', 'NotSpecified', ..., 4743, 16617,
        'Finished Payment']], dtype=object)

In [12]:
# to_numpy() also returns a NumPy ndarray.
type(lending_co_data.to_numpy())

numpy.ndarray

In [13]:
# Dimensions of the frame as a (number_of_rows, number_of_columns) tuple.
lending_co_data.shape

(1043, 14)

In [14]:
# Number of columns, obtained by counting the column labels; this matches the
# second element of lending_co_data.shape.
len(lending_co_data.columns)

14

## Data Selection in pandas DataFrames

In [15]:
# Show the frame again as the starting point for the selection examples below.
lending_co_data

Unnamed: 0_level_0,StringID,Product,CustomerGender,Location,Region,TotalPrice,StartDate,Deposit,DailyRate,TotalDaysYr,AmtPaid36,AmtPaid60,AmtPaid360,LoanStatus
LoanID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,LoanID_1,Product B,Female,Location 3,Region 2,17600.0,04/07/2018,2200,45,365,3221,4166,14621,Active
2,LoanID_2,Product D,Female,Location 6,Region 6,,02/01/2019,2200,45,365,3161,4096,16041,Active
3,LoanID_3,Product B,Male,Location 8,Region 3,16600.0,08/12/2016,1000,45,365,2260,3205,16340,
4,LoanID_4,Product A,Male,Location 26,Region 2,17600.0,,2200,45,365,3141,4166,16321,Active
5,LoanID_5,Product B,Female,Location 34,Region 3,21250.0,28/10/2017,2200,55,365,3570,4745,14720,Active
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1039,LoanID_1039,Product B,Male,Location 73,Region 6,17300.0,29/12/2018,2200,45,365,3251,4743,16617,Finished Payment
1040,LoanID_1040,Product A,Male,Location 82,Region 1,,28/03/2018,2200,45,365,4090,5582,16617,Finished Payment
1041,LoanID_1041,Product A,NotSpecified,Location 11,Region 4,17300.0,26/04/2018,2200,45,365,4051,5143,16617,Finished Payment
1042,LoanID_1042,Product B,Female,Location 26,Region 6,16300.0,25/10/2016,1000,45,365,1930,3462,15617,Finished Payment


In [16]:
# Attribute (dot) access to a single column; the result is a Series.
# NOTE: dot access only works when the column name is a valid Python identifier
# that does not clash with an existing DataFrame attribute or method.
lending_co_data.Product

LoanID
1       Product B
2       Product D
3       Product B
4       Product A
5       Product B
          ...    
1039    Product B
1040    Product A
1041    Product A
1042    Product B
1043    Product A
Name: Product, Length: 1043, dtype: object

In [17]:
# Same dot-access pattern, this time pulling out the 'Location' column.
lending_co_data.Location

LoanID
1        Location 3
2        Location 6
3        Location 8
4       Location 26
5       Location 34
           ...     
1039    Location 73
1040    Location 82
1041    Location 11
1042    Location 26
1043    Location 94
Name: Location, Length: 1043, dtype: object

In [18]:
# Bracket access with the column label as a string; it returns the same Series
# as lending_co_data.Product.
lending_co_data['Product']

LoanID
1       Product B
2       Product D
3       Product B
4       Product A
5       Product B
          ...    
1039    Product B
1040    Product A
1041    Product A
1042    Product B
1043    Product A
Name: Product, Length: 1043, dtype: object

In [19]:
# Bracket access to the 'Location' column, equivalent to lending_co_data.Location.
lending_co_data['Location']

LoanID
1        Location 3
2        Location 6
3        Location 8
4       Location 26
5       Location 34
           ...     
1039    Location 73
1040    Location 82
1041    Location 11
1042    Location 26
1043    Location 94
Name: Location, Length: 1043, dtype: object

In [20]:
# Selecting with a single label (one pair of brackets) produces a Series.
type(lending_co_data['Location'])

pandas.core.series.Series

In [21]:
# Passing a *list* of labels (note the double brackets) keeps the result as a
# DataFrame, even when the list holds only one column.
lending_co_data[['Location']]

Unnamed: 0_level_0,Location
LoanID,Unnamed: 1_level_1
1,Location 3
2,Location 6
3,Location 8
4,Location 26
5,Location 34
...,...
1039,Location 73
1040,Location 82
1041,Location 11
1042,Location 26


In [22]:
# Confirm that list-based selection yields a DataFrame rather than a Series.
type(lending_co_data[['Location']])

pandas.core.frame.DataFrame

In [23]:
# Select several columns at once; the result keeps the order given in the list
# ('Location' before 'Product'). head() limits the display to the first five rows.
lending_co_data[['Location', 'Product']].head()

Unnamed: 0_level_0,Location,Product
LoanID,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Location 3,Product B
2,Location 6,Product D
3,Location 8,Product B
4,Location 26,Product A
5,Location 34,Product B


In [24]:
# The list of wanted column labels can be stored in a variable first ...
product_log = ['Location', 'Product']
# ... and then passed to the brackets; this gives the same result as writing the
# list inline.
lending_co_data[product_log].head()

Unnamed: 0_level_0,Location,Product
LoanID,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Location 3,Product B
2,Location 6,Product D
3,Location 8,Product B
4,Location 26,Product A
5,Location 34,Product B


## Data Selection - Indexing Data with iloc[]

In [25]:
# iloc selects by integer position, not by label. Positions are zero-based, so 1
# is the second row (the one labelled LoanID 2); it comes back as a Series.
lending_co_data.iloc[1]

StringID            LoanID_2
Product            Product D
CustomerGender        Female
Location          Location 6
Region              Region 6
TotalPrice               NaN
StartDate         02/01/2019
Deposit                 2200
DailyRate                 45
TotalDaysYr              365
AmtPaid36               3161
AmtPaid60               4096
AmtPaid360             16041
LoanStatus            Active
Name: 2, dtype: object

In [26]:
# Row position 1 and column position 3 ('Location') together pick a single value.
lending_co_data.iloc[1,3]

'Location 6'

In [27]:
# Explicit form of iloc[1]: row position 1 with ':' meaning every column.
lending_co_data.iloc[1,:]

StringID            LoanID_2
Product            Product D
CustomerGender        Female
Location          Location 6
Region              Region 6
TotalPrice               NaN
StartDate         02/01/2019
Deposit                 2200
DailyRate                 45
TotalDaysYr              365
AmtPaid36               3161
AmtPaid60               4096
AmtPaid360             16041
LoanStatus            Active
Name: 2, dtype: object

In [28]:
# ':' for every row, combined with column position 3, returns the whole
# 'Location' column.
lending_co_data.iloc[:, 3]

LoanID
1        Location 3
2        Location 6
3        Location 8
4       Location 26
5       Location 34
           ...     
1039    Location 73
1040    Location 82
1041    Location 11
1042    Location 26
1043    Location 94
Name: Location, Length: 1043, dtype: object

In [29]:
# A single-cell lookup returns the stored value itself, here a plain Python str.
type(lending_co_data.iloc[1,3])

str

In [30]:
# A single row selected with iloc is returned as a Series.
type(lending_co_data.iloc[1,:])

pandas.core.series.Series

In [31]:
# A single column selected with iloc is returned as a Series as well.
type(lending_co_data.iloc[:,3])

pandas.core.series.Series

In [32]:
# A list of row positions returns a DataFrame holding just those rows, here
# positions 1 and 3 (LoanID 2 and 4), with all columns.
lending_co_data.iloc[[1,3], :]

Unnamed: 0_level_0,StringID,Product,CustomerGender,Location,Region,TotalPrice,StartDate,Deposit,DailyRate,TotalDaysYr,AmtPaid36,AmtPaid60,AmtPaid360,LoanStatus
LoanID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2,LoanID_2,Product D,Female,Location 6,Region 6,,02/01/2019,2200,45,365,3161,4096,16041,Active
4,LoanID_4,Product A,Male,Location 26,Region 2,17600.0,,2200,45,365,3141,4166,16321,Active


In [33]:
# A list of column positions returns a DataFrame with those columns for every
# row, here positions 1 and 3 ('Product' and 'Location').
lending_co_data.iloc[:, [1,3]]

Unnamed: 0_level_0,Product,Location
LoanID,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Product B,Location 3
2,Product D,Location 6
3,Product B,Location 8
4,Product A,Location 26
5,Product B,Location 34
...,...,...
1039,Product B,Location 73
1040,Product A,Location 82
1041,Product A,Location 11
1042,Product B,Location 26


## Data Selection - Indexing Data with loc[]

In [34]:
# Read the same CSV again, this time with the string column 'StringID' as the
# row index, so that label-based selection works on string labels.
# 'LoanID' becomes an ordinary column in this frame.
data_2 = pd.read_csv('Lending company.csv', index_col = 'StringID')
# Work on a copy, mirroring how lending_co_data was created from data.
lending_co_data_2 = data_2.copy()
# Display the re-indexed frame.
lending_co_data_2

Unnamed: 0_level_0,LoanID,Product,CustomerGender,Location,Region,TotalPrice,StartDate,Deposit,DailyRate,TotalDaysYr,AmtPaid36,AmtPaid60,AmtPaid360,LoanStatus
StringID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
LoanID_1,1,Product B,Female,Location 3,Region 2,17600.0,04/07/2018,2200,45,365,3221,4166,14621,Active
LoanID_2,2,Product D,Female,Location 6,Region 6,,02/01/2019,2200,45,365,3161,4096,16041,Active
LoanID_3,3,Product B,Male,Location 8,Region 3,16600.0,08/12/2016,1000,45,365,2260,3205,16340,
LoanID_4,4,Product A,Male,Location 26,Region 2,17600.0,,2200,45,365,3141,4166,16321,Active
LoanID_5,5,Product B,Female,Location 34,Region 3,21250.0,28/10/2017,2200,55,365,3570,4745,14720,Active
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
LoanID_1039,1039,Product B,Male,Location 73,Region 6,17300.0,29/12/2018,2200,45,365,3251,4743,16617,Finished Payment
LoanID_1040,1040,Product A,Male,Location 82,Region 1,,28/03/2018,2200,45,365,4090,5582,16617,Finished Payment
LoanID_1041,1041,Product A,NotSpecified,Location 11,Region 4,17300.0,26/04/2018,2200,45,365,4051,5143,16617,Finished Payment
LoanID_1042,1042,Product B,Female,Location 26,Region 6,16300.0,25/10/2016,1000,45,365,1930,3462,15617,Finished Payment


In [35]:
# loc selects by label: fetch the row whose index label is 'LoanID_3' as a Series.
lending_co_data_2.loc['LoanID_3']

LoanID                     3
Product            Product B
CustomerGender          Male
Location          Location 8
Region              Region 3
TotalPrice           16600.0
StartDate         08/12/2016
Deposit                 1000
DailyRate                 45
TotalDaysYr              365
AmtPaid36               2260
AmtPaid60               3205
AmtPaid360             16340
LoanStatus               NaN
Name: LoanID_3, dtype: object

In [36]:
# Explicit form of the same lookup: row label 'LoanID_3' with ':' for every column.
lending_co_data_2.loc['LoanID_3', :]

LoanID                     3
Product            Product B
CustomerGender          Male
Location          Location 8
Region              Region 3
TotalPrice           16600.0
StartDate         08/12/2016
Deposit                 1000
DailyRate                 45
TotalDaysYr              365
AmtPaid36               2260
AmtPaid60               3205
AmtPaid360             16340
LoanStatus               NaN
Name: LoanID_3, dtype: object

In [37]:
# A row label plus a column label pick out a single value.
lending_co_data_2.loc['LoanID_3', 'Region']

'Region 3'

In [38]:
# ':' for every row, combined with the column label 'Location', returns the whole
# column as a Series indexed by StringID.
lending_co_data_2.loc[:,'Location']

StringID
LoanID_1        Location 3
LoanID_2        Location 6
LoanID_3        Location 8
LoanID_4       Location 26
LoanID_5       Location 34
                  ...     
LoanID_1039    Location 73
LoanID_1040    Location 82
LoanID_1041    Location 11
LoanID_1042    Location 26
LoanID_1043    Location 94
Name: Location, Length: 1043, dtype: object