<a href="https://colab.research.google.com/github/JaiswalFelipe/Learning-Python/blob/main/Pandas.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Always adapt to NEWER TOOLS
# Documentation: pandas.pydata.org
import pandas as pd # Numpy based
import numpy as np

In [None]:
# Show the installed pandas version so readers can match it against the outputs below
pd.__version__

'1.1.5'

### **Basics**

##### Pandas Series
- A Series object holds single-column data: a set of values that correspond to a single variable
- Pandas series object corresponds to the one-dimensional Numpy array structure

In [None]:
# Start from an ordinary Python list of one-letter strings
l = list('ABCD')

# Show the list and confirm its type before turning it into a Series
print(l, type(l), sep='\n')

['A', 'B', 'C', 'D']
<class 'list'>


In [None]:
# Convert the list into a Series; with no index given, the labels shown are 0..3
series = pd.Series(data=l)
print(series, type(series), sep='\n')

0    A
1    B
2    C
3    D
dtype: object
<class 'pandas.core.series.Series'>


In [None]:
# A Series built from whole numbers is stored with an integer dtype
int_series = pd.Series(data=[1, 2, 3, 4])
int_series

0    1
1    2
2    3
3    4
dtype: int64

In [None]:
# NumPy's one-dimensional array structure (ndarray)
array_a = np.arange(1, 6)  # the integers 1 through 5
array_a

array([1, 2, 3, 4, 5])

In [None]:
# Confirm that array_a is a NumPy ndarray
type(array_a)

numpy.ndarray

In [None]:
# Wrap the ndarray's values in a Series
series_a = pd.Series(data=array_a)
series_a

0    1
1    2
2    3
3    4
4    5
dtype: int64

##### Attributes
- Attribute: a variable providing metadata about an object
- Method: a function that can be associated with an object

In [None]:
# "." Notation
# Attributes are read without parentheses: element type first, then element count
print(series_a.dtype, series_a.size, sep='\n')

int64
5


In [None]:
# Assign a name to the Series, then show it inside the repr and on its own
series_a.name = 'Created Series'
print(series_a, series_a.name, sep='\n')

0    1
1    2
2    3
3    4
4    5
Name: Created Series, dtype: int64
Created Series


##### Indexing

In [None]:
# Country -> capital city mapping
capitals = {"Norway": "Oslo", "France": "Paris", "Japan": "Tokyo"}  # "Tokyo" was misspelled "Toyo"
print(capitals)
print(type(capitals))

{'Norway': 'Oslo', 'France': 'Paris', 'Japan': 'Toyo'}
<class 'dict'>


In [None]:
# Build a Series from the dict: keys become the index labels, values become the data
cap_series = pd.Series(data=capitals)
cap_series

Norway     Oslo
France    Paris
Japan      Toyo
dtype: object

In [None]:
# Inspect the index labels of cap_series
cap_series.index

Index(['Norway', 'France', 'Japan'], dtype='object')

##### Label-based vs Position-based Indexing

- Explicit vs Implicit











In [None]:
# Label-based (explicit) indexing: look a value up by its index label
print(cap_series, cap_series['Norway'], sep='\n')

Norway     Oslo
France    Paris
Japan      Toyo
dtype: object
Oslo


In [None]:
# Position-based (implicit, zero-based) indexing:
# when the user specifies no index, the Series carries a default RangeIndex
print(int_series, int_series.index, type(int_series.index), sep='\n')

0    1
1    2
2    3
3    4
dtype: int64
RangeIndex(start=0, stop=4, step=1)
<class 'pandas.core.indexes.range.RangeIndex'>


In [None]:
# List the indices
int_series.index.tolist()

[0, 1, 2, 3]

##### Working with Indices in Python

In [None]:
# Implicit (position-based) access.
# .iloc is the explicit positional accessor. Plain [] with an integer is a label
# lookup, and its positional fallback on a non-integer index (as in cap_series)
# is deprecated in recent pandas releases.
print(int_series.iloc[0])
print(cap_series.iloc[0])

1
Oslo


In [None]:
# Explicit index: supply our own integer labels (1-5) instead of the default 0-4
series_b = pd.Series(
    data=[11, 22, 33, 44, 55],
    index=[1, 2, 3, 4, 5],
)
series_b

1    11
2    22
3    33
4    44
5    55
dtype: int64

In [None]:
# Explicit index made of strings (label-based)
series_c = pd.Series(data=[11, 22, 33, 44, 55], index=list("12345"))
series_c["1"]

11

In [None]:
# Another example of explicit (label-based) indexing
cap_series.loc['France']

'Paris'

##### Using Methods
- Can have access to the object's data
- Can manipulate the object's state
- a parameter (n)  vs an argument (our choice for n) 

In [None]:
# Sales amounts labelled by date string
sales = pd.Series(
    data=[3999, 3000, 3000, 3000, 3000],
    index=["7/4/2021", "8/4/2021", "8/23/2021", "5/15/2020", "6/12/2019"],
)

sales

7/4/2021     3999
8/4/2021     3000
8/23/2021    3000
5/15/2020    3000
6/12/2019    3000
dtype: int64

In [None]:
# Methods

# Aggregations computed from the values themselves
print("INFORMATION 'FROM' DATASET")
print(sales.sum(), sales.min(), sales.max(), sales.mean(), sep="\n")

print("")

# Index labels at which the largest / smallest value occurs
print("INFORMATION 'ABOUT' DATASET") # Label
print(sales.idxmax(), sales.idxmin(), sep="\n")

INFORMATION 'FROM' DATASET
15999
3000
3999
3199.8

INFORMATION 'ABOUT' DATASET
7/4/2021
8/4/2021


In [None]:
# Inspection helpers (non-mathematical, so they also suit non-numeric data)
print(sales.head())
print("")
print(sales.tail(2))   # Pass n to choose how many rows are shown
print("")
# `include` only applies to DataFrames and is ignored for a Series, so it is omitted here
print(sales.describe())
print("")
print(sales.shape)

7/4/2021     3999
8/4/2021     3000
8/23/2021    3000
5/15/2020    3000
6/12/2019    3000
dtype: int64

5/15/2020    3000
6/12/2019    3000
dtype: int64

count       5.000000
mean     3199.800000
std       446.766382
min      3000.000000
25%      3000.000000
50%      3000.000000
75%      3000.000000
max      3999.000000
dtype: float64

(5,)


### **Pandas DataFrame**
- DataFrame: Multi-column data(2D data structure: columns and rows) 
- vs Series: single-column data (1D: rows only)


In [None]:
# Each dict key becomes a column; each list supplies that column's values
data = {
    'ProductName': ['Product A', 'Product B', 'Product C'],
    'ProductPrice': [200, 31, 51],
}

df = pd.DataFrame(data=data)
df

Unnamed: 0,ProductName,ProductPrice
0,Product A,200
1,Product B,31
2,Product C,51


In [None]:
# With specified Index: one row label per product
df = pd.DataFrame(data=data, index=list('ABC'))
df

Unnamed: 0,ProductName,ProductPrice
A,Product A,200
B,Product B,31
C,Product C,51


In [None]:
# Keep the row labels in a named list so they can be reused
product_IDs = list('ABC')
df = pd.DataFrame(data=data, index=product_IDs)
df

Unnamed: 0,ProductName,ProductPrice
A,Product A,200
B,Product B,31
C,Product C,51


In [None]:
# Construct a DF from a dictionary of pandas Series (both use the same index labels)
ser_products = pd.Series(data=['Product A', 'Product B', 'Product C'], index=product_IDs)
ser_prices = pd.Series(data=[200, 31, 51], index=product_IDs)

data = dict(ProductName=ser_products, ProductPrice=ser_prices)
df = pd.DataFrame(data=data)
df

Unnamed: 0,ProductName,ProductPrice
A,Product A,200
B,Product B,31
C,Product C,51


In [None]:
# Rename the columns by assigning a new list of labels to the .columns attribute
df.columns =['Name','Price']
df

Unnamed: 0,Name,Price
A,Product A,200
B,Product B,31
C,Product C,51


In [None]:
# Replace the row labels by assigning a new list to the .index attribute
df.index = ['A1','B2','C3']
df

Unnamed: 0,Name,Price
A1,Product A,200
B2,Product B,31
C3,Product C,51


In [None]:
# Supply the data, row labels and column labels in a single constructor call
df = pd.DataFrame(
    [['Product A', 222], ['Product B', 22], ['Product C', 332]],
    index=['A1', 'A2', 'A3'],
    columns=['PName', 'PPrice'],
)
display(df)
print(df.shape)

Unnamed: 0,PName,PPrice
A1,Product A,222
A2,Product B,22
A3,Product C,332


(3, 2)


### **Data Cleaning and Data Preprocessing**
- Data Cleaning: removing inconsistencies from the data
- Data Preprocessing: applying statistical methods to prepare the data


#### **Pandas Series II**

##### Common methods

In [None]:
# Read the CSV and collapse its single column into a Series.
# `read_csv(..., squeeze=True)` is deprecated (pandas 1.4) and removed in pandas 2.0;
# calling `.squeeze("columns")` on the result is the supported equivalent.
data = pd.read_csv(
    "/content/drive/MyDrive/Colab Notebooks/365 Data Science/Programming/Pandas/Location.csv"
).squeeze("columns")

# Work on a copy so the freshly loaded object is left untouched
location_data = data.copy()
print(location_data.head())
print(type(location_data))

0     Location 3
1     Location 6
2     Location 8
3    Location 26
4    Location 34
Name: Location, dtype: object
<class 'pandas.core.series.Series'>


In [None]:
# Other quick checks to try here:
#location_data.describe()
#len(location_data)
location_data.nunique() # Number of distinct values in the Series
# On a Series this is a single integer; NaN values are excluded by default.
#type(location_data.nunique())

# .unique() delivers the values in the order they have appeared in the data set

296

##### Converting Series into Arrays

In [None]:
# Not recommended: .values works, but .array and .to_numpy() are the preferred accessors
location_data.values

array(['Location 3', 'Location 6', 'Location 8', ..., 'Location 11',
       'Location 26', 'Location 94'], dtype=object)

In [None]:
# Better way: .array exposes the same data as a pandas array object
# instead of a plain NumPy array
location_data.array

In [None]:
# Convert the Series into a NumPy array
location_data.to_numpy()

# to_numpy() also accepts a dtype, converting the values at the same time (float <-> int):
# var = df[indices/column].to_numpy(dtype='float')

array(['Location 3', 'Location 6', 'Location 8', ..., 'Location 11',
       'Location 26', 'Location 94'], dtype=object)

##### sort_values()

In [None]:
numbers = pd.Series(data=[1, 22, 11, 13, 6])

# Ascending is the default order; pass ascending=False for descending.
# Each value keeps its original index label after sorting.
numbers.sort_values(ascending=True)

0     1
4     6
2    11
3    13
1    22
dtype: int64

##### Attribute and Method Chaining

In [None]:
# Attribute Chaining

# Row labels, then the Series name, then a rich preview of the first rows
print(location_data.index, location_data.name, sep='\n')
display(location_data.head())

RangeIndex(start=0, stop=1043, step=1)
Location


0     Location 3
1     Location 6
2     Location 8
3    Location 26
4    Location 34
Name: Location, dtype: object

In [None]:
# Give the index itself a name; notice the new header above the labels in the output
location_data.index.name = 'Index'
location_data

Index
0        Location 3
1        Location 6
2        Location 8
3       Location 26
4       Location 34
           ...     
1038    Location 73
1039    Location 82
1040    Location 11
1041    Location 26
1042    Location 94
Name: Location, Length: 1043, dtype: object

In [None]:
# Method Chaining: each call works on the result of the previous one.
# The values are strings, so they sort as text ("Location 10" before "Location 3").
(
    location_data
    .sort_values()
    .head()
)

Index
637     Location 1
884     Location 1
465     Location 1
716    Location 10
623    Location 10
Name: Location, dtype: object

In [None]:
# The index repr now carries its name; its type is unchanged
print(location_data.index, type(location_data.index), sep='\n')

RangeIndex(start=0, stop=1043, step=1, name='Index')
<class 'pandas.core.indexes.range.RangeIndex'>


In [None]:
# The index can be converted directly into a NumPy array as well
location_data.index.to_numpy()

array([   0,    1,    2, ..., 1040, 1041, 1042])

##### .sort_index()

In [None]:
# Sort by value, largest first, and keep the result under a new name
location_data_sv = location_data.sort_values(ascending=False)
location_data_sv.head()

Index
372    Location 99
757    Location 98
669    Location 97
128    Location 97
482    Location 97
Name: Location, dtype: object

In [None]:
# Notice that the index is no longer in order: every label stayed with its value
location_data_sv.index

Int64Index([ 372,  757,  669,  128,  482,  271,   29,  612,  598,  518,
            ...
            1010,  329,  542,  298,   56,  623,  716,  465,  884,  637],
           dtype='int64', name='Index', length=1043)

In [None]:
location_data_sv.index.sort_values() 
# This did not overwrite the contents of the data; check with .head() to confirm.
# .index.sort_values() is only useful when you need the sorted index structure as a separate object.

Int64Index([   0,    1,    2,    3,    4,    5,    6,    7,    8,    9,
            ...
            1033, 1034, 1035, 1036, 1037, 1038, 1039, 1040, 1041, 1042],
           dtype='int64', name='Index', length=1043)

In [None]:
# Rerun to try the omega way
#location_data_sv = location_data.sort_values(ascending=False)

# Without storing it into a variable, it doesn't affect the state of the initial data set -
# It only creates a "temporary" copy of the object's data it's been applied to and displays the output of its manipulation

In [None]:
# Restore the original row order by sorting the value-sorted Series on its index labels.
# (Sorting location_data itself would demonstrate nothing: its index is already in order.)
location_data_sv = location_data_sv.sort_index()
location_data_sv

Index
0        Location 3
1        Location 6
2        Location 8
3       Location 26
4       Location 34
           ...     
1038    Location 73
1039    Location 82
1040    Location 11
1041    Location 26
1042    Location 94
Name: Location, Length: 1043, dtype: object

#### **Pandas DataFrames II**


##### A Revision to pandas DataFrames

In [None]:
# A 2D array: two rows of three values each
array_a = np.array([[3, 2, 1],
                    [4, 3, 2]])
print(array_a)

# Each inner list becomes one row; the three columns are named explicitly
df = pd.DataFrame(data=array_a, columns=['Column 1', 'Column 2', 'Column 3'])
display(df)

[[3 2 1]
 [4 3 2]]


Unnamed: 0,Column 1,Column 2,Column 3
0,3,2,1
1,4,3,2


In [None]:
# Same frame, now with explicit row labels as well
df = pd.DataFrame(
    data=array_a,
    index=['Row 1', 'Row 2'],
    columns=['Column 1', 'Column 2', 'Column 3'],
)
df

Unnamed: 0,Column 1,Column 2,Column 3
Row 1,3,2,1
Row 2,4,3,2


In [None]:
# Load the lending data; the CSV is read directly from the .zip archive
data = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/365 Data Science/Programming/Pandas/ex3/4_6_lending-company-data.zip',
                   index_col = 'StringID') # Use the "StringID" column as the row index
# Work on a copy so the loaded frame is left untouched
lending_co_data = data.copy()
lending_co_data.head()

Unnamed: 0_level_0,LoanID,Product,CustomerGender,Location,Region,TotalPrice,StartDate,Deposit,DailyRate,TotalDaysYr,AmtPaid36,AmtPaid60,AmtPaid360,LoanStatus
StringID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
LoanID_1,1,Product B,Female,Location 3,Region 2,17600.0,04/07/2018,2200,45,365,3221,4166,14621,Active
LoanID_2,2,Product D,Female,Location 6,Region 6,,02/01/2019,2200,45,365,3161,4096,16041,Active
LoanID_3,3,Product B,Male,Location 8,Region 3,16600.0,08/12/2016,1000,45,365,2260,3205,16340,
LoanID_4,4,Product A,Male,Location 26,Region 2,17600.0,,2200,45,365,3141,4166,16321,Active
LoanID_5,5,Product B,Female,Location 34,Region 3,21250.0,28/10/2017,2200,55,365,3570,4745,14720,Active


##### Common Attributes for Working with DataFrames

In [None]:
# .index holds the row labels (don't confuse them with the default integer index 0, 1, 2, 3);
# .columns holds the column labels
print(lending_co_data.index, lending_co_data.columns, sep='\n')

Int64Index([   1,    2,    3,    4,    5,    6,    7,    8,    9,   10,
            ...
            1034, 1035, 1036, 1037, 1038, 1039, 1040, 1041, 1042, 1043],
           dtype='int64', name='LoanID', length=1043)
Index(['StringID', 'Product', 'CustomerGender', 'Location', 'Region',
       'TotalPrice', 'StartDate', 'Deposit', 'DailyRate', 'TotalDaysYr',
       'AmtPaid36', 'AmtPaid60', 'AmtPaid360', 'LoanStatus'],
      dtype='object')


In [None]:
# .axes returns a list holding the row index first and the column index second
lending_co_data.axes  # Combined .index and .columns

[Int64Index([   1,    2,    3,    4,    5,    6,    7,    8,    9,   10,
             ...
             1034, 1035, 1036, 1037, 1038, 1039, 1040, 1041, 1042, 1043],
            dtype='int64', name='LoanID', length=1043),
 Index(['StringID', 'Product', 'CustomerGender', 'Location', 'Region',
        'TotalPrice', 'StartDate', 'Deposit', 'DailyRate', 'TotalDaysYr',
        'AmtPaid36', 'AmtPaid60', 'AmtPaid360', 'LoanStatus'],
       dtype='object')]

In [None]:
# Per-column data types, then the (rows, columns) shape
print(lending_co_data.dtypes, lending_co_data.shape, sep='\n')

StringID           object
Product            object
CustomerGender     object
Location           object
Region             object
TotalPrice        float64
StartDate          object
Deposit             int64
DailyRate           int64
TotalDaysYr         int64
AmtPaid36           int64
AmtPaid60           int64
AmtPaid360          int64
LoanStatus         object
dtype: object
(1043, 14)


In [None]:
# .values returns the frame's contents as a NumPy array; to_numpy() is the preferred method
lending_co_data.values # But use the better method, coming up

array([['LoanID_1', 'Product B', 'Female', ..., 4166, 14621, 'Active'],
       ['LoanID_2', 'Product D', 'Female', ..., 4096, 16041, 'Active'],
       ['LoanID_3', 'Product B', 'Male', ..., 3205, 16340, nan],
       ...,
       ['LoanID_1041', 'Product A', 'NotSpecified', ..., 5143, 16617,
        'Finished Payment'],
       ['LoanID_1042', 'Product B', 'Female', ..., 3462, 15617,
        'Finished Payment'],
       ['LoanID_1043', 'Product A', 'NotSpecified', ..., 4743, 16617,
        'Finished Payment']], dtype=object)

In [None]:
# Preferred conversion of the whole frame to a NumPy array
lending_co_data.to_numpy() # BOOM CHAKALAKA (For compatibility issues in the long term)

array([['LoanID_1', 'Product B', 'Female', ..., 4166, 14621, 'Active'],
       ['LoanID_2', 'Product D', 'Female', ..., 4096, 16041, 'Active'],
       ['LoanID_3', 'Product B', 'Male', ..., 3205, 16340, nan],
       ...,
       ['LoanID_1041', 'Product A', 'NotSpecified', ..., 5143, 16617,
        'Finished Payment'],
       ['LoanID_1042', 'Product B', 'Female', ..., 3462, 15617,
        'Finished Payment'],
       ['LoanID_1043', 'Product A', 'NotSpecified', ..., 4743, 16617,
        'Finished Payment']], dtype=object)

##### Data Selection
- Python is Case-sensitive

In [None]:
# Attribute ("dot") access returns the column as a Series
lending_co_data.Product   # Select specific column from the dataset

LoanID
1       Product B
2       Product D
3       Product B
4       Product A
5       Product B
          ...    
1039    Product B
1040    Product A
1041    Product A
1042    Product B
1043    Product A
Name: Product, Length: 1043, dtype: object

In [None]:
# Alternative: the indexing operator
# This is the only option when a column name contains spaces, because dot notation would raise an error
lending_co_data['Product']

LoanID
1       Product B
2       Product D
3       Product B
4       Product A
5       Product B
          ...    
1039    Product B
1040    Product A
1041    Product A
1042    Product B
1043    Product A
Name: Product, Length: 1043, dtype: object

In [None]:
# Passing a list of column names (note the double brackets) selects a sub-frame
lending_co_data[['Product']] # Notice the change in the output: it is now a DataFrame

Unnamed: 0_level_0,Product
LoanID,Unnamed: 1_level_1
1,Product B
2,Product D
3,Product B
4,Product A
5,Product B
...,...
1039,Product B
1040,Product A
1041,Product A
1042,Product B


In [None]:
# Select several columns at once by listing their names
lending_co_data[['Product', 'Location']].head()

Unnamed: 0_level_0,Product,Location
LoanID,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Product B,Location 3
2,Product D,Location 6
3,Product B,Location 8
4,Product A,Location 26
5,Product B,Location 34


In [None]:
# Same selection, with the column list kept in a variable for reuse
prod_loc = ['Product', 'Location'] 
lending_co_data[prod_loc].head()

Unnamed: 0_level_0,Product,Location
LoanID,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Product B,Location 3
2,Product D,Location 6
3,Product B,Location 8
4,Product A,Location 26
5,Product B,Location 34


##### Data Selection - Indexing Data with .iloc[]
- "i" for Integer and "loc" for location
- Purely integer-location based indexing for selection by position.
- .iloc[] is primarily integer position based (from 0 to length-1 of the axis), but may also be used with a boolean array.

In [None]:
# Second row (position 1), returned as a Series whose labels are the column names
lending_co_data.iloc[1]

StringID            LoanID_2
Product            Product D
CustomerGender        Female
Location          Location 6
Region              Region 6
TotalPrice               NaN
StartDate         02/01/2019
Deposit                 2200
DailyRate                 45
TotalDaysYr              365
AmtPaid36               3161
AmtPaid60               4096
AmtPaid360             16041
LoanStatus            Active
Name: 2, dtype: object

In [None]:
# Row position first, column position second
lending_co_data.iloc[1, 3] # The value in the SECOND row [1] and the FOURTH column [3]

'Location 6'

In [None]:
# Obtain the entire 2nd row from the DataFrame using iloc
lending_co_data.iloc[1,:]   # ":" selects every column here; in the row position it selects every row

StringID            LoanID_2
Product            Product D
CustomerGender        Female
Location          Location 6
Region              Region 6
TotalPrice               NaN
StartDate         02/01/2019
Deposit                 2200
DailyRate                 45
TotalDaysYr              365
AmtPaid36               3161
AmtPaid60               4096
AmtPaid360             16041
LoanStatus            Active
Name: 2, dtype: object

In [None]:
# Obtain the entire "Location" column (column position 3) from the data
lending_co_data.iloc[:, 3]

LoanID
1        Location 3
2        Location 6
3        Location 8
4       Location 26
5       Location 34
           ...     
1039    Location 73
1040    Location 82
1041    Location 11
1042    Location 26
1043    Location 94
Name: Location, Length: 1043, dtype: object

In [None]:
# Obtain the entire 2nd and 4th rows
# Here, [1, 3] is a row specifier; passing a list returns a DataFrame
lending_co_data.iloc[[1, 3], :]

Unnamed: 0_level_0,StringID,Product,CustomerGender,Location,Region,TotalPrice,StartDate,Deposit,DailyRate,TotalDaysYr,AmtPaid36,AmtPaid60,AmtPaid360,LoanStatus
LoanID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2,LoanID_2,Product D,Female,Location 6,Region 6,,02/01/2019,2200,45,365,3161,4096,16041,Active
4,LoanID_4,Product A,Male,Location 26,Region 2,17600.0,,2200,45,365,3141,4166,16321,Active


In [None]:
# Obtain the entire 2nd and 4th columns
# Here, [1, 3] is a column specifier because it sits in the second position
lending_co_data.iloc[:, [1, 3]]

Unnamed: 0_level_0,Product,Location
LoanID,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Product B,Location 3
2,Product D,Location 6
3,Product B,Location 8
4,Product A,Location 26
5,Product B,Location 34
...,...,...
1039,Product B,Location 73
1040,Product A,Location 82
1041,Product A,Location 11
1042,Product B,Location 26


##### Data Selection - Indexing Data with .loc[]
- sub-select info from a df by referring to its index labels


In [None]:
# Specific row (entire), selected by its index label
lending_co_data.loc['LoanID_3'] 

LoanID                     3
Product            Product B
CustomerGender          Male
Location          Location 8
Region              Region 3
TotalPrice             16600
StartDate         08/12/2016
Deposit                 1000
DailyRate                 45
TotalDaysYr              365
AmtPaid36               2260
AmtPaid60               3205
AmtPaid360             16340
LoanStatus               NaN
Name: LoanID_3, dtype: object

In [None]:
# Arguably more readable: the ":" states explicitly that every column is wanted
lending_co_data.loc['LoanID_3', :]

LoanID                     3
Product            Product B
CustomerGender          Male
Location          Location 8
Region              Region 3
TotalPrice             16600
StartDate         08/12/2016
Deposit                 1000
DailyRate                 45
TotalDaysYr              365
AmtPaid36               2260
AmtPaid60               3205
AmtPaid360             16340
LoanStatus               NaN
Name: LoanID_3, dtype: object

In [None]:
# Look up a single cell: row label 'LoanID_3', column label 'Region'
lending_co_data.loc['LoanID_3', 'Region']

'Region 3'

In [None]:
# Entire column
# Equivalent to lending_co_data['Location'], but written with .loc
# The ":" row specifier is required: without it 'Location' is treated as a row label and an error is raised
lending_co_data.loc[:, 'Location']

StringID
LoanID_1        Location 3
LoanID_2        Location 6
LoanID_3        Location 8
LoanID_4       Location 26
LoanID_5       Location 34
                  ...     
LoanID_1039    Location 73
LoanID_1040    Location 82
LoanID_1041    Location 11
LoanID_1042    Location 26
LoanID_1043    Location 94
Name: Location, Length: 1043, dtype: object

##### A Few Comments on Using .loc[] and .iloc[]

In [None]:
# Select the column first, then pick an entry by position
lending_co_data['TotalPrice'].iloc[0] # First row (position 0) of the TotalPrice column

17600.0

In [None]:
# Select the column first, then pick an entry by its index label
lending_co_data['TotalPrice'].loc['LoanID_1'] 

17600.0

In [None]:
# The same cell in one step, purely by position
lending_co_data.iloc[0, 5] # 0 is the row specifier, 5 is the column specifier

17600.0

In [None]:
# The same cell in one step, purely by label
lending_co_data.loc['LoanID_1', 'TotalPrice'] # Explicitly

17600.0

In [None]:
# A list of row labels returns a DataFrame
#lending_co_data.loc[['LoanID_1', 'LoanID_6']]

# This is the ADVISED approach: state the column specifier explicitly
lending_co_data.loc[['LoanID_1', 'LoanID_6'], :]

Unnamed: 0_level_0,LoanID,Product,CustomerGender,Location,Region,TotalPrice,StartDate,Deposit,DailyRate,TotalDaysYr,AmtPaid36,AmtPaid60,AmtPaid360,LoanStatus
StringID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
LoanID_1,1,Product B,Female,Location 3,Region 2,17600.0,04/07/2018,2200,45,365,3221,4166,14621,Active
LoanID_6,6,Product A,Male,Location 34,Region 1,,19/04/2019,2200,45,365,3301,4066,15141,Active


In [None]:
# Shorter alternative: lending_co_data['TotalPrice'].iloc[[0, 5]]
# Using ":" states explicitly that every row of the column is selected before
# the 1st and 6th entries are picked by position
lending_co_data.loc[:, 'TotalPrice'].iloc[[0, 5]]

StringID
LoanID_1    17600.0
LoanID_6        NaN
Name: TotalPrice, dtype: float64