In [None]:
# Import libraries
import pandas as pd
import numpy as np

In [None]:
# Read the workbook at the given path into a DataFrame
filename = 'data/car_financing.xlsx'
df = pd.read_excel(io=filename)

In [None]:
## Filtering
# Keep only the rows whose car_type is 'Toyota Sienna' and whose interest_rate is 0.0702.
car_filter = df['car_type'] == 'Toyota Sienna'
# Compare the float column with a tolerance instead of ==: a stored rate that
# differs from 0.0702 only by floating-point rounding would otherwise be
# silently excluded. The result is wrapped back into a Boolean Series on
# df's index so it stays aligned with car_filter and usable with .loc.
interest_filter = pd.Series(np.isclose(df['interest_rate'], 0.0702),
                            index=df.index,
                            name='interest_rate')
df = df.loc[car_filter & interest_filter, :]

In [None]:
# Approach 1: rename selected columns with an {old name: new name} mapping.
# Columns not listed in the mapping keep their current names.
snake_case_names = {
    'Starting Balance': 'starting_balance',
    'Interest Paid': 'interest_paid',
    'Principal Paid': 'principal_paid',
    'New Balance': 'new_balance',
}
df = df.rename(columns=snake_case_names)

In [None]:
# Approach 2: replace the whole list of column labels.
# Only Month -> month is changing here, but assigning to df.columns
# requires a label for every column, in order.
new_column_names = [
    'month',
    'starting_balance',
    'Repayment',
    'interest_paid',
    'principal_paid',
    'new_balance',
    'term',
    'interest_rate',
    'car_type',
]
df.columns = new_column_names

In [None]:
# Approach 1: DataFrame.drop
# It takes a list of labels, so several columns can be removed in one call.
df = df.drop(labels=['term'], axis='columns')

In [None]:
# Approach 2: use the del statement
# del removes one column from df in place, so no reassignment is needed.
del df['Repayment']

In [None]:
# (number of rows, number of columns) of df at this point
df.shape

## Identifying Missing Data
Values can be missing from the original dataset or can become missing as a product of data manipulation. In pandas, missing values are typically represented as `NaN` or `None`.

Missing data can: 
* Hint at data collection errors.
* Indicate improper conversion or manipulation.
* Not be flagged as missing at all. In some datasets, missing data is recorded as "zero", "false", "not applicable", or an empty string, among other possibilities.

This is an important subject because, before you graph data, you should make sure you aren't trying to graph missing values; doing so can cause an error or a misinterpretation of the data.

### Finding Missing Values

In [None]:
# Print a summary of df; its per-column non-null counts can reveal columns with missing values
df.info()

Two common methods for indicating which values in a DataFrame are missing are `isna` and `isnull`. They are exactly the same method under two different names (`isnull` is an alias of `isna`).

In [None]:
# isna() gives a pandas Series of True and False values (True where the value is missing);
# head() displays only the first few of them.
df['interest_paid'].isna().head()

In [None]:
# Boolean mask: True for each row whose interest_paid value is missing
interest_missing = df['interest_paid'].isna()

In [None]:
# Select the row(s) where interest_paid is NaN, keeping all columns
df.loc[interest_missing,:]

In [None]:
# The ~ operator inverts a Boolean Series element by element, which negates the filter:
# every row whose interest_paid is not NaN is returned.
df.loc[~interest_missing,:]

In [None]:
# Count the number of missing values in interest_paid.
# sum() works because Booleans are a subtype of integers: each True adds 1 and each False adds 0.
df['interest_paid'].isna().sum()

In [None]:
# Plain-Python illustration of the same idea: True counts as 1 and False as 0, so this evaluates to 1
True + False + False 