In [None]:
# Import libraries
import pandas as pd
import numpy as np

In [None]:
# Read the car financing workbook into a DataFrame
filename = 'data/car_financing.xlsx'
df = pd.read_excel(io=filename)

In [None]:
## Filtering
# Keep only the rows whose car_type is 'Toyota Sienna' and whose
# interest_rate is 0.0702.
car_filter = df['car_type'] == 'Toyota Sienna'
# Compare the float rate with a tolerance: an exact `==` on floating point
# values can silently miss rows when the stored value carries a tiny
# rounding error (e.g. 0.07020000000000001).
interest_filter = pd.Series(np.isclose(df['interest_rate'], 0.0702),
                            index=df.index)
# Both conditions must hold; all columns are kept.
df = df.loc[car_filter & interest_filter, :]

In [None]:
# Approach 1: rename selected columns through an {old_name: new_name} mapping.
# Columns not listed in the mapping keep their current names.
column_renames = {
    'Starting Balance': 'starting_balance',
    'Interest Paid': 'interest_paid',
    'Principal Paid': 'principal_paid',
    'New Balance': 'new_balance',
}
df = df.rename(columns=column_renames)

In [None]:
# Approach 2: replace the whole list of column names at once.
# Only 'Month' -> 'month' actually changes here, yet every other column
# still has to be spelled out, in its existing position.
new_column_names = [
    'month',
    'starting_balance',
    'Repayment',
    'interest_paid',
    'principal_paid',
    'new_balance',
    'term',
    'interest_rate',
    'car_type',
]
df.columns = new_column_names

In [None]:
# Approach 1: the drop method.
# It takes a list of labels, so several columns can be removed in one call.
df = df.drop(labels=['term'], axis='columns')

In [None]:
# Approach 2 use the del command
# `del` removes the column from df in place, so no reassignment is needed.
# NOTE(review): presumably re-running this cell raises KeyError because the
# column is already gone — restart the kernel and run all cells if that happens.
del df['Repayment']

In [None]:
# Dimensions of the DataFrame as a (rows, columns) tuple
df.shape

## Removing or Filling in Missing Data
This is an important subject: before you graph data, you should make sure it does not contain missing values, because trying to plot them can cause an error or lead to a misinterpretation of the data.

In [None]:
# Per-column dtype and non-null count; a non-null count lower than the
# number of entries means that column contains missing values
df.info()

### Remove Missing Values
You can remove missing values by using the `dropna` method. 

In [None]:
# dropna can discard a row when 'any' of its values is NaN, or only when
# 'all' of them are.
# Dropping rows may not be the best strategy for our dataset.
df.iloc[30:40].dropna(how='any')

### Filling in Missing Values
There are a [variety of ways to fill in missing values](https://pandas.pydata.org/pandas-docs/stable/user_guide/missing_data.html). 

In [None]:
# Inspect rows 30-39 (by position) to see where the missing value sits
df['interest_paid'].iloc[30:40]

In [None]:
# Replacing the NaN with zero is probably a bad idea.
df['interest_paid'].iloc[30:40].fillna(value=0)

In [None]:
# Back fill: replace each NaN with the next valid value that follows it.
# Uses .bfill() because fillna(method='bfill') is deprecated in recent
# pandas releases.
df['interest_paid'].iloc[30:40].bfill()

In [None]:
# Forward fill: replace each NaN with the last valid value that precedes it.
# Uses .ffill() because fillna(method='ffill') is deprecated in recent
# pandas releases.
df['interest_paid'].iloc[30:40].ffill()

In [None]:
# Linear interpolation: estimate each missing value from its neighbours
df['interest_paid'].iloc[30:40].interpolate(method='linear')

In [None]:
# Interest paid before filling in the nan with a value
# (sum() skips NaN by default, so any missing payment is left out of this total)
df['interest_paid'].sum()

In [None]:
# Overwrite every missing interest_paid entry with the actual value, 93.24
interest_missing = df['interest_paid'].isnull()
df.loc[interest_missing, 'interest_paid'] = 93.24

In [None]:
# Interest paid after filling in the nan with a value
# (the total now includes the value written into the previously missing entries)
df['interest_paid'].sum()

In [None]:
# Notice we don't have NaN values in the DataFrame anymore:
# every column's non-null count now equals the number of entries
df.info()