# Python fundamentals summary
**NumPy and pandas**

In [1]:
# Setup and imports
from dotenv import load_dotenv
import numpy as np
import pandas as pd
import os
from pathlib import Path

In [2]:
# Load variables from a .env file (python-dotenv) into the process environment.
load_dotenv()

# DATA_DIR_RAW must be supplied through the environment / .env file.
_raw_dir_setting = os.getenv("DATA_DIR_RAW")
if _raw_dir_setting is None:
    # Path(None) would raise an opaque TypeError; fail with an actionable message instead.
    raise RuntimeError(
        "Environment variable DATA_DIR_RAW is not set; define it in the .env file "
        "or export it before running this notebook."
    )

# The configured directory is resolved against the parent of the current working
# directory (presumably the project root when the notebook is run from a
# subfolder — confirm). An absolute DATA_DIR_RAW is kept as-is by pathlib's `/`.
PARENT = Path.cwd().parent
DATA_DIR_RAW = PARENT / Path(_raw_dir_setting)

In [3]:
# Read the loan CSV from the raw-data directory into a DataFrame.
df = pd.read_csv(DATA_DIR_RAW.joinpath("loan_data.csv"))

In [4]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

Unnamed: 0,credit.policy,purpose,int.rate,installment,log.annual.inc,dti,fico,days.with.cr.line,revol.bal,revol.util,inq.last.6mths,delinq.2yrs,pub.rec,not.fully.paid
0,1,debt_consolidation,0.1189,829.1,11.350407,19.48,737,5639.958333,28854,52.1,0,0,0,0
1,1,credit_card,0.1071,228.22,11.082143,14.29,707,2760.0,33623,76.7,0,0,0,0
2,1,debt_consolidation,0.1357,366.86,10.373491,11.63,682,4710.0,3511,25.6,1,0,0,0
3,1,debt_consolidation,0.1008,162.34,11.350407,8.1,712,2699.958333,33667,73.2,1,0,0,0
4,1,credit_card,0.1426,102.92,11.299732,14.97,667,4066.0,4740,39.5,0,1,0,0


In [13]:
# Extract the 'credit.policy' column as a NumPy array.
df.loc[:, 'credit.policy'].to_numpy()

array([1, 1, 1, ..., 0, 0, 0], shape=(9578,))

In [5]:
# Clean column names: map the dotted / abbreviated CSV headers to snake_case names.
cols_mapping = {
    'credit.policy': 'credit_policy',
    'int.rate': 'interest_rate',
    'installment': 'installment',  # name unchanged (identity mapping)
    'log.annual.inc': 'log_annual_income',
    'dti': 'debt_income_ratio',
    'days.with.cr.line': 'days_with_credit_line',
    'revol.bal': 'revolve_balance',
    'revol.util': 'revolve_utilized',
    'inq.last.6mths': 'inquiries_last_6_mon',
    'delinq.2yrs': 'delinquent_2_yrs',
    'pub.rec': 'public_recs',
    'not.fully.paid': 'default'
}
df = df.rename(columns=cols_mapping)

# DataFrame.rename silently skips mapping keys that are absent from the frame,
# so a typo in the mapping or a changed CSV schema would otherwise go unnoticed.
# Checking the *target* names keeps this cell safe to re-run after the rename.
missing_cols = sorted(set(cols_mapping.values()) - set(df.columns))
if missing_cols:
    raise KeyError(f"Columns missing after rename: {missing_cols}")

In [6]:
# Show the first five rows again to inspect the column headers after the rename.
df.head(n=5)

Unnamed: 0,credit_policy,purpose,interest_rate,installment,log_annual_income,debt_income_ratio,fico,days_with_credit_line,revolve_balance,revolve_utilized,inquiries_last_6_mon,delinquent_2_yrs,public_recs,default
0,1,debt_consolidation,0.1189,829.1,11.350407,19.48,737,5639.958333,28854,52.1,0,0,0,0
1,1,credit_card,0.1071,228.22,11.082143,14.29,707,2760.0,33623,76.7,0,0,0,0
2,1,debt_consolidation,0.1357,366.86,10.373491,11.63,682,4710.0,3511,25.6,1,0,0,0
3,1,debt_consolidation,0.1008,162.34,11.350407,8.1,712,2699.958333,33667,73.2,1,0,0,0
4,1,credit_card,0.1426,102.92,11.299732,14.97,667,4066.0,4740,39.5,0,1,0,0


In [21]:
# Write the frame to CSV in the raw-data directory, omitting the index column.
df.to_csv(path_or_buf=DATA_DIR_RAW.joinpath("loan_data_cols_changed.csv"), index=False)

### Validate saved data

In [7]:
# Reload the saved CSV so it can be compared with the in-memory frame.
df_from_csv = pd.read_csv(DATA_DIR_RAW.joinpath("loan_data_cols_changed.csv"))

In [1]:
# Make the parent of the working directory importable so the local `src`
# package can be found.
import os
import sys

_parent_dir = os.path.abspath("..")
# Guard against appending a duplicate entry every time this cell is re-run.
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)

from src import utils

In [10]:
# Compare the in-memory frame with the copy reloaded from CSV.
# NOTE(review): validate_loaded is defined in src/utils and is not visible here;
# judging by the printed result it reports shape equality and numeric-dtype
# checks — confirm against the helper's implementation.
checks = utils.validate_loaded(df, df_from_csv)
print(checks)

{'shape_equal': True, 'all_numeric_except_purpose': True, 'non_numeric_columns': []}
