In [2]:
import pandas as pd
import numpy as np
import datetime as dt

In [3]:
# Load the raw credit-customer records; the path is relative to the current working directory.
data = pd.read_csv(r'data/credit_customers.csv')

In [4]:
# Run date formatted as MM-DD-YYYY; it is embedded in every export name below.
date = dt.datetime.today().strftime("%m-%d-%Y")

# Export file stems (no extension): 0 -> cleaned data set, 1 -> exploratory analysis.
file_name = {
    0: f'Credit_Customers_Data_Cleaned_{date}',
    1: f'Exploratory_Analysis_{date}',
}

In [5]:
class Credit():
    """Cleaning and export helpers for the credit-customers data set.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw customer records. The frame is stored as given and is never
        mutated by this class; ``clean_data`` builds a new frame.
    """

    # Bucket relabelling tables. They are applied as exact-value replacements:
    # with regex/substring matching the '<100' pattern also hits the tail of
    # '500<=X<1000' and corrupts that bucket into '500<=Xless than 1000'.
    # NOTE(review): '<0' is relabelled to '0', which drops the "less than"
    # meaning; kept unchanged so existing outputs stay comparable.
    _CHECKING_LABELS = {
        '<0': '0',
        '0<=X<200': 'less than 200',
        '>=200': 'more than 200',
    }
    _SAVINGS_LABELS = {
        '<100': 'less than 100',
        '500<=X<1000': 'less than 1000',
        '>=1000': 'more than 1000',
        '100<=X<500': 'less than 500',
    }
    _EMPLOYMENT_LABELS = {
        '>=7': 'more than 7 years',
        '1<=X<4': 'less than 4 years',
        '<1': 'less than a year',
        '4<=X<7': 'less than 7 years',
    }
    # Columns cast to integer once missing values have been dropped.
    _INT_COLUMNS = ('duration', 'installment_commitment', 'age', 'existing_credits')

    def __init__(self, data):
        self.data = data

    def clean_data(self):
        """Return a cleaned copy of ``self.data``.

        Steps, in order:

        1. Drop duplicate rows and rows containing missing values.
        2. Insert a 1-based ``customer_ID`` as the first column.
        3. Relabel the ``checking_status``, ``savings_status`` and
           ``employment`` buckets with readable text (exact-value match).
        4. Cast ``duration``, ``installment_commitment``, ``age`` and
           ``existing_credits`` to ``int``.
        5. Split ``credit_history`` on ``'/'`` and explode it to one row per
           part, so a customer with several parts appears on several rows
           sharing the same ``customer_ID``.
        6. Keep only the first word of ``personal_status`` and rename the
           column to ``gender``.
        7. Reset the index to ``0..n-1``.

        Returns
        -------
        pandas.DataFrame
            A new frame; ``self.data`` is left untouched.
        """
        # Chained (non-inplace) calls always hand back a fresh object, so the
        # caller's frame is safe; the trailing copy() detaches it completely.
        data = self.data.drop_duplicates().dropna().copy()

        # IDs are assigned before the explode below, so exploded rows keep the
        # ID of the customer they came from.
        data.insert(0, 'customer_ID', range(1, len(data) + 1))

        data['checking_status'] = data['checking_status'].replace(self._CHECKING_LABELS)
        data['savings_status'] = data['savings_status'].replace(self._SAVINGS_LABELS)
        data['employment'] = data['employment'].replace(self._EMPLOYMENT_LABELS)

        data = data.astype({column: 'int' for column in self._INT_COLUMNS})

        # NOTE(review): exploding duplicates every other column for customers
        # whose credit_history contains '/', which inflates row counts and any
        # per-customer statistic computed downstream - confirm this is intended.
        data['credit_history'] = data['credit_history'].str.split('/')
        data = data.explode('credit_history')

        # First whitespace-separated token, e.g. 'male single' -> 'male'.
        # .str[0] yields NaN for an empty string instead of raising IndexError.
        data['personal_status'] = data['personal_status'].str.split().str[0]
        data = data.rename(columns={'personal_status': 'gender'})

        return data.reset_index(drop=True)

    def save_file(self, data, file, file_type=None):
        """Write ``data`` to ``<file>.xlsx`` or ``<file>.csv`` without the index.

        Parameters
        ----------
        data : pandas.DataFrame
            Frame to export.
        file : str
            Output path without extension; the extension is appended here.
        file_type : str, optional
            ``'excel'`` or ``'csv'`` (case-insensitive, surrounding whitespace
            ignored). When omitted the user is prompted, as before; passing it
            explicitly keeps a notebook runnable without interaction.

        Returns
        -------
        None
            For an unrecognised type a notice is printed and nothing is written.
        """
        if file_type is None:
            file_type = input('Enter file type: (excel or csv) ')
        file_type = file_type.strip().lower()

        if file_type == 'excel':
            return data.to_excel(file + '.xlsx', index=False)
        if file_type == 'csv':
            return data.to_csv(file + '.csv', index=False)

        print('No Corresponding File Type Found...')
        

# Wrap the raw frame, then preview the first rows of the cleaned result.
# clean_data() works on a copy, so `data` itself is not modified by this call.
raw = Credit(data)
raw.clean_data().head()

Unnamed: 0,customer_ID,checking_status,duration,credit_history,purpose,credit_amount,savings_status,employment,installment_commitment,gender,...,property_magnitude,age,other_payment_plans,housing,existing_credits,job,num_dependents,own_telephone,foreign_worker,class
0,1,0,6,critical,radio/tv,1169.0,no known savings,more than 7 years,4,male,...,real estate,67,none,own,2,skilled,1.0,yes,yes,good
1,1,0,6,other existing credit,radio/tv,1169.0,no known savings,more than 7 years,4,male,...,real estate,67,none,own,2,skilled,1.0,yes,yes,good
2,2,less than 200,48,existing paid,radio/tv,5951.0,less than 100,less than 4 years,2,female,...,real estate,22,none,own,1,skilled,1.0,none,yes,bad
3,3,no checking,12,critical,education,2096.0,less than 100,less than 7 years,2,male,...,real estate,49,none,own,1,unskilled resident,2.0,none,yes,good
4,3,no checking,12,other existing credit,education,2096.0,less than 100,less than 7 years,2,male,...,real estate,49,none,own,1,unskilled resident,2.0,none,yes,good


In [6]:
data.head() # First rows of the raw frame, for comparison with the cleaned preview

Unnamed: 0,checking_status,duration,credit_history,purpose,credit_amount,savings_status,employment,installment_commitment,personal_status,other_parties,...,property_magnitude,age,other_payment_plans,housing,existing_credits,job,num_dependents,own_telephone,foreign_worker,class
0,<0,6.0,critical/other existing credit,radio/tv,1169.0,no known savings,>=7,4.0,male single,none,...,real estate,67.0,none,own,2.0,skilled,1.0,yes,yes,good
1,0<=X<200,48.0,existing paid,radio/tv,5951.0,<100,1<=X<4,2.0,female div/dep/mar,none,...,real estate,22.0,none,own,1.0,skilled,1.0,none,yes,bad
2,no checking,12.0,critical/other existing credit,education,2096.0,<100,4<=X<7,2.0,male single,none,...,real estate,49.0,none,own,1.0,unskilled resident,2.0,none,yes,good
3,<0,42.0,existing paid,furniture/equipment,7882.0,<100,4<=X<7,2.0,male single,guarantor,...,life insurance,45.0,none,for free,1.0,skilled,2.0,none,yes,good
4,<0,24.0,delayed previously,new car,4870.0,<100,1<=X<4,3.0,male single,none,...,no known property,53.0,none,for free,2.0,skilled,2.0,none,yes,bad


In [7]:
# Shape (rows, columns) of the cleaned frame. It has more rows than the raw data
# because clean_data() explodes credit_history into one row per '/'-separated value,
# and one more column because customer_ID is inserted.
raw.clean_data().shape

(1333, 22)

In [8]:
data.shape # Shape (rows, columns) of the raw frame as loaded

(1000, 21)

In [9]:
# Export the cleaned data. save_file() prompts for the format ('excel' or 'csv');
# any other answer only prints a notice and nothing is written.
raw.save_file(raw.clean_data(), file_name[0])

No Corresponding File Type Found...
