In [2]:
class CFPB:
    """Small client for the CFPB consumer-complaints search API.

    An instance stores the endpoint URL, some bookkeeping metadata, the overall
    date range requested so far and every result set fetched by ``load`` (one
    pandas DataFrame per successful call, appended to ``self.dataframes``).
    """

    # Endpoint used when no ``base_url`` is supplied.
    DEFAULT_BASE_URL = "https://www.consumerfinance.gov/data-research/consumer-complaints/search/api/v1/"
    # Layout of the date_received_min / date_received_max query parameters.
    DATE_FORMAT = "%Y-%m-%d"

    def __init__(self, base_url = None, name=None, creationTimeStamp=None,  modifiedTimeStamp=None, createdBy=None, modifiedBy=None, start_date=None, end_date=None, has_narrative=None, dataframes=None) -> None:
        """Set up the client; no request is sent here.

        Args:
            base_url: API endpoint; falls back to ``DEFAULT_BASE_URL`` when falsy.
            name, createdBy, modifiedBy: free-form metadata; falsy values are stored as None.
            creationTimeStamp, modifiedTimeStamp: default to the current time when falsy.
            start_date, end_date: initial date range, given as a ``"YYYY-MM-DD"`` string
                or a ``datetime.date`` / ``datetime.datetime``; default to the current time.
            has_narrative: value for the ``has_narrative`` query parameter, or None.
            dataframes: existing list of DataFrames to continue from; a fresh list
                is created when None or empty.

        Raises:
            ValueError: if a date string does not match ``DATE_FORMAT``.
        """
        import datetime

        now = datetime.datetime.now()

        self.base_url = base_url if base_url else self.DEFAULT_BASE_URL
        self.name = name if name else None
        self.creationTimeStamp = creationTimeStamp if creationTimeStamp else now
        self.modifiedTimeStamp = modifiedTimeStamp if modifiedTimeStamp else now
        self.createdBy = createdBy if createdBy else None
        self.modifiedBy = modifiedBy if modifiedBy else None
        # The supplied dates used to be ignored (the attribute was always "now").
        self.start_date = self._to_datetime(start_date) if start_date else now
        self.end_date = self._to_datetime(end_date) if end_date else now
        self.has_narrative = has_narrative
        # A None default avoids the shared-mutable-default pitfall; a non-empty
        # list passed by the caller is still stored by reference, as before.
        self.dataframes = dataframes if dataframes else []

    @classmethod
    def _to_datetime(cls, value):
        """Return ``value`` as a ``datetime.datetime``; strings are parsed with ``DATE_FORMAT``."""
        import datetime

        if isinstance(value, datetime.datetime):
            return value
        if isinstance(value, datetime.date):
            return datetime.datetime.combine(value, datetime.time.min)
        return datetime.datetime.strptime(value, cls.DATE_FORMAT)

    def create_request_url(self,base_url, start_date, end_date, has_narrative):
        """Build the request URL for the given filters.

        ``start_date`` / ``end_date`` may be strings (used as given) or date
        objects (formatted with ``DATE_FORMAT``); ``has_narrative`` may be a
        string or a bool (sent as ``true`` / ``false``). A filter whose value
        is None is left out of the query string.

        Returns:
            ``base_url`` followed by ``?format=json`` and the remaining filters.
        """
        import datetime
        from urllib.parse import urlencode

        def as_text(value):
            # bool is tested first: the API parameter is spelled in lower case.
            if isinstance(value, bool):
                return "true" if value else "false"
            if isinstance(value, datetime.date):  # also covers datetime.datetime
                return value.strftime(self.DATE_FORMAT)
            return str(value)

        query = [("format", "json")]
        filters = (
            ("date_received_min", start_date),
            ("date_received_max", end_date),
            ("has_narrative", has_narrative),
        )
        for key, value in filters:
            if value is not None:
                query.append((key, as_text(value)))
        return base_url + "?" + urlencode(query)

    def get_data(self,request_url):
        """Send a GET request to ``request_url``.

        Returns:
            ``(status_code, payload)`` where ``payload`` is the decoded JSON body
            on HTTP 200, otherwise a dict describing the error.
        """
        # Imported lazily so the class can be defined without requests installed.
        import requests
        response = requests.get(request_url)
        if response.status_code == 200:
            return response.status_code, response.json()
        else:
            return response.status_code, {"error": "An error occurred while fetching data", "status_code": response.status_code, "reason": response.reason}

    def convert_data_to_dataframe(self,data):
        """Return ``data`` wrapped in a pandas DataFrame."""
        import pandas as pd
        return pd.DataFrame(data)

    def cluster(self):
        """Placeholder for clustering the loaded data; not implemented yet."""
        pass

    def load(self, start_date=None, end_date=None, has_narrative=None):
        """Fetch complaints for a date range and append them to ``self.dataframes``.

        Arguments that are omitted fall back to the values stored on the
        instance (previously the raw, possibly-None arguments were put into the
        URL, which raised ``TypeError``). The stored ``start_date`` /
        ``end_date`` are widened so they always cover every range requested.

        Args:
            start_date, end_date: ``"YYYY-MM-DD"`` string or date object.
            has_narrative: value for the ``has_narrative`` filter; None keeps
                the stored value.

        Prints the HTTP status code, then either a one-line summary of the
        loaded frame (HTTP 200) or the error payload.
        """
        requested_start = self._to_datetime(start_date) if start_date else self.start_date
        requested_end = self._to_datetime(end_date) if end_date else self.end_date
        self.start_date = min(self.start_date, requested_start)
        self.end_date = max(self.end_date, requested_end)
        if has_narrative is not None:
            self.has_narrative = has_narrative

        # Query exactly the range asked for in this call, not the widened one.
        request_url = self.create_request_url(self.base_url, requested_start, requested_end, self.has_narrative)
        status_code, data = self.get_data(request_url)
        print(status_code)
        if status_code == 200:
            # Each element of the response is expected to carry the record under "_source".
            dataframe = self.convert_data_to_dataframe([ob["_source"] for ob in data])
            self.dataframes.append(dataframe)
            print(f"Data frame of {len(data)} rows and {len(dataframe.columns)} columns loaded")
        else:
            print(data)

In [3]:
# Build a client with the default endpoint and settings; no request is sent until load() is called.
new_cfpb = CFPB()

In [4]:
# Request complaints received between 2024-12-01 and 2024-12-31 with has_narrative=true;
# on HTTP 200 load() appends the resulting DataFrame to new_cfpb.dataframes.
new_cfpb.load("2024-12-01", "2024-12-31", "true")

200
Data frame of 64833 rows and 18 columns loaded


In [5]:
# Work with the first DataFrame that load() stored on the client.
data= new_cfpb.dataframes[0]




In [33]:
# Summarise every column of `data`, one row per column: the column name is kept as a
# regular field and the rows are numbered by an integer "id" index.
# NOTE(review): cells further down still show `profile` as the untransposed describe()
# output, i.e. they were executed before this cell took its current form — re-run the
# notebook top to bottom and confirm the later cells still make sense.
profile = (
    data.describe()
    .T
    .assign(
        column_name=lambda summary: summary.index,
        id=lambda summary: list(range(0, len(summary.index))),
    )
    .set_index("id")
)
profile

Unnamed: 0_level_0,count,unique,top,freq,column_name
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,64833,11,Credit reporting or other personal consumer re...,52346,product
1,64833,38727,My credit reports are inaccurate. These inaccu...,1314,complaint_what_happened
2,64833,51,2024-12-10T12:00:00-05:00,3769,date_sent_to_company
3,64833,84,Incorrect information on your report,28394,issue
4,64833,53,Credit reporting,51997,sub_product
5,64833,5652,XXXXX,2185,zip_code
6,4139,3,Servicemember,2764,tags
7,64833,64833,11067767,1,complaint_id
8,64833,2,Yes,64622,timely
9,64833,1,Consent provided,64833,consumer_consent_provided


In [8]:
# Exploratory check of what kind of object `profile` is.
type(profile)

pandas.core.frame.DataFrame

In [9]:
# NOTE(review): the output recorded for this cell is the untransposed describe() table
# (count/unique/top/freq as rows), so it was produced before the In [33] cell redefined
# `profile`; a fresh top-to-bottom run displays the transposed, id-indexed frame instead.
profile

Unnamed: 0,product,complaint_what_happened,date_sent_to_company,issue,sub_product,zip_code,tags,complaint_id,timely,consumer_consent_provided,company_response,submitted_via,company,date_received,state,consumer_disputed,company_public_response,sub_issue
count,64833,64833,64833,64833,64833,64833,4139,64833,64833,64833,64833,64833,64833,64833,64455,64833.0,39630,63402
unique,11,38727,51,84,53,5652,3,64833,2,1,5,1,1027,31,58,1.0,10,197
top,Credit reporting or other personal consumer re...,My credit reports are inaccurate. These inaccu...,2024-12-10T12:00:00-05:00,Incorrect information on your report,Credit reporting,XXXXX,Servicemember,11067767,Yes,Consent provided,Closed with explanation,Web,"TRANSUNION INTERMEDIATE HOLDINGS, INC.",2024-12-05T12:00:00-05:00,TX,,Company has responded to the consumer and the ...,Information belongs to someone else
freq,52346,1314,3769,28394,51997,2185,2764,1,64622,64833,34593,64833,20125,3799,9467,64833.0,38195,15554


In [10]:
# Swap the rows and columns of `profile`.
# NOTE(review): on a fresh top-to-bottom run `profile` has already been transposed by the
# In [33] cell, so this would flip it back — confirm the intended cell order.
profile_transpose = profile.T

In [11]:
# Display the transposed summary.
profile_transpose

Unnamed: 0,count,unique,top,freq
product,64833,11,Credit reporting or other personal consumer re...,52346
complaint_what_happened,64833,38727,My credit reports are inaccurate. These inaccu...,1314
date_sent_to_company,64833,51,2024-12-10T12:00:00-05:00,3769
issue,64833,84,Incorrect information on your report,28394
sub_product,64833,53,Credit reporting,51997
zip_code,64833,5652,XXXXX,2185
tags,4139,3,Servicemember,2764
complaint_id,64833,64833,11067767,1
timely,64833,2,Yes,64622
consumer_consent_provided,64833,1,Consent provided,64833


In [16]:
# Exploratory check of what kind of object the transpose is.
type(profile_transpose)

pandas.core.frame.DataFrame

In [17]:
# Inspect the row labels before they are replaced by an integer index.
profile_transpose.index

Index(['product', 'complaint_what_happened', 'date_sent_to_company', 'issue',
       'sub_product', 'zip_code', 'tags', 'complaint_id', 'timely',
       'consumer_consent_provided', 'company_response', 'submitted_via',
       'company', 'date_received', 'state', 'consumer_disputed',
       'company_public_response', 'sub_issue'],
      dtype='object')

In [30]:
# Replace the label index with a 0..n-1 integer index named "id", keeping the original
# labels in an "ID" column (set_index on its own would discard them; the table recorded
# below shows that column, but no visible cell created it).
# The guard makes the cell safe to re-run: once "ID" exists nothing is changed.
if "ID" not in profile_transpose.columns:
    profile_transpose = (
        profile_transpose
        .assign(
            ID=profile_transpose.index,
            id=list(range(0, len(profile_transpose.index))),
        )
        .set_index("id")
    )

In [31]:
# Display the summary after switching to the integer "id" index.
profile_transpose

Unnamed: 0_level_0,count,unique,top,freq,ID
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,64833,11,Credit reporting or other personal consumer re...,52346,product
1,64833,38727,My credit reports are inaccurate. These inaccu...,1314,complaint_what_happened
2,64833,51,2024-12-10T12:00:00-05:00,3769,date_sent_to_company
3,64833,84,Incorrect information on your report,28394,issue
4,64833,53,Credit reporting,51997,sub_product
5,64833,5652,XXXXX,2185,zip_code
6,4139,3,Servicemember,2764,tags
7,64833,64833,11067767,1,complaint_id
8,64833,2,Yes,64622,timely
9,64833,1,Consent provided,64833,consumer_consent_provided


In [32]:
# Confirm the index now holds the integer ids.
profile_transpose.index

Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17], dtype='int64', name='id')