In [6]:
import pandas as pd
import numpy as np
import os

In [7]:
password = os.environ['POSTGRES_PASSWORD']
username = os.environ['POSTGRES_USER']
database = os.environ['POSTGRES_DBNAME']
host = os.environ['POSTGRES_HOST']
port = os.environ['POSTGRES_PORT']

In [8]:
# Load the SQL extension
%load_ext sql

# Connect to the PostgreSQL database using SQLAlchemy format
%sql postgresql://{username}:{password}@{host}:{port}/{database}


The sql extension is already loaded. To reload it, use:
  %reload_ext sql


### 1 Product Analysis

#### 1.1 What products are the complaints about, and how many complaints are there for each product?

In [5]:
%%sql
SELECT 
    product,
    COUNT(*) AS count,
    ROUND(COUNT(*) * 100.0 / (SELECT COUNT(product) FROM complaints WHERE product IS NOT NULL), 2) AS percentage
FROM
    complaints
GROUP BY
    product
ORDER BY
    2 DESC

 * postgresql://postgres:***@localhost:5432/complaints
11 rows affected.


product,count,percentage
Mortgage,186475,33.54
Debt collection,101052,18.18
Credit reporting,91854,16.52
Credit card,66468,11.96
Bank account or service,62563,11.25
Consumer Loan,20990,3.78
Student loan,15839,2.85
Payday loan,3877,0.7
Money transfers,3812,0.69
Prepaid card,2470,0.44


#### 1.2 Given that mortgage is the product that leads to most complaints, what are the complaints distribution of sub product for mortgage?

In [10]:
%%sql
SELECT
    sub_product,
    COUNT(sub_product) AS count
FROM
    complaints
WHERE
    product = 'Mortgage'
GROUP BY
    sub_product
ORDER BY
    COUNT(sub_product) DESC


 * postgresql://postgres:***@localhost:5432/complaints
8 rows affected.


sub_product,count
Other mortgage,74319
Conventional fixed mortgage,57182
Conventional adjustable mortgage (ARM),20941
FHA mortgage,19152
Home equity loan or line of credit,8944
VA mortgage,3735
Reverse mortgage,1537
Second mortgage,665


### 2 Issue type analysis

#### 2.1 What are the top 10 common issues that cause complaints?

In [16]:
%%sql
SELECT
    issue,
    COUNT(issue) AS count
FROM
    complaints
GROUP BY
    issue
ORDER BY
    COUNT(issue) DESC
LIMIT 10

 * postgresql://postgres:***@localhost:5432/complaints
10 rows affected.


issue,count
"Loan modification,collection,foreclosure",97191
Incorrect information on credit report,66718
"Loan servicing, payments, escrow account",60375
Cont'd attempts collect debt not owed,42285
"Account opening, closing, or management",26661
Communication tactics,18293
Disclosure verification of debt,18292
Deposits and withdrawals,17195
"Application, originator, mortgage broker",13306
Billing disputes,11042


#### 2.2 What are the top 10 common sub issues that cause complaints?

In [24]:
%%sql
SELECT
    sub_issue,
    COUNT(sub_issue) AS count
FROM
    complaints
WHERE
    sub_issue != 'Unknown'
GROUP BY
    sub_issue
ORDER BY
    COUNT(sub_issue) DESC
LIMIT 10

 * postgresql://postgres:***@localhost:5432/complaints
10 rows affected.


sub_issue,count
Account status,26798
Debt is not mine,26285
Information is not mine,19900
Not given enough info to verify debt,12496
Debt was paid,11328
Frequent or repeated calls,11320
Account terms,7240
Attempted to collect wrong amount,6308
Public record,5573
Problem getting my free annual report,4937


### 3 Company Analysis

#### 3.1 What are the top 10 companies that received the most complaints?

In [26]:
%%sql
SELECT 
    DISTINCT company, 
    COUNT(*) as number_of_complaints,
    RANK() OVER (ORDER BY COUNT(*) DESC) AS rank
FROM complaints
GROUP BY company
ORDER BY number_of_complaints DESC
LIMIT 10;

 * postgresql://postgres:***@localhost:5432/complaints
10 rows affected.


company,number_of_complaints,rank
Bank of America,55998,1
Wells Fargo & Company,42024,2
JPMorgan Chase & Co.,33881,3
Equifax,31828,4
Experian,30905,5
Citibank,25540,6
"TransUnion Intermediate Holdings, Inc.",25534,7
Ocwen,20978,8
Capital One,15628,9
Nationstar Mortgage,13250,10


#### 3.2 What are the companies with the most complaints by state?

In [27]:
%%sql
WITH ranked_companies_cte AS(
    SELECT
        state,
        company,
        COUNT(company) AS number_of_complaints,
        RANK() OVER (PARTITION BY state ORDER BY COUNT(company) DESC) AS rank
    FROM
        complaints
    GROUP BY
        state, company
)

SELECT
    state,
    company,
    number_of_complaints
FROM
    ranked_companies_cte
WHERE
    rank=1

 * postgresql://postgres:***@localhost:5432/complaints
68 rows affected.


state,company,number_of_complaints
AA,Capital One,2
AA,Wells Fargo & Company,2
AE,Bank of America,17
AE,Experian,17
AK,Wells Fargo & Company,98
AL,Equifax,451
AP,Bank of America,18
AR,Bank of America,228
AS,Bank of Hawaii,2
AZ,Bank of America,1399


### 4 Demographical Analysis

#### 4.1 What are the products with the most complaints by state?

In [23]:
%%sql
WITH ranked_products_cte AS(
    SELECT
        state,
        product,
        COUNT(product) AS number_of_complaints,
        RANK() OVER (PARTITION BY state ORDER BY COUNT(product) DESC) AS rank
    FROM
        complaints
    GROUP BY
        state, product
)

SELECT
    state,
    product,
    number_of_complaints
FROM
    ranked_products_cte
WHERE
    rank=1 AND state!= 'Unknown'

 * postgresql://postgres:***@localhost:5432/complaints
63 rows affected.


state,product,number_of_complaints
AA,Mortgage,4
AE,Mortgage,68
AK,Mortgage,157
AL,Mortgage,1395
AP,Debt collection,46
AR,Mortgage,619
AS,Bank account or service,5
AZ,Mortgage,4358
CA,Mortgage,32988
CO,Mortgage,3303
