In [7]:
import pandas as pd
import sqlite3

### Importing the CSV file as a DataFrame

In [14]:
# Read the raw complaints export into memory; the trailing expression
# displays the frame's (rows, columns) shape as the cell output.
df = pd.read_csv('consumer_complaints.csv')
df.shape

(65499, 12)

In [15]:
# Preview the first five rows to eyeball the columns and their values.
df.head(n=5)

Unnamed: 0,complaint_id,product,issue,company,state,zipcode,submitted_via,date_sent_to_company,date_received,company_response_to_consumer,timely_response,consumer_disputed?
0,511074,Mortgage,"Loan modification,collection,foreclosure",U.S. Bancorp,CA,95993,Referral,09/03/2013,08/30/2013,Closed with explanation,Yes,Yes
1,511080,Mortgage,"Loan servicing, payments, escrow account",Wells Fargo & Company,CA,91104,Referral,09/03/2013,08/30/2013,Closed with explanation,Yes,Yes
2,510473,Credit reporting,Incorrect information on credit report,Wells Fargo & Company,NY,11764,Postal mail,09/18/2013,08/30/2013,Closed with explanation,Yes,No
3,510326,Student loan,Repaying your loan,"Navient Solutions, Inc.",MD,21402,Email,08/30/2013,08/30/2013,Closed with explanation,Yes,Yes
4,511067,Debt collection,False statements or representation,Resurgent Capital Services L.P.,GA,30106,Web,08/30/2013,08/30/2013,Closed with explanation,Yes,Yes


### Creating a database

In [16]:
# Open (or create, if it does not exist yet) the SQLite database file that
# backs every query in this notebook.
db = sqlite3.connect('complaints.db')


def run_query(query, params=None):
    """Run a SQL query against ``db`` and return the result as a DataFrame.

    Parameters
    ----------
    query : str
        SQL text to execute.
    params : sequence or dict, optional
        Values to bind to the ``?`` / ``:name`` placeholders in ``query``.
        Bind values here rather than formatting them into the SQL string.
        Defaults to ``None`` (no bound values), which matches the original
        single-argument behaviour.

    Returns
    -------
    pandas.DataFrame
        One row per result row, one column per selected expression.
    """
    return pd.read_sql_query(query, db, params=params)

### Loading the data into the database

In [17]:
# The database lives on disk, so it survives kernel restarts. With
# if_exists='append', every re-run of this cell (or of the whole notebook)
# would add another full copy of the rows and inflate all counts below.
# 'replace' drops and recreates the table, making the load idempotent.
df.to_sql(name='complaints', con=db, if_exists='replace', index=False)

In [18]:
# Sanity check: the table's row count should equal the number of rows
# reported by df.shape when the CSV was loaded.
row_count_sql = "SELECT COUNT(*) FROM complaints"
run_query(row_count_sql)

Unnamed: 0,COUNT(*)
0,65499


### Computing the number of complaints per submission method

In [50]:
# Count complaints per submission channel, most-used channel first.
# Aliases are double-quoted identifiers: single quotes denote string literals
# in SQL, and SQLite only tolerates them as identifiers for legacy reasons.
query = '''
SELECT submitted_via AS "Method", COUNT(*) AS "Number"
FROM complaints
GROUP BY submitted_via
ORDER BY COUNT(*) DESC;
'''
run_query(query)

Unnamed: 0,Method,Number
0,Web,43947
1,Referral,10587
2,Phone,4974
3,Postal mail,4894
4,Fax,1076
5,Email,21


### Adding the total number of complaints

In [51]:
# Same per-channel count, plus the overall number of complaints repeated on
# every row (scalar subquery) so a share can be derived from it.
# Aliases are double-quoted identifiers: single quotes denote string literals
# in SQL, and SQLite only tolerates them as identifiers for legacy reasons.
query = '''
SELECT submitted_via AS "Method", COUNT(*) AS "Number",
   (SELECT COUNT(*) FROM complaints) AS "Total"
FROM complaints
GROUP BY submitted_via
ORDER BY COUNT(*) DESC;
'''
run_query(query)

Unnamed: 0,Method,Number,Total
0,Web,43947,65499
1,Referral,10587,65499
2,Phone,4974,65499
3,Postal mail,4894,65499
4,Fax,1076,65499
5,Email,21,65499


### Computing the percentage of complaints of each submission method

In [57]:
# Share of complaints per submission channel, as a percentage of all complaints.
# - The ORDER BY is on the outer query: the order of rows coming out of a
#   derived table is not guaranteed to survive into the final result.
# - One CAST to REAL is enough to force floating-point division; the integer
#   "Total" is promoted automatically ("double precision" maps to REAL in SQLite).
# - Aliases are double-quoted identifiers rather than single-quoted string literals.
query = '''
SELECT *, CAST(q1."Number" AS REAL) / q1."Total" * 100 AS "Percent"
FROM (
    SELECT submitted_via AS "Method", COUNT(*) AS "Number",
       (SELECT COUNT(*) FROM complaints) AS "Total"
    FROM complaints
    GROUP BY submitted_via
) AS q1
ORDER BY q1."Number" DESC;
'''
run_query(query)

Unnamed: 0,Method,Number,Total,Percent
0,Web,43947,65499,67.095681
1,Referral,10587,65499,16.163606
2,Phone,4974,65499,7.594009
3,Postal mail,4894,65499,7.47187
4,Fax,1076,65499,1.642773
5,Email,21,65499,0.032062
