### Import Relevant Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from wordcloud import WordCloud

### Generate Master DF

In [2]:
master_df = pd.read_csv('Consumer_Complaints.csv')

  interactivity=interactivity, compiler=compiler, result=result)


In [3]:
master_df.head()

Unnamed: 0,Date received,Product,Sub-product,Issue,Sub-issue,Consumer complaint narrative,Company public response,Company,State,ZIP code,Tags,Consumer consent provided?,Submitted via,Date sent to company,Company response to consumer,Timely response?,Consumer disputed?,Complaint ID
0,10/01/2019,"Payday loan, title loan, or personal loan",Installment loan,Struggling to pay your loan,,,Company believes it acted appropriately as aut...,"Atlas Credit Company, Inc.",TX,75703,,Consent not provided,Web,10/01/2019,Closed with explanation,Yes,,3391722
1,10/01/2019,Debt collection,Other debt,False statements or representation,Attempted to collect wrong amount,,Company has responded to the consumer and the ...,"ProCollect, Inc",TX,79936,Servicemember,Consent not provided,Web,10/01/2019,Closed with explanation,Yes,,3391649
2,10/01/2019,Debt collection,Auto debt,Written notification about debt,Notification didn't disclose it was an attempt...,,,NAVY FEDERAL CREDIT UNION,CA,91915,,,Web,10/01/2019,In progress,Yes,,3391379
3,10/01/2019,"Credit reporting, credit repair services, or o...",Credit reporting,Incorrect information on your report,Account information incorrect,,,OKLAHOMA STUDENT LOAN AUTHORITY,IN,47130,,,Web,10/01/2019,Closed with explanation,Yes,,3391378
4,10/01/2019,Debt collection,Medical debt,Attempts to collect debt not owed,Debt is not yours,,Company disputes the facts presented in the co...,"Eastern Account Systems of Connecticut, Inc.",CT,6401,,Other,Web,10/01/2019,Closed with explanation,Yes,,3391434


### Import TfidfVectorizer to get Word Vectors for Every Issue

In [4]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [5]:
vectorizer = TfidfVectorizer(stop_words='english')

In [6]:
response = vectorizer.fit_transform(master_df.Issue.unique())

In [7]:
vec_df = pd.DataFrame(response.toarray(), columns=vectorizer.get_feature_names())

### Import PCA to reduce dimensionality for proper clustering

In [8]:
from sklearn.decomposition import PCA

In [9]:
explained_variance = []

for component in range(100,2,-1):
    
    n_components = component

    vectors = vec_df.values

    pca = PCA(n_components)

    dim_reduction = pca.fit_transform(vectors)

    explained_variance.append(sum(pca.explained_variance_ratio_)*100)

In [10]:
n_components = 50

vectors = vec_df.values

pca = PCA(n_components)

dim_reduction = pca.fit_transform(vectors)

explained_var = round(100*sum(pca.explained_variance_ratio_), 3)

print(explained_var, '%')

71.492 %


### Import KMeans to Cluster the Reduced Vectors

In [11]:
from sklearn.cluster import KMeans

In [12]:
cluster_score = []

for cluster in range(20,2,-1):
    
    num_clusters = cluster
    
    kmeans = KMeans()
    
    clusters = kmeans.fit_predict(dim_reduction)
    
    cluster_score.append((cluster, kmeans.inertia_))

In [59]:
num_clusters = 50

n_init = 500
    
kmeans = KMeans(num_clusters, n_init=n_init, random_state=18)
    
clusters = kmeans.fit_predict(dim_reduction)

print(clusters)

[ 2  7 47 14 36 32 35 40 25 33 27 21  4  7 28 13  2 37  1 21  7  5  1 17
 42  0 10 32  9 40  1 41 29 18 24 28 39 12 43  6 42 10 49 48  7 13 31  4
 15 10  2 46  9 29 20  7 20 29 44  6 29 26  3 48 22 26  9  0 15 30 13 22
  4 30 16 34 45 19 34  0 11 23  6 10 14  7 37 49  7 28 38 44 17 32 36 47
 25 48 28 10 33 24 20 42 46  7  8 27 47 40  5  0 48  1 21 33  8 40 35 19
 45  0 37  0  4 43 12  4 27 26  3  7 21 15  0 44 43 22  3 41 41 18 31 49
 49  7  7  7  7 24 24 39 13  7  8 19 16 13 16 23 23 11  5 38 38  0]


In [60]:
cluster_df = pd.DataFrame(clusters)

cluster_df['Issue'] = master_df.Issue.unique()

In [61]:
cluster_df.columns = ['Cluster', 'Issue']

In [62]:
cluster_df = cluster_df.sort_values('Cluster')

In [63]:
cluster_df.head(25)

Unnamed: 0,Cluster,Issue
165,0,Credit reporting
134,0,Unsolicited issuance of credit card
123,0,Credit determination
121,0,Unable to get credit report/credit score
111,0,Credit decision / Underwriting
79,0,Problem with credit report or credit score
67,0,Credit limit changed
25,0,Unable to get your credit report or credit score
113,1,"Managing, opening, or closing account"
30,1,Opening an account


### Snapshot of Clusters with Issue and Value Counts

In [64]:
for cluster in range(50):
    print(cluster_df.loc[cluster_df['Cluster'] == cluster].head(3), 
          len(cluster_df.loc[cluster_df['Cluster'] == cluster]))

     Cluster                                Issue
165        0                     Credit reporting
134        0  Unsolicited issuance of credit card
123        0                 Credit determination 8
     Cluster                                              Issue
113        1              Managing, opening, or closing account
30         1                                 Opening an account
22         1  Managing, opening, or closing your mobile wall... 4
    Cluster                        Issue
50        2  Struggling to pay your bill
16        2   Struggling to pay mortgage
0         2  Struggling to pay your loan 3
     Cluster                                  Issue
62         3          Problem with customer service
130        3    Customer service/Customer relations
138        3  Customer service / Customer relations 3
    Cluster                                              Issue
72        4                                        Advertising
12        4   Confusing or misleading 

In [66]:
cluster_df[cluster_df['Cluster']==7]

Unnamed: 0,Cluster,Issue
147,7,Convenience checks
13,7,Communication tactics
55,7,Problem with additional add-on products or ser...
85,7,Other
88,7,"Application, originator, mortgage broker"
105,7,Taking/threatening an illegal action
20,7,Took or threatened to take negative or legal a...
153,7,Privacy
44,7,Money was not available when promised
131,7,Application processing delay


In [67]:
df = master_df.merge(cluster_df, how='inner', on='Issue')

In [68]:
df['Cluster'].value_counts()

14    274762
7     117426
38    114212
36    112971
37     91391
32     81810
47     61050
10     54725
35     44266
28     39825
1      37886
33     31751
17     31710
0      29562
40     29186
2      27199
44     23578
42     21505
5      18177
46     16865
25     15981
48     14623
9       9819
6       9184
13      9041
29      8904
20      8186
49      7881
21      7524
4       6991
8       6052
12      5581
45      5571
3       4333
24      3719
19      2986
41      2627
43      2487
22       989
18       881
27       705
31       645
16       484
26       459
39       332
30       272
15       250
34        23
23        14
11        12
Name: Cluster, dtype: int64

In [69]:
df[df['Cluster']==2].head()

Unnamed: 0,Date received,Product,Sub-product,Issue,Sub-issue,Consumer complaint narrative,Company public response,Company,State,ZIP code,Tags,Consumer consent provided?,Submitted via,Date sent to company,Company response to consumer,Timely response?,Consumer disputed?,Complaint ID,Cluster
0,10/01/2019,"Payday loan, title loan, or personal loan",Installment loan,Struggling to pay your loan,,,Company believes it acted appropriately as aut...,"Atlas Credit Company, Inc.",TX,75703.0,,Consent not provided,Web,10/01/2019,Closed with explanation,Yes,,3391722,2
1,09/29/2019,"Payday loan, title loan, or personal loan",Title loan,Struggling to pay your loan,,,,CURO Intermediate Holdings,AZ,,,,Web,09/29/2019,Closed with explanation,Yes,,3389959,2
2,09/27/2019,Vehicle loan or lease,Loan,Struggling to pay your loan,Lender trying to repossess or disable the vehicle,,,NAVY FEDERAL CREDIT UNION,CA,92324.0,,,Referral,09/28/2019,In progress,Yes,,3389309,2
3,09/27/2019,"Payday loan, title loan, or personal loan",Installment loan,Struggling to pay your loan,,,Company believes it acted appropriately as aut...,"Atlas Credit Company, Inc.",TX,75703.0,,Consent not provided,Web,09/27/2019,Closed with explanation,Yes,,3388210,2
4,09/25/2019,Vehicle loan or lease,Loan,Struggling to pay your loan,Lender trying to repossess or disable the vehicle,,Company believes it acted appropriately as aut...,"American Credit Acceptance, LLC",MD,20706.0,,,Phone,09/25/2019,Closed with explanation,Yes,,3385477,2
