### Merge similar classes together

In [1]:
import pandas as pd
import numpy as np

from gensim.models.doc2vec import Doc2Vec, TaggedDocument
from nltk.tokenize import word_tokenize
import warnings
warnings.filterwarnings("ignore")

In [2]:
data = pd.read_csv('cleaned_narrative.csv')

In [3]:
data.Issue.value_counts().to_dict()

{'Incorrect information on your report': 6593,
 "Problem with a credit reporting company's investigation into an existing problem": 3307,
 'Attempts to collect debt not owed': 2543,
 'Incorrect information on credit report': 2081,
 'Improper use of your report': 1848,
 "Cont'd attempts collect debt not owed": 1698,
 'Loan servicing, payments, escrow account': 1515,
 'Communication tactics': 1243,
 'Trouble during payment process': 1146,
 'Written notification about debt': 1119,
 'Loan modification,collection,foreclosure': 1094,
 'False statements or representation': 977,
 'Managing an account': 892,
 'Dealing with your lender or servicer': 816,
 'Dealing with my lender or servicer': 787,
 'Struggling to pay mortgage': 786,
 'Disclosure verification of debt': 753,
 'Managing the loan or lease': 738,
 'Took or threatened to take negative or legal action': 697,
 'Account opening, closing, or management': 601,
 'Problem with a purchase shown on your statement': 598,
 "Credit reporting comp

### Merge same labels

In [4]:
data = data.dropna(subset=['clean_sentences'])

In [5]:
data.loc[((data.Issue == 'Dealing with your lender or servicer') |
          (data.Issue == 'Dealing with my lender or servicer')),
         'Issue'] = 'Dealing with your lender or servicer'

In [6]:
data.loc[((data.Issue == 'Problem adding money') |
          (data.Issue == 'Adding money')),
         'Issue'] = 'Adding money'

In [7]:
data.loc[((data.Issue == 'Managing an account') |
          (data.Issue == 'Account opening, closing, or management') |
          (data.Issue == 'Opening an account') |
          (data.Issue == 'Closing an account') |
          (data.Issue == 'Closing your account') |
          (data.Issue == 'Closing/Cancelling account') |
          (data.Issue == 'Managing, opening, or closing account') |
          (data.Issue == 'Problem getting a card or closing an account')),
         'Issue'] = 'Account opening, closing, or management'

In [8]:
data.loc[((data.Issue == "Problem with a credit reporting company's investigation into an existing problem") | 
          (data.Issue == "Credit reporting company's investigation")),
         'Issue'] = "Credit reporting company's investigation"

In [9]:
data.loc[((data.Issue == "Trouble during payment process") | 
          (data.Issue == "Problem when making payments")),
         'Issue'] = "Problem when making payments"

In [10]:
data.loc[((data.Issue == 'Fees or interest') |
          (data.Issue == "Charged fees or interest you didn't expect") |
          (data.Issue == "Charged fees or interest I didn't expect") |
          (data.Issue == 'Unexpected or other fees') |
          (data.Issue == 'Fees') |
          (data.Issue == 'Excessive fees') |
          (data.Issue == 'Balance transfer fee') |
          (data.Issue == 'Cash advance fee') |
          (data.Issue == 'Excessive fees') |
          (data.Issue == 'Balance transfer fee') |
          (data.Issue == 'Overlimit fee') |
          (data.Issue == 'Other feeOverlimit fee')),
         'Issue'] = 'Fees or interest'

In [11]:
data.loc[((data.Issue == 'Overdraft, savings or rewards features') |
          (data.Issue == 'Problem with overdraft') |
          (data.Issue == 'Overdraft, savings, or rewards features')),
         'Issue'] = 'Overdraft, savings or rewards features'

In [12]:
data.loc[((data.Issue == 'Vehicle was repossessed or sold the vehicle') |
          (data.Issue == 'Lender repossessed or sold the vehicle')),
         'Issue'] = 'Lender repossessed or sold the vehicle'

In [13]:
data.loc[((data.Issue == 'Advertising and marketing, including promotional offers') |
          (data.Issue == "Advertising and marketing") |
          (data.Issue == "Confusing or misleading advertising or marketing") |
          (data.Issue == 'Advertising, marketing or disclosures') |
          (data.Issue == 'Advertising')),
         'Issue'] = 'Advertising and marketing'

In [14]:
data.loc[((data.Issue == "Problems caused by my funds being low") | 
          (data.Issue == "Problem caused by your funds being low")),
         'Issue'] = "Problems caused by your funds being low"

In [15]:
data.loc[((data.Issue == "Can't repay my loan") |
          (data.Issue == 'Struggling to repay your loan') |
          (data.Issue == 'Struggling to pay your loan')),
         'Issue'] = 'Struggling to pay your loan'

In [16]:
data.loc[((data.Issue == 'Improper use of my credit report') |
          (data.Issue == 'Improper use of your report')),
         'Issue'] = 'Improper use of your report'

In [17]:
data.loc[((data.Issue == 'Incorrect information on your report') |
          (data.Issue == 'Incorrect information on credit report')),
         'Issue'] = 'Incorrect information on your report'

In [18]:
data.loc[((data.Issue == "Received a loan I didn't apply for") |
          (data.Issue == "Received a loan you didn't apply for")),
         'Issue'] = "Received a loan you didn't apply for"

In [19]:
data.loc[((data.Issue == "Unable to get credit report/credit score") |
          (data.Issue == "Unable to get your credit report or credit score")),
         'Issue'] = "Unable to get your credit report or credit score"

In [20]:
data.loc[((data.Issue == "Was approved for a loan, but didn't receive the money") |
          (data.Issue == "Was approved for a loan, but didn't receive money")),
         'Issue'] = "Was approved for a loan, but didn't receive money"

In [21]:
data.loc[((data.Issue == "Can't contact lender") |
          (data.Issue == "Can't contact lender or servicer")),
         'Issue'] = "Can't contact lender or servicer"

In [22]:
len(data.Issue.unique())

121

### Merge similar labels using Doc2Vec

In [23]:
# Prepare data for training our doc2vec model, tag is corresponding issue
tagged_docu = [TaggedDocument(words=word_tokenize(_d), tags=[data.iloc[i,3]]) for i, _d in enumerate(data.clean_sentences)]
tagged_docu

[TaggedDocument(words=['i', 'issued', 'balance', 'checks', 'us', 'bank', 'platinum', 'balance', 'transfers', 'months', 'i', 'late', 'previous', 'payment', 'i', 'called', 'got', 'resolved', 'company', 'i', 'assured', 'taken', 'care', 'interest', 'rate', 'still', 'she', 'offered', 'transfer', 'additional', 'balances', 'interest', 'rate', 'months', 'they', 'sent', 'next', 'bill', 'charging', 'interest', 'saying', 'account', 'delinquent', 'i', 'called', 'day', 'bill', 'arrived', 'woman', 'said', 'would', 'remove', 'interest', 'rate', 'increase', 'i', 'closed', 'account', 'this', 'company', 'lied', 'tricked', 'transferring', 'balances', 'high', 'interest', 'r'], tags=['APR or interest rate']),
 TaggedDocument(words=['i', 'applied', 'credit', 'card', 'wells', 'fargo', 'agreed', 'interest', 'would', 'percent', 'two', 'yrs', 'after', 'two', 'yrs', 'passed', 'noticed', 'company', 'charges', 'percent', 'interest', 'i', 'notified', 'requesting', 'close', 'credit', 'card', 'account', 'refused', 'n

In [24]:
# train the doc2vec model

doc_model = Doc2Vec(size=300, alpha=0.025, min_alpha=0.00025, min_count=2, dm=1, negative=5)
  
doc_model.build_vocab(tagged_docu)

max_epochs = 100
for epoch in range(max_epochs):
    print('iteration {0}'.format(epoch))
    doc_model.train(tagged_docu,
                    total_examples=doc_model.corpus_count,
                    epochs=doc_model.iter)
    # decrease the learning rate
    doc_model.alpha -= 0.0002
    # fix the learning rate, no decay
    doc_model.min_alpha = doc_model.alpha

iteration 0
iteration 1
iteration 2
iteration 3
iteration 4
iteration 5
iteration 6
iteration 7
iteration 8
iteration 9
iteration 10
iteration 11
iteration 12
iteration 13
iteration 14
iteration 15
iteration 16
iteration 17
iteration 18
iteration 19
iteration 20
iteration 21
iteration 22
iteration 23
iteration 24
iteration 25
iteration 26
iteration 27
iteration 28
iteration 29
iteration 30
iteration 31
iteration 32
iteration 33
iteration 34
iteration 35
iteration 36
iteration 37
iteration 38
iteration 39
iteration 40
iteration 41
iteration 42
iteration 43
iteration 44
iteration 45
iteration 46
iteration 47
iteration 48
iteration 49
iteration 50
iteration 51
iteration 52
iteration 53
iteration 54
iteration 55
iteration 56
iteration 57
iteration 58
iteration 59
iteration 60
iteration 61
iteration 62
iteration 63
iteration 64
iteration 65
iteration 66
iteration 67
iteration 68
iteration 69
iteration 70
iteration 71
iteration 72
iteration 73
iteration 74
iteration 75
iteration 76
iteration

In [25]:
from sklearn.externals import joblib
joblib.dump(doc_model, "doc_model_1.pkl")

['doc_model_1.pkl']

In [143]:
# Precompute L2-normalized vectors.
doc_model.docvecs.init_sims(replace=True)

In [144]:
# Retrieve vector of each issue
from sklearn.metrics import pairwise_distances

issue_number = list(data.Issue.unique())
vec_list = [doc_model.docvecs[i] for i in issue_number]

# Calculate pairwise difference between each issue
for n in range(len(issue_number)):
    distances = pairwise_distances(np.array(vec_list), metric="cosine")
distances

array([[0.        , 0.7053012 , 0.8014173 , ..., 0.94295764, 0.8517046 ,
        0.8032702 ],
       [0.7053012 , 0.        , 0.8603256 , ..., 0.8512286 , 0.88920504,
        0.7184564 ],
       [0.8014173 , 0.8603256 , 0.        , ..., 0.9441911 , 0.8445908 ,
        1.0279274 ],
       ...,
       [0.94295764, 0.8512286 , 0.9441911 , ..., 0.        , 0.851984  ,
        0.84984326],
       [0.8517046 , 0.88920504, 0.8445908 , ..., 0.851984  , 0.        ,
        0.9293971 ],
       [0.8032702 , 0.7184564 , 1.0279274 , ..., 0.84984326, 0.9293971 ,
        0.        ]], dtype=float32)

In [146]:
# Compile pairwise distance into a dataframe
distance_df = pd.DataFrame(data=np.array(distances), index=issue_number, columns=issue_number)
distance_df = distance_df.where(np.triu(np.ones(distance_df.shape), k=1).astype(np.bool))
distance_df

Unnamed: 0,APR or interest rate,"Account opening, closing, or management",Account terms and changes,Adding money,Advertising and marketing,Application processing delay,"Application, originator, mortgage broker",Applied for loan/did not receive money,Applying for a mortgage,Applying for a mortgage or refinancing an existing mortgage,...,Trouble using your card,Unable to get your credit report or credit score,Unauthorized transactions or other transaction problem,Unauthorized transactions/trans. issues,Unsolicited issuance of credit card,Using a debit or ATM card,Vehicle was damaged or destroyed the vehicle,"Was approved for a loan, but didn't receive money",Written notification about debt,Wrong amount charged or received
APR or interest rate,,0.705301,0.801417,0.891814,0.535061,0.770939,0.953240,0.928830,0.972535,0.954682,...,0.632601,0.868482,0.917410,0.757523,0.592244,0.744466,0.850062,0.942958,0.851705,0.803270
"Account opening, closing, or management",,,0.860326,0.685425,0.513095,0.812839,0.975541,0.857729,1.022604,0.941051,...,0.648070,0.923292,0.604406,0.601748,0.695519,0.371083,0.997170,0.851229,0.889205,0.718456
Account terms and changes,,,,0.889129,0.895841,0.917127,0.943661,0.815012,0.999380,0.966959,...,0.806847,0.898384,0.963606,0.796233,0.809472,0.890588,0.898509,0.944191,0.844591,1.027927
Adding money,,,,,0.747126,0.889832,1.026306,0.798048,0.978016,0.997155,...,0.841713,0.908863,0.798350,0.751091,0.867279,0.777832,0.884554,0.883693,0.849865,0.858008
Advertising and marketing,,,,,,0.689458,1.029033,0.898388,0.909744,0.975950,...,0.595662,0.790397,0.733174,0.738160,0.587701,0.676089,0.948447,0.798266,0.855668,0.800792
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Using a debit or ATM card,,,,,,,,,,,...,,,,,,,0.985254,0.861541,0.944018,0.768961
Vehicle was damaged or destroyed the vehicle,,,,,,,,,,,...,,,,,,,,0.896764,0.801192,0.955944
"Was approved for a loan, but didn't receive money",,,,,,,,,,,...,,,,,,,,,0.851984,0.849843
Written notification about debt,,,,,,,,,,,...,,,,,,,,,,0.929397


In [29]:
# Select lables with distance less than 0.45
indices = np.where(distance_df < 0.45)
indices = [[distance_df.index[x], distance_df.columns[y]] for x, y in zip(*indices)]

# Merge similar labels to be a list
from itertools import product, groupby
# Inner lists to sets
merge_list = [set(x) for x in indices]

# Merge elements if some element in common
for a,b in product(merge_list, merge_list):
    if a.intersection(b):
        a.update(b)
        b.update(a)

# Back to list of lists
merge_list = sorted([sorted(list(x)) for x in merge_list])

# Remove dups
merge_list1 = list(merge_list for merge_list,_ in groupby(merge_list))
merge_list1

[['Account opening, closing, or management',
  'Deposits and withdrawals',
  'Making/receiving payments, sending money',
  'Problem with a lender or other company charging your account',
  'Problems caused by your funds being low',
  'Using a debit or ATM card'],
 ['Advertising and marketing',
  'Other features, terms, or problems',
  'Rewards'],
 ['Application, originator, mortgage broker',
  'Applying for a mortgage or refinancing an existing mortgage',
  'Closing on a mortgage',
  'Credit decision / Underwriting',
  'Loan modification,collection,foreclosure',
  'Loan servicing, payments, escrow account',
  'Problem when making payments',
  'Settlement process and costs',
  'Struggling to pay mortgage'],
 ['Attempts to collect debt not owed',
  'Communication tactics',
  "Cont'd attempts collect debt not owed",
  'Disclosure verification of debt',
  'False statements or representation',
  'Improper contact or sharing of info',
  'Taking/threatening an illegal action',
  'Threatened t

In [147]:
# Find label in every group which has the highest frequency in the dataset
issue_dict = data.Issue.value_counts().to_dict()

list_1 = []
list_2 = []
for i in merge_list1:
    d = {m:0 for m in i}
    for key, value in d.items():
        if key in issue_dict:
            d[key] = issue_dict[key]
    list_1.append(d)
    a = {key: d[key] for key in sorted(d, key=d.get, reverse=True)[:1]}
    list_2.append(a)    

group_label1 = []
for i in list_2:
    for key, value in i.items():
        group_label1.append(key)

group_label1

['Account opening, closing, or management',
 'Other features, terms, or problems',
 'Loan servicing, payments, escrow account',
 'Attempts to collect debt not owed',
 'Problem with a purchase shown on your statement',
 'Credit monitoring or identity theft protection services',
 'Incorrect information on your report',
 'Dealing with your lender or servicer',
 'Managing the loan or lease',
 'Money was not available when promised']

In [32]:
# Repeat the process on tags with less documents and reduce trained words vector size
label_list = data.Issue.value_counts().to_dict()
less_label_list = dict((k, v) for k, v in label_list.items() if v <= 50)

In [34]:
data_less = data[data.Issue.isin(less_label_list)]

# Prepare data for training our doc2vec model, tag is corresponding issue
tagged_docu = [TaggedDocument(words=word_tokenize(_d), tags=[data_less.iloc[i,3]]) for i, _d in enumerate(data_less.clean_sentences)]
tagged_docu

# train the doc2vec model

doc_model_2 = Doc2Vec(size=50, alpha=0.025, min_alpha=0.00025, min_count=2, dm=1, negative=5)
doc_model_2.build_vocab(tagged_docu)

max_epochs = 100
for epoch in range(max_epochs):
    print('iteration {0}'.format(epoch))
    doc_model_2.train(tagged_docu,
                      total_examples=doc_model_2.corpus_count,
                      epochs=doc_model_2.iter)
    # decrease the learning rate
    doc_model_2.alpha -= 0.0002
    # fix the learning rate, no decay
    doc_model_2.min_alpha = doc_model_2.alpha


iteration 0
iteration 1
iteration 2
iteration 3
iteration 4
iteration 5
iteration 6
iteration 7
iteration 8
iteration 9
iteration 10
iteration 11
iteration 12
iteration 13
iteration 14
iteration 15
iteration 16
iteration 17
iteration 18
iteration 19
iteration 20
iteration 21
iteration 22
iteration 23
iteration 24
iteration 25
iteration 26
iteration 27
iteration 28
iteration 29
iteration 30
iteration 31
iteration 32
iteration 33
iteration 34
iteration 35
iteration 36
iteration 37
iteration 38
iteration 39
iteration 40
iteration 41
iteration 42
iteration 43
iteration 44
iteration 45
iteration 46
iteration 47
iteration 48
iteration 49
iteration 50
iteration 51
iteration 52
iteration 53
iteration 54
iteration 55
iteration 56
iteration 57
iteration 58
iteration 59
iteration 60
iteration 61
iteration 62
iteration 63
iteration 64
iteration 65
iteration 66
iteration 67
iteration 68
iteration 69
iteration 70
iteration 71
iteration 72
iteration 73
iteration 74
iteration 75
iteration 76
iteration

In [35]:
from sklearn.externals import joblib
joblib.dump(doc_model_2, "doc_model_2.pkl")

['doc_model_2.pkl']

In [158]:
# Precompute L2-normalized vectors.
doc_model_2.docvecs.init_sims(replace=True)

# Retrieve vector of each issue
from sklearn.metrics import pairwise_distances

issue_number = data_less.Issue.unique()
vec_list = [doc_model_2.docvecs[i] for i in issue_number]

# Calculate pairwise difference between each issue
for n in range(len(issue_number)):
    distances = pairwise_distances(np.array(vec_list), metric="cosine")
distances

# Compile pairwise distance into a dataframe
distance_df = pd.DataFrame(data=np.array(distances), index=issue_number, columns=issue_number)
distance_df = distance_df.where(np.triu(np.ones(distance_df.shape), k=1).astype(np.bool))
distance_df

Unnamed: 0,Account terms and changes,Adding money,Application processing delay,Applied for loan/did not receive money,Applying for a mortgage,Arbitration,Balance transfer,Bankruptcy,Can't contact lender or servicer,Can't stop charges to bank account,...,Sale of account,Shopping for a line of credit,Struggling to pay your bill,Trouble using the card,Unauthorized transactions or other transaction problem,Unauthorized transactions/trans. issues,Unsolicited issuance of credit card,Vehicle was damaged or destroyed the vehicle,"Was approved for a loan, but didn't receive money",Wrong amount charged or received
Account terms and changes,,0.818053,0.761953,0.709796,0.822243,0.682347,0.712403,0.434356,0.527992,0.786362,...,0.59985,0.555965,0.651138,0.687926,0.770389,0.570882,0.653688,0.784137,0.821067,0.917066
Adding money,,,0.620319,0.753963,0.726117,0.555627,0.635731,0.670305,0.740974,0.641255,...,0.635801,0.693036,0.751998,0.53744,0.589414,0.568335,0.846858,0.62143,0.747342,0.638577
Application processing delay,,,,0.661754,0.574134,0.561401,0.765517,0.887932,0.688343,0.670913,...,0.717661,0.67346,0.848256,0.590248,0.759123,0.652116,0.64914,0.57846,0.664253,0.926985
Applied for loan/did not receive money,,,,,0.547886,0.752703,0.814436,0.655566,0.53399,0.412307,...,0.806433,0.706921,0.668047,0.80841,0.810067,0.667621,0.676925,0.631739,0.451125,0.857185
Applying for a mortgage,,,,,,0.853878,0.783497,0.907718,0.676047,0.485882,...,0.756158,0.627216,0.797491,0.887713,0.791416,0.854862,0.755614,0.630873,0.615667,0.824684
Arbitration,,,,,,,0.67029,0.652845,0.745955,0.767316,...,0.548383,0.843237,0.59927,0.761758,0.713813,0.60877,0.454676,0.715856,0.880706,0.656288
Balance transfer,,,,,,,,0.615566,0.858434,0.75324,...,0.709589,0.93662,0.736869,0.815639,0.690603,0.838431,0.480063,0.82857,0.742,0.741401
Bankruptcy,,,,,,,,,0.743569,0.704278,...,0.627498,0.541708,0.492825,0.705176,0.814695,0.636835,0.74439,0.646419,0.713319,0.74079
Can't contact lender or servicer,,,,,,,,,,0.424743,...,0.75931,0.690376,0.625312,0.79069,0.783849,0.77843,0.806436,0.616311,0.502509,0.876208
Can't stop charges to bank account,,,,,,,,,,,...,0.678456,0.818301,0.598572,0.798139,0.746174,0.750272,0.757375,0.536514,0.59328,0.882418


In [159]:
# Select lables with distance less than 0.45
indices = np.where(distance_df < 0.45)
indices = [[distance_df.index[x], distance_df.columns[y]] for x, y in zip(*indices)]

# Merge similar labels to be a list
from itertools import product, groupby
# Inner lists to sets
merge_list = [set(x) for x in indices]

# Merge elements if some element in common
for a,b in product(merge_list, merge_list):
    if a.intersection(b):
        a.update(b)
        b.update(a)

# Back to list of lists
merge_list = sorted([sorted(x) for x in merge_list])

# Remove dups
merge_list2 = [merge_list for merge_list,_ in groupby(merge_list)]
merge_list2

[['Account terms and changes', 'Bankruptcy'],
 ['Adding money', 'Overdraft, savings or rewards features'],
 ['Applied for loan/did not receive money',
  'Applying for a mortgage',
  "Can't contact lender or servicer",
  "Can't stop charges to bank account",
  "Can't stop withdrawals from your bank account",
  'Charged bank acct wrong day or amt',
  'Confusing or missing disclosures',
  'Getting the loan',
  'Lender repossessed or sold the vehicle',
  'Lender sold the property',
  "Loan payment wasn't credited to your account",
  'Money was taken from your bank account on the wrong day or for the wrong amount',
  'Payment to acct not credited',
  'Problem with additional add-on products or services',
  "Received a loan you didn't apply for",
  'Vehicle was damaged or destroyed the vehicle',
  "Was approved for a loan, but didn't receive money"],
 ['Cash advance', 'Forbearance / Workout plans'],
 ['Credit determination', 'Managing the line of credit'],
 ['Incorrect exchange rate', 'Wrong

In [192]:
list_1 = []
list_2 = []
for i in merge_list2:
    d = {m:0 for m in i}
    for key, value in d.items():
        if key in issue_dict:
            d[key] = issue_dict[key]
    list_1.append(d)
    a = {key: d[key] for key in sorted(d, key=d.get, reverse=True)[:1]}
    list_2.append(a)    

group_label2 = []
for i in list_2:
    for key, value in i.items():
        group_label2.append(key)

group_label2

['Bankruptcy',
 'Adding money',
 "Can't contact lender or servicer",
 'Forbearance / Workout plans',
 'Credit determination',
 'Wrong amount charged or received',
 'Lost or stolen check',
 'Managing, opening, or closing your mobile wallet account',
 'Unsolicited issuance of credit card',
 'Unauthorized transactions/trans. issues']

In [193]:
# Replace issue with its similar issue which are also most frequent
merge_list = merge_list2 + merge_list1
group_label = group_label2 + group_label1

In [199]:
data['cluster_Issue'] = data['Issue']
for _, label_list in enumerate(merge_list):
    for i in range(len(data['cluster_Issue'])):
        if data.iloc[i,22] in label_list:
            for n in group_label:
                if n in label_list:
                    data.iloc[i,22] = n

In [200]:
len(data["cluster_Issue"].unique())

63

In [201]:
data["cluster_Issue"].unique()

array(['APR or interest rate', 'Account opening, closing, or management',
       'Bankruptcy', 'Adding money', 'Other features, terms, or problems',
       'Application processing delay',
       'Loan servicing, payments, escrow account',
       "Can't contact lender or servicer", 'Arbitration',
       'Attempts to collect debt not owed', 'Balance transfer',
       'Fees or interest',
       'Problem with a purchase shown on your statement',
       'Billing statement', 'Dealing with your lender or servicer',
       'Forbearance / Workout plans', 'Convenience checks',
       'Credit card protection / Debt protection', 'Credit determination',
       'Credit limit changed', 'Credit line increase/decrease',
       'Credit monitoring or identity theft protection services',
       'Incorrect information on your report',
       'Customer service / Customer relations',
       'Customer service/Customer relations', 'Delinquent account',
       'Disclosures', 'Fraud or scam', 'Getting a credit c

In [202]:
data.to_csv('cluster_issue.csv', index = False)