Source for this walkthrough: https://towardsdatascience.com/multi-class-text-classification-with-scikit-learn-12f1e60e0a9f

Download the data (Consumer Complaint Database): https://catalog.data.gov/dataset/consumer-complaint-database

In [2]:
import pandas as pd

In [15]:
# Read the complaint export and keep only its first 18,000 rows.
complaints_csv = 'data/Consumer_Complaints.csv'
df = pd.read_csv(complaints_csv).head(18000)
# Preview the first rows of the truncated frame.
df.head()

Unnamed: 0,Date received,Product,Sub-product,Issue,Sub-issue,Consumer complaint narrative,Company public response,Company,State,ZIP code,Tags,Consumer consent provided?,Submitted via,Date sent to company,Company response to consumer,Timely response?,Consumer disputed?,Complaint ID
0,03/12/2014,Mortgage,Other mortgage,"Loan modification,collection,foreclosure",,,,M&T BANK CORPORATION,MI,48382,,,Referral,03/17/2014,Closed with explanation,Yes,No,759217
1,10/01/2016,Credit reporting,,Incorrect information on credit report,Account status,I have outdated information on my credit repor...,Company has responded to the consumer and the ...,"TRANSUNION INTERMEDIATE HOLDINGS, INC.",AL,352XX,,Consent provided,Web,10/05/2016,Closed with explanation,Yes,No,2141773
2,10/17/2016,Consumer Loan,Vehicle loan,Managing the loan or lease,,I purchased a new car on XXXX XXXX. The car de...,,"CITIZENS FINANCIAL GROUP, INC.",PA,177XX,Older American,Consent provided,Web,10/20/2016,Closed with explanation,Yes,No,2163100
3,06/08/2014,Credit card,,Bankruptcy,,,,AMERICAN EXPRESS COMPANY,ID,83854,Older American,,Web,06/10/2014,Closed with explanation,Yes,Yes,885638
4,09/13/2014,Debt collection,Credit card,Communication tactics,Frequent or repeated calls,,,"CITIBANK, N.A.",VA,23233,,,Web,09/13/2014,Closed with explanation,Yes,Yes,1027760


In [16]:
from io import StringIO
# Keep only the label column and the free-text narrative.
# NOTE: this cell overwrites `df` and renames its columns, so it cannot be re-run
# without first re-running the cell that loads the CSV.
col = ['Product', 'Consumer complaint narrative']
# Select the rows that have a narrative in one `.loc` step and take an explicit copy,
# so the column assignment below writes to an independent frame instead of a slice
# of the loaded data (which is what triggers pandas' SettingWithCopyWarning).
df = df.loc[pd.notnull(df['Consumer complaint narrative']), col].copy()
df.columns = ['Product', 'Consumer_complaint_narrative']
# Encode every distinct product as an integer id (numbered in order of first appearance).
df['category_id'] = df['Product'].factorize()[0]
# One row per (product, id) pair, ordered by id, used to build the lookup tables.
category_id_df = df[['Product', 'category_id']].drop_duplicates().sort_values('category_id')
# product name -> id
category_to_id = dict(category_id_df.values)
# id -> product name
id_to_category = dict(category_id_df[['category_id', 'Product']].values)
df.head()

Unnamed: 0,Product,Consumer_complaint_narrative,category_id
1,Credit reporting,I have outdated information on my credit repor...,0
2,Consumer Loan,I purchased a new car on XXXX XXXX. The car de...,1
7,Credit reporting,An account on my credit report has a mistaken ...,0
12,Debt collection,This company refuses to provide me verificatio...,2
16,Debt collection,This complaint is in regards to Square Two Fin...,2


In [17]:
from sklearn.feature_extraction.text import TfidfVectorizer
# TF-IDF over unigrams and bigrams of the complaint narratives.
tfidf = TfidfVectorizer(
    sublinear_tf=True,
    min_df=5,
    norm='l2',
    encoding='latin-1',
    ngram_range=(1, 2),
    stop_words='english',
)
# Fit on the narratives, then densify the resulting sparse matrix.
tfidf_matrix = tfidf.fit_transform(df.Consumer_complaint_narrative)
features = tfidf_matrix.toarray()
# Integer class label for every narrative.
labels = df.category_id
# (number of narratives, number of terms)
features.shape

(3797, 10626)

In [18]:
from sklearn.feature_selection import chi2
import numpy as np
# Number of top-scoring unigrams and bigrams to print for each product.
N = 2

# The vocabulary is the same for every category, so build it once instead of on
# every loop iteration. `get_feature_names()` was removed in scikit-learn 1.2;
# use `get_feature_names_out()` when available and fall back on older releases.
if hasattr(tfidf, 'get_feature_names_out'):
    vocabulary = np.asarray(tfidf.get_feature_names_out())
else:
    vocabulary = np.asarray(tfidf.get_feature_names())

for Product, category_id in sorted(category_to_id.items()):
    # One-vs-rest chi-squared test of every term against "belongs to this category".
    features_chi2 = chi2(features, labels == category_id)
    # chi2 returns (scores, p-values); an ascending argsort of the scores puts the
    # most strongly associated terms at the end.
    indices = np.argsort(features_chi2[0])
    feature_names = vocabulary[indices]
    # Split the ranked terms by n-gram length (terms are space-separated tokens).
    unigrams = [v for v in feature_names if len(v.split(' ')) == 1]
    bigrams = [v for v in feature_names if len(v.split(' ')) == 2]
    print("# '{}':".format(Product))
    print("  . Most correlated unigrams:\n. {}".format('\n. '.join(unigrams[-N:])))
    print("  . Most correlated bigrams:\n. {}".format('\n. '.join(bigrams[-N:])))

# 'Bank account or service':
  . Most correlated unigrams:
. overdraft
. checking
  . Most correlated bigrams:
. overdraft fees
. checking account
# 'Checking or savings account':
  . Most correlated unigrams:
. pdf
. limitation
  . Most correlated bigrams:
. called citibank
. deposited xxxx
# 'Consumer Loan':
  . Most correlated unigrams:
. car
. vehicle
  . Most correlated bigrams:
. vehicle xxxx
. xxxx vehicle
# 'Credit card':
  . Most correlated unigrams:
. citi
. card
  . Most correlated bigrams:
. american express
. credit card
# 'Credit card or prepaid card':
  . Most correlated unigrams:
. 2018
. gift
  . Most correlated bigrams:
. xxxx cents
. xx 2018
# 'Credit reporting':
  . Most correlated unigrams:
. experian
. equifax
  . Most correlated bigrams:
. verify accounts
. credit report
# 'Credit reporting, credit repair services, or other personal consumer reports':
  . Most correlated unigrams:
. plate
. didn
  . Most correlated bigrams:
. consumer report
. valid xxxx
# 'Debt 

# A little work to clean up newlines in scraped text (each newline is replaced with a space)

In [19]:
paragraphs_jc = [["\n\n\n\n \n\n\n139.84\n\n\n\n\n\n \n\n\n.86\n\n\n\n\n\n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n$\n\n\n3.39\n\n\n\n\n\n\n\nPurchases of Equity Securities by the Issuer\nOn 15 September 2011, the Board of Directors authorized the repurchase of up\nto $1.0 billion of our outstanding common stock. This program\ndoes not have a stated expiration date. We repurchase shares pursuant to Rules 10b5-1 and 10b-18 under the\nSecurities Exchange Act of 1934, as amended, through repurchase agreements established with several brokers.\nThere were no purchases of stock during fiscal year 2017.\nAt 30 September 2017, $485.3\nmillion in share repurchase authorization remained. Additional purchases will be\ncompleted at the Company's discretion while maintaining sufficient funds for investing in its businesses and\ngrowth opportunities.\n15",
  '\n\nTable of Contents\nFinancing and Capital Structure\nCapital needs in 2017 were\nsatisfied primarily with cash from operations. At the end of 2017, total debt outstanding was $3,962.8 compared\nto $5,210.9 at the end of 2016, and cash and cash items were $3,273.6\ncompared to $1,293.2 at the end of\n2016.\nOn 31 March 2017, we entered into a five-year $2,500.0 revolving credit\nagreement with a syndicate of banks (the "2017 Credit Agreement"), under which senior unsecured debt is\navailable to both the Company and certain of its subsidiaries. The 2017 Credit Agreement provides a source of\nliquidity for the Company and supports its commercial paper program. The Company\'s only financial covenant is a\nmaximum ratio of total debt to total capitalization (total debt plus total equity) no greater than 70%. Total\ndebt at 30 September 2017 and 2016, expressed as a percentage of total capitalization (total debt plus total\nequity), was 28.0% and 41.9%, respectively. No borrowings were outstanding under the 2017 Credit\nAgreement as of 30 September 2017.\nThe 2017 Credit Agreement terminates and replaces our previous $2,690.0 revolving credit\nagreement (the "2013 Credit Agreement"), which was to mature 30 April 2018. No borrowings were outstanding\nunder the previous agreement at the time of its termination, and no early termination penalties were\nincurred.\nCommitments totaling $23.4 are maintained by our foreign subsidiaries, all of which was borrowed and outstanding at\n30 September 2017.\nAs of 30 September 2017,\nwe are in compliance with all of the financial and other covenants under our debt agreements.\nOn 15 September 2011, the Board of Directors authorized the repurchase of up\nto $1,000 of our outstanding common stock. We did not\npurchase any of our outstanding shares during fiscal years 2017, 2016 or 2015. 
At 30 September 2017, $485.3 in share repurchase authorization remains.\n2018 Outlook\nCash flows from operations and financing activities are expected to meet\nliquidity needs for the foreseeable future and our working capital balance was $3,387.7 at 30 September 2017.\nWe expect that we will continue to be in compliance with all of our financial covenants.\nOn 16 October 2017, we repaid a 1.2% Senior Note of $400 that matured on 15 October 2017. As\nof 30 September 2017, this note was reflected in current portion of long-term debt on the consolidated balance\nsheets.\nDividends\nDividends are declared by the Board of Directors and are usually paid during\nthe sixth week after the close of the fiscal quarter. During 2017, the Board of Directors increased the quarterly dividend from $.86 per share to $.95 per\nshare.',
  "\n\nTable of Contents\nThe unconditional purchase obligations also include other product supply and purchase\ncommitments and electric power and natural gas supply purchase obligations, which are primarily pass-through\ncontracts with our customers.\nPurchase commitments to spend approximately $300 for additional plant and equipment are included in the unconditional\npurchase obligations in 2018. In addition, we have purchase\ncommitments totaling approximately $180 in\n2018 relating to our long-term sale of equipment project for Saudi\nAramco's Jazan oil refinery.\n\n\n18. CAPITAL STOCK\nCommon Stock\nAuthorized common stock consists of 300\nmillion shares with a par value of $1 per share. As of 30 September 2017, 249 million shares were issued, with 218 million\noutstanding.\nOn 15 September 2011, the Board of Directors authorized the repurchase of up\nto $1,000 of our outstanding common stock. We repurchase\nshares pursuant to Rules 10b5-1 and 10b-18 under the Securities Exchange Act of 1934, as amended, through\nrepurchase agreements established with several brokers. We did not purchase any of our outstanding shares\nduring fiscal year 2017. At 30 September 2017,\n$485.3 in share repurchase authorization remains.\nThe following table reflects the changes in common shares:"],
 ['\n\nAs a holding company, the Parent Company depends on the ability of its subsidiaries to\ntransfer funds to it to meet its debt service and other obligations and to pay dividends on its common\nstock.\nThe Parent Company is a holding company and has no direct operations, and its most significant\nassets are the stock of its subsidiaries. Because the Parent Company conducts its operations through its\noperating subsidiaries, the Parent Company depends on those entities for dividends and other payments to\ngenerate the funds necessary to meet its debt service and other obligations, and to pay dividends on and\nconduct repurchases of its common stock.',
  "\n\n\n32\n\n\n\nStock Performance Graph\nThe following graph compares the five-year performance of the Company's common stock to the\nStandard & Poor's 500 Index (S&P 500) and the Standard & Poor's Life and Health Insurance Index\n(S&P Life and Health). The Standard & Poor's Life and Health Insurance Index includes: Aflac\nIncorporated, Brighthouse Financial Inc., Lincoln National Corporation, MetLife Inc., Principal Financial Group\nInc., Prudential Financial Inc., Torchmark Corporation and Unum Group.\n \n\nPerformance Graphic Index\nDecember 31,\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n \n\n\n2012\n\n\n\n\n\n \n\n\n2013\n\n\n\n\n\n \n\n\n2014\n\n\n\n\n\n \n\n\n2015\n\n\n\n\n\n \n\n\n2016\n\n\n\n\n\n \n\n\n2017\n\n\n\n\n\n\n\nAflac Incorporated\n\n\n100.00\n\n\n\n\n\n \n\n\n128.91\n\n\n\n\n\n \n\n\n120.81\n\n\n\n\n\n \n\n\n121.44\n\n\n\n\n\n \n\n\n144.65\n\n\n\n\n\n \n\n\n186.59\n\n\n\n\n\n\n\nS&P 500\n\n\n100.00\n\n\n\n\n\n \n\n\n132.39\n\n\n\n\n\n \n\n\n150.51\n\n\n\n\n\n \n\n\n152.59\n\n\n\n\n\n \n\n\n170.84\n\n\n\n\n\n \n\n\n208.14\n\n\n\n\n\n\n\nS&P Life & Health Insurance\n\n\n100.00\n\n\n\n\n\n \n\n\n163.48\n\n\n\n\n\n \n\n\n166.66\n\n\n\n\n\n \n\n\n156.14\n\n\n\n\n\n \n\n\n194.96\n\n\n\n\n\n \n\n\n226.98\n\n\n\n\n\n\n\nCopyright©\n2018 Standard & Poor's, a division of S&P Global. 
All rights reserved.\n\n33\n\n\n\nIssuer Purchases of Equity Securities\nDuring the year ended December 31,\n2017, we repurchased shares of Aflac common stock as follows:\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nPeriod\n\n\nTotal Number of Shares Purchased\n\n\n \n\n\nAverage Price Paid Per Share\n\n\n \n\n\nTotal Number of Shares Purchased as Part of Publicly Announced Plans or Programs\n\n\n \n\n\nMaximum     Number of     Shares that     May Yet Be     Purchased     Under the     Plans or\n    Programs    \n\n\n \n\n\n\n\nJanuary 1 - January 31\n\n\n \n\n\n3,819,299\n\n\n\n\n\n \n\n\n \n\n\n \n\n\n$\n\n\n69.94\n\n\n\n\n\n \n\n\n \n\n\n \n\n\n3,819,299\n\n\n\n\n\n \n\n\n \n\n\n \n\n\n22,934,155\n\n\n\n\n\n \n\n\n \n\n\n\n\nFebruary 1 - February 28\n\n\n \n\n\n1,988,420\n\n\n\n\n\n \n\n\n \n\n\n \n\n\n70.03\n\n\n\n\n\n \n\n\n \n\n\n \n\n\n1,853,000\n\n\n\n\n\n \n\n\n \n\n\n \n\n\n21,081,155\n\n\n\n\n\n \n\n\n \n\n\n\n\nMarch 1 - March 31\n\n\n \n\n\n2,825,614\n\n\n\n\n\n \n\n\n \n\n\n \n\n\n72.12\n\n\n\n\n\n \n\n\n \n\n\n \n\n\n2,821,009\n\n\n\n\n\n \n\n\n \n\n\n \n\n\n18,260,146\n\n\n\n\n\n \n\n\n \n\n\n\n\nApril 1 - April 30\n\n\n \n\n\n1,764,523\n\n\n\n\n\n \n\n\n \n\n\n \n\n\n73.70\n\n\n\n\n\n \n\n\n \n\n\n \n\n\n1,764,523\n\n\n\n\n\n \n\n\n \n\n\n \n\n\n16,495,623\n\n\n\n\n\n \n\n\n \n\n\n\n\nMay 1 - May 31\n\n\n \n\n\n501\n\n\n\n\n\n \n\n\n \n\n\n \n\n\n74.84\n\n\n\n\n\n \n\n\n \n\n\n \n\n\n0\n\n\n\n\n\n \n\n\n \n\n\n \n\n\n16,495,623\n\n\n\n\n\n \n\n\n \n\n\n\n\nJune 1  - June 30\n\n\n \n\n\n902,308\n\n\n\n\n\n \n\n\n \n\n\n \n\n\n78.08\n\n\n\n\n\n \n\n\n \n\n\n \n\n\n896,795\n\n\n\n\n\n \n\n\n \n\n\n \n\n\n15,598,828\n\n\n\n\n\n \n\n\n \n\n\n\n\nJuly 1 - July 31\n\n\n \n\n\n1,066,100\n\n\n\n\n\n \n\n\n \n\n\n \n\n\n77.88\n\n\n\n\n\n \n\n\n \n\n\n \n\n\n1,066,100\n\n\n\n\n\n \n\n\n \n\n\n \n\n\n14,532,728\n\n\n\n\n\n \n\n\n \n\n\n\n\nAugust 1 - August 31\n\n\n \n\n\n1,356,142\n\n",
  '\n\n\n\n\n\n \n\n\n(1) \n\n\n\n\n(1)The total remaining\nshares available for purchase at December 31, 2017,\nconsisted of 8,998,386 shares related to a 40,000,000 share\nrepurchase authorization by the board of directors in 2015 and 40,000,000 shares related to a 40,000,000 share\nrepurchase authorization by the board of directors announced in August 2017.\n(2)During the year\nended December 31, 2017, 156,456 shares were purchased in connection with income tax withholding\nobligations related to the vesting of restricted-share-based awards during the period.\n\n\n34\n\n\n\n\nITEM 6.     SELECTED FINANCIAL DATA\n\nAflac Incorporated and Subsidiaries\nYears Ended December 31,\n \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n(In millions, except for share and per-share amounts)\n\n\n2017\n\n\n \n\n\n2016\n\n\n \n\n\n2015\n\n\n \n\n\n2014\n\n\n \n\n\n2013\n\n\n\n\nRevenues:\n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n\n\nNet premiums, principally supplemental health insurance\n\n\n$\n\n\n18,531\n\n\n\n\n\n \n\n\n$\n\n\n19,225\n\n\n\n\n\n \n\n\n$\n\n\n17,570\n\n\n\n\n\n \n\n\n$\n\n\n19,072\n\n\n\n\n\n \n\n\n$\n\n\n20,135\n\n\n\n\n\n\n\nNet investment income\n\n\n3,220\n\n\n\n\n\n \n\n\n3,278\n\n\n\n\n\n \n\n\n3,135\n\n\n\n\n\n \n\n\n3,319\n\n\n\n\n\n \n\n\n3,293\n\n\n\n\n\n\n\nRealized investment gains (losses)\n\n\n(151\n\n\n)\n\n\n \n\n\n(14\n\n\n)\n\n\n \n\n\n106\n\n\n\n\n\n \n\n\n282\n\n\n\n\n\n \n\n\n426\n\n\n\n\n\n\n\nOther income\n\n\n67\n\n\n\n\n\n \n\n\n70\n\n\n\n\n\n \n\n\n61\n\n\n\n\n\n \n\n\n55\n\n\n\n\n\n \n\n\n85\n\n\n\n\n\n\n\nTotal revenues\n\n\n21,667\n\n\n\n\n\n \n\n\n22,559\n\n\n\n\n\n \n\n\n20,872\n\n\n\n\n\n \n\n\n22,728\n\n\n\n\n\n \n\n\n23,939\n\n\n\n\n\n\n\nBenefits and expenses:\n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n\n\nBenefits and claims, net\n\n\n12,181\n\n\n\n\n\n \n\n\n12,919\n\n\n\n\n\n \n\n\n11,746\n\n\n\n\n\n \n\n\n12,937\n\n\n\n\n\n 
\n\n\n13,813\n\n\n\n\n\n\n\nExpenses\n\n\n5,468\n\n\n\n\n\n \n\n\n5,573\n\n\n\n\n\n \n\n\n5,264\n\n\n\n\n\n \n\n\n5,300\n\n\n\n\n\n \n\n\n5,310\n\n\n\n\n\n\n\nTotal benefits and expenses\n\n\n17,649\n\n\n\n\n\n \n\n\n18,492\n\n\n\n\n\n \n\n\n17,010\n\n\n\n\n\n \n\n\n18,237\n\n\n\n\n\n \n\n\n19,123\n\n\n\n\n\n\n\nPretax earnings\n\n\n4,018\n\n\n\n\n\n \n\n\n4,067\n\n\n\n\n\n \n\n\n3,862\n\n\n\n\n\n \n\n\n4,491\n\n\n\n\n\n \n\n\n4,816\n\n\n\n\n\n\n\nIncome taxes\n\n\n(586\n\n\n)\n\n\n \n\n\n1,408\n\n\n\n\n\n \n\n\n1,329\n\n\n\n\n\n \n\n\n1,540\n\n\n\n\n\n \n\n\n1,658\n\n\n\n\n\n\n\nNet earnings\n\n\n$\n\n\n4,604\n\n\n\n\n\n \n\n\n$\n\n\n2,659\n\n\n\n\n\n \n\n\n$\n\n\n2,533\n\n\n\n\n\n \n\n\n$\n\n\n2,951\n\n\n\n\n\n \n\n\n$\n\n\n3,158\n\n\n\n\n\n\n\nShare and Per-Share Amounts\n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n\n\nNet earnings (basic)\n\n\n$\n\n\n11.63\n\n\n\n\n\n \n\n\n$\n\n\n6.46\n\n\n\n\n\n \n\n\n$\n\n\n5.88\n\n\n\n\n\n \n\n\n$\n\n\n6.54\n\n\n\n\n\n \n\n\n$\n\n\n6.80\n\n\n\n\n\n\n\nNet earnings (diluted)\n\n\n11.54\n\n\n\n\n\n \n\n\n6.42\n\n\n\n\n\n \n\n\n5.85\n\n\n\n\n\n \n\n\n6.50\n\n\n\n\n\n \n\n\n6.76\n\n\n\n\n\n\n\nCash dividends paid\n\n\n1.74\n\n\n\n\n\n \n\n\n1.66\n\n\n\n\n\n \n\n\n1.58\n\n\n\n\n\n \n\n\n1.50\n\n\n\n\n\n \n\n\n1.42\n\n\n\n\n\n\n\nCash dividends declared\n\n\n1.74\n\n\n\n\n\n \n\n\n1.66\n\n\n\n\n\n \n\n\n1.58\n\n\n\n\n\n \n\n\n1.50\n\n\n\n\n\n \n\n\n1.42\n\n\n\n\n\n\n\nWeighted-average common shares used for basic EPS (In thousands)\n\n\n396,021\n\n\n\n\n\n \n\n\n411,471\n\n\n\n\n\n ',
  "\n\nThe Company repurchased 17.8 million shares of its common stock in the open market for $1.35 billion\nunder the Company's share repurchase program in 2017, compared with the repurchase of 21.6\nmillion shares for $1.4 billion in 2016.\n\n\nCRITICAL ACCOUNTING ESTIMATES\n\nThe Company prepares its financial statements in accordance with U.S.\ngenerally accepted accounting principles (GAAP). These principles are established primarily by the Financial\nAccounting Standards Board (FASB). In this MD&A, references to U.S. GAAP issued by the FASB are derived\nfrom the FASB Accounting Standards CodificationTM\n(ASC). The preparation of financial statements in conformity with U.S. GAAP requires the\nCompany to make estimates based on currently available information when recording transactions resulting from\nbusiness operations. The estimates that the Company deems to be most critical to an understanding of Aflac's\nresults of operations and financial condition are those related to the valuation of investments and\nderivatives, deferred policy acquisition costs (DAC), liabilities for future policy benefits and unpaid policy\nclaims, and income taxes. The preparation and evaluation of these critical accounting estimates involve the use\nof various assumptions developed from management's analyses and judgments. The application of these critical\naccounting estimates determines the values at which 95% of\nthe Company's assets and 81% of its liabilities are reported\nas of December 31, 2017, and thus has a direct effect on net\nearnings and shareholders' equity. Subsequent experience or use of other assumptions could produce\nsignificantly different results.",
  '\n\n\n \n\n\n2.62\n\n\n%\n\n\n \n\n\n2.80\n\n\n%\n\n\n\n\n(1) Includes fixed\nmaturities and perpetual securities, loan receivables, equities, and excludes alternative investments in\nlimited partnerships\n(2) Reported on a gross\nyield basis; excludes investment expenses, external management fees, and amortized hedge costs\n(3) Net of investment\nexpenses and amortized hedge costs, year-to-date number reflected on a quarterly average basis\n\nOn January 1, 2016, the company revised its definition of purchases to include the\nreinvestment of proceeds related to unplanned sale activity. New purchases include all purchases related to\nfixed maturities and perpetuals, loan receivables, and equities. Securities lending/repurchase agreement\nactivity and capital contributions to alternatives are excluded. The definition of new money yield has also\nbeen revised to reflect this change. Yields for equities are based on the assumed dividend yield at the time of\npurchase.\n\nThe increase in the Aflac Japan new money yield in 2017 was primarily due to increases in yen\ninterest rates during much of the investment period as well as increased allocations to higher yielding asset\nclasses.',
  "\n\nOff-Balance Sheet Arrangements\n\nAs of December 31, 2017,\nthe Company had no material letters of credit, standby letters of credit, guarantees or standby repurchase\nobligations. See Note 15 of the Notes to the Consolidated Financial Statements for information on material\nunconditional purchase obligations that are not recorded on the Company's balance sheet.\n\nCAPITAL RESOURCES AND LIQUIDITY\nAflac provides the primary sources of liquidity to the Parent Company through dividends and\nmanagement fees. The following table presents the amounts provided for the years ended December 31.\nLiquidity Provided by Aflac to Parent Company",
  "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n(In millions)\n\n\n2017\n\n\n \n\n\n2016\n\n\n \n\n\n2015\n\n\n \n\n\n\n\nDividends declared or paid by Aflac\n\n\n$\n\n\n2,590\n\n\n\n\n\n(1) \n\n\n$\n\n\n2,000\n\n\n\n\n\n \n\n\n$\n\n\n2,393\n\n\n\n\n\n \n\n\n\n\nManagement fees paid by Aflac\n\n\n291\n\n\n\n\n\n \n\n\n260\n\n\n\n\n\n \n\n\n255\n\n\n\n\n\n \n\n\n\n\n(1) Includes securities\nof $622 at fair value which had a value of $656 at amortized cost\n\nThe primary uses of cash by the Parent Company are shareholder dividends, the repurchase of\nits common stock and interest on its outstanding indebtedness and operating expenses. The Parent Company's\nsources and uses of cash are reasonably predictable and are not expected to change materially in the future.\nFor additional information, see the Financing Activities subsection of this MD&A.",
  "\n\nThe Company's financial statements convey its financing arrangements during\nthe periods presented. The Company has not engaged in material intra-period short-term financings during the\nperiods presented that are not otherwise reported in its balance sheet or disclosed herein. The Company was in\ncompliance with all of the covenants of its notes payable and lines of credit at December 31, 2017. The Company has not entered into transactions involving the\ntransfer of financial assets with an obligation to repurchase financial assets that have been accounted for as\na sale under applicable accounting standards, including securities lending transactions. See Notes 1, 3, and 4\nof the Notes to the Consolidated Financial Statements for more information on the Company's securities lending\nand derivative activities. With the exception of disclosed activities in those referenced footnotes, the\nCompany does not have a known trend, demand, commitment, event or uncertainty that would reasonably result in\nits liquidity increasing or decreasing by a material amount. The Company's cash and cash equivalents include\nunrestricted cash on hand, money market instruments, and other debt instruments with a maturity of 90 days or\nless when purchased, all of which has minimal market, settlement or other risk exposure.",
  "\n\n\n1,277\n\n\n\n\n\n \n\n\n1,852\n\n\n\n\n\n \n\n\n1,770\n\n\n\n\n\n \n\n\n\n\n\n69\n\n\n\n\nUnder share repurchase authorizations from the Company's board of\ndirectors, the Company purchased 17.8 million shares of its\ncommon stock in the open market in 2017, compared with\n21.6 million shares in 2016\nand 21.2 million shares in\n2015. In August 2017, Aflac's board of directors authorized the\npurchase of an additional 40 million shares of its common stock. As of December 31,\n2017, a remaining balance of 49.0 million shares of the Company's common stock was available for purchase under share repurchase authorizations by\nits board of directors. The Company currently plans to purchase $1.1 billion\nto $1.4 billion of its common stock\nin 2018, assuming stable capital conditions and absent\ncompelling alternatives. See Note 11 of the Notes to the Consolidated Financial Statements for additional\ninformation.\n\nCash dividends paid to shareholders in 2017 of $1.74 per share\nincreased 4.8% over 2016. The 2016 dividend paid\nof $1.66 per share increased 5.1% over 2015. The following\ntable presents the dividend activity for the years ended December 31.\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n(In millions)\n\n\n2017\n\n\n \n\n\n2016\n\n\n \n\n\n2015\n\n\n \n\n\n\n\nDividends paid in cash\n\n\n$\n\n\n661\n\n\n\n\n\n \n\n\n$\n\n\n658\n\n\n\n\n\n \n\n\n$\n\n\n656\n\n\n\n\n\n \n\n\n\n\nDividends through issuance of treasury shares\n\n\n29\n\n\n\n\n\n \n\n\n27\n\n\n\n\n\n \n\n\n26\n\n\n\n\n\n \n\n\n\n\nTotal dividends to shareholders\n\n\n$\n\n\n690\n\n\n\n\n\n \n\n\n$\n\n\n685\n\n\n\n\n\n ",
  "\n\nTransfers and Servicing - Repurchase-to-Maturity Transactions, Repurchase\nFinancings, and Disclosures: In June 2014, the FASB issued updated guidance for\nrepurchase agreement and security lending transactions to change the accounting for repurchase-to-maturity\ntransactions and linked repurchase financings to be accounted for as secured borrowings, consistent with the\naccounting for other repurchase agreements. The amendments also require new disclosures to increase\ntransparency about the types of collateral pledged in repurchase agreements and similar transactions accounted\nfor as secured borrowings. The Company adopted accounting changes for the new guidance as of January 1, 2015,\nand adopted the required disclosures as of April 1, 2015. The adoption of this guidance did not have a\nsignificant impact on the Company's financial position, results of operations, or disclosures.",
  "\n\n\n \n\n\nUp to 30 days\n\n\n \n\n\n \n\n\nTotal\n\n\n\n\nSecurities lending transactions:\n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n\n\nPublic utilities\n\n\n$\n\n\n62\n\n\n\n\n\n \n\n\n$\n\n\n0\n\n\n\n\n\n \n\n\n \n\n\n$\n\n\n62\n\n\n\n\n\n\n\nBanks/financial institutions\n\n\n34\n\n\n\n\n\n \n\n\n0\n\n\n\n\n\n \n\n\n \n\n\n34\n\n\n\n\n\n\n\nOther corporate\n\n\n430\n\n\n\n\n\n \n\n\n0\n\n\n\n\n\n \n\n\n \n\n\n430\n\n\n\n\n\n\n\n          Total borrowings\n\n\n$\n\n\n526\n\n\n\n\n\n \n\n\n$\n\n\n0\n\n\n\n\n\n \n\n\n \n\n\n$\n\n\n526\n\n\n\n\n\n\n\nGross amount of recognized liabilities for securities lending transactions\n\n\n \n\n\n$\n\n\n526\n\n\n\n\n\n\n\nAmounts related to agreements not included in offsetting disclosure in Note 4\n\n\n \n\n\n$\n\n\n0\n\n\n\n\n\n\n\n(1) These securities are\npledged as collateral under the Company's U.S. securities lending program and can be called at its discretion;\ntherefore, they are classified as Overnight and Continuous.\n\nThe Company did not have any repurchase agreements or\nrepurchase-to-maturity transactions outstanding as of December 31, 2017\nand 2016, respectively.\n\nCertain fixed-maturity securities can be pledged as collateral as part of derivative\ntransactions, or pledged to support state deposit requirements on certain investment programs. For additional\ninformation regarding pledged securities related to derivative transactions, see Note 4.\n\nAt December 31, 2017,\ndebt securities with a fair value of $21 million were on\ndeposit with regulatory authorities in the United States (including U.S. territories) and Japan. The Company\nretains ownership of all securities on deposit and receives the related investment income.",
  "\n\n\n398,930\n\n\n\n\n\n \n\n\n413,921\n\n\n\n\n\n \n\n\n433,172\n\n\n\n\n\n\n\n\nShare Repurchase Program: During\n2017, the Company repurchased 17.8\nmillion shares of its common stock in the open market, compared with\n21.6 million shares in 2016\nand 21.2 million shares in\n2015. In August 2017, the Company's board of directors authorized\nthe purchase of an additional 40 million shares of its common\nstock. As of December 31, 2017, a remaining balance of\n49.0 million shares of the Company's common stock was available for\npurchase under share repurchase authorizations by its board of directors.\n\nVoting Rights: In accordance with the Parent\nCompany's articles of incorporation, shares of common stock are generally entitled to one vote per share until they have been held by the same beneficial owner for a\ncontinuous period of 48 months, at which time they become\nentitled to 10 votes per share.\nReclassifications from Accumulated Other Comprehensive Income\nThe tables below are reconciliations of accumulated other comprehensive income by component\nfor the years ended December 31.\n\n\n154\n\n\n\nChanges in Accumulated Other Comprehensive Income\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n2017\n\n\n\n\n(In millions)\n\n\nUnrealized Foreign Currency Translation Gains (Losses)\n\n\n \n\n\nUnrealized Gains (Losses) on Investment Securities\n\n\n \n\n\nUnrealized Gains (Losses) on Derivatives\n\n\n \n\n\nPension Liability Adjustment\n\n\n \n\n\nTotal\n\n\n\n\nBalance, beginning of period\n\n\n \n\n\n$\n\n\n(1,983\n\n\n)\n\n\n \n\n\n \n\n\n \n\n\n$\n\n\n4,805\n\n\n\n\n\n \n\n\n \n\n\n \n\n\n$\n\n\n(24\n\n\n)\n\n\n \n\n\n \n\n\n \n\n\n$\n\n\n(168\n\n\n)\n\n\n \n\n\n \n\n\n \n\n\n$\n\n\n2,630\n\n\n\n\n\n \n\n\n\n\nOther comprehensive income (loss) before reclassification\n\n\n \n\n\n233"]]

In [20]:
# Display the raw scraped data: a list of documents, each a list of paragraph strings.
# NOTE(review): this prints every paragraph in full; consider previewing a slice instead.
paragraphs_jc

[["\n\n\n\n \n\n\n139.84\n\n\n\n\n\n \n\n\n.86\n\n\n\n\n\n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n$\n\n\n3.39\n\n\n\n\n\n\n\nPurchases of Equity Securities by the Issuer\nOn 15 September 2011, the Board of Directors authorized the repurchase of up\nto $1.0 billion of our outstanding common stock. This program\ndoes not have a stated expiration date. We repurchase shares pursuant to Rules 10b5-1 and 10b-18 under the\nSecurities Exchange Act of 1934, as amended, through repurchase agreements established with several brokers.\nThere were no purchases of stock during fiscal year 2017.\nAt 30 September 2017, $485.3\nmillion in share repurchase authorization remained. Additional purchases will be\ncompleted at the Company's discretion while maintaining sufficient funds for investing in its businesses and\ngrowth opportunities.\n15",
  '\n\nTable of Contents\nFinancing and Capital Structure\nCapital needs in 2017 were\nsatisfied primarily with cash from operations. At the 

In [23]:
# Type check. paragraphs_jc[0][0] is a paragraph string, so the third index picks a
# single character of it, which is still a `str`.
type(paragraphs_jc[0][0][0])

str

In [102]:
# Document 1, paragraph 1: swap every newline for a single space.
first_paragraph_raw = paragraphs_jc[0][0]
d1p1 = " ".join(first_paragraph_raw.split("\n"))

In [103]:
# Inspect the first paragraph of the first document after newline replacement.
d1p1

"        139.84          .86                                        $   3.39        Purchases of Equity Securities by the Issuer On 15 September 2011, the Board of Directors authorized the repurchase of up to $1.0 billion of our outstanding common stock. This program does not have a stated expiration date. We repurchase shares pursuant to Rules 10b5-1 and 10b-18 under the Securities Exchange Act of 1934, as amended, through repurchase agreements established with several brokers. There were no purchases of stock during fiscal year 2017. At 30 September 2017, $485.3 million in share repurchase authorization remained. Additional purchases will be completed at the Company's discretion while maintaining sufficient funds for investing in its businesses and growth opportunities. 15"

In [107]:
def remove_newlines(ind1, ind2, paragraphs=None):
    """Return one paragraph with every newline character replaced by a space.

    Parameters
    ----------
    ind1 : int
        Index of the document in the outer list.
    ind2 : int
        Index of the paragraph inside that document.
    paragraphs : list of list of str, optional
        Nested list to read from. When omitted, the notebook-level
        ``paragraphs_jc`` is used, which keeps existing two-argument calls working.

    Returns
    -------
    str
        The selected paragraph with each "\\n" turned into " ". Runs of newlines
        become runs of spaces; nothing is collapsed or stripped.
    """
    if paragraphs is None:
        # Fall back to the scraped data defined earlier in the notebook.
        paragraphs = paragraphs_jc
    return paragraphs[ind1][ind2].replace("\n", " ")

In [110]:
# Document 1, paragraph 2, cleaned through the helper.
d1p2 = remove_newlines(ind1=0, ind2=1)

In [111]:
# Inspect the second paragraph of the first document after newline replacement.
d1p2

'  Table of Contents Financing and Capital Structure Capital needs in 2017 were satisfied primarily with cash from operations. At the end of 2017, total debt outstanding was $3,962.8 compared to $5,210.9 at the end of 2016, and cash and cash items were $3,273.6 compared to $1,293.2 at the end of 2016. On 31 March 2017, we entered into a five-year $2,500.0 revolving credit agreement with a syndicate of banks (the "2017 Credit Agreement"), under which senior unsecured debt is available to both the Company and certain of its subsidiaries. The 2017 Credit Agreement provides a source of liquidity for the Company and supports its commercial paper program. The Company\'s only financial covenant is a maximum ratio of total debt to total capitalization (total debt plus total equity) no greater than 70%. Total debt at 30 September 2017 and 2016, expressed as a percentage of total capitalization (total debt plus total equity), was 28.0% and 41.9%, respectively. No borrowings were outstanding unde

In [119]:
# create function to replace newlines for a paragraph
# (this redefines the helper of the same name from an earlier cell)
def remove_newlines(ind1, ind2, paragraphs=None):
    """Replace newline characters with spaces in one paragraph of a list of lists.

    Parameters
    ----------
    ind1 : int
        Index of the document in the outer list.
    ind2 : int
        Index of the paragraph inside that document.
    paragraphs : list of list of str, optional
        Nested list to read from. When omitted, the notebook-level
        ``paragraphs_jc`` is used, which keeps existing two-argument calls working.

    Returns
    -------
    str
        The selected paragraph with each "\\n" turned into " ". Runs of newlines
        become runs of spaces; nothing is collapsed or stripped.
    """
    if paragraphs is None:
        # Fall back to the scraped data defined earlier in the notebook.
        paragraphs = paragraphs_jc
    return paragraphs[ind1][ind2].replace("\n", " ")

# get list of indices ([document, paragraph] pairs) for slicing through list of lists
indicies = [
    [d, p]
    for d in range(len(paragraphs_jc))
    for p in range(len(paragraphs_jc[d]))
]

# Clean every paragraph, keeping one destination list per scraped document.
# The previous version hard-coded two destination lists, so every document after
# the first was merged into the second list; this scales with the number of
# documents and gives the same result for the two-document input.
no_newlines = [[] for _ in paragraphs_jc]
for d, p in indicies:
    # run function to replace newlines and file the result under its source document
    no_newlines[d].append(remove_newlines(d, p))

# Aliases for the first two documents, kept so code written against the
# two-document layout keeps working (empty list when that document is absent).
doc_1 = no_newlines[0] if len(no_newlines) > 0 else []
doc_2 = no_newlines[1] if len(no_newlines) > 1 else []

# Show the result to make sure it worked
no_newlines

[["        139.84          .86                                        $   3.39        Purchases of Equity Securities by the Issuer On 15 September 2011, the Board of Directors authorized the repurchase of up to $1.0 billion of our outstanding common stock. This program does not have a stated expiration date. We repurchase shares pursuant to Rules 10b5-1 and 10b-18 under the Securities Exchange Act of 1934, as amended, through repurchase agreements established with several brokers. There were no purchases of stock during fiscal year 2017. At 30 September 2017, $485.3 million in share repurchase authorization remained. Additional purchases will be completed at the Company's discretion while maintaining sufficient funds for investing in its businesses and growth opportunities. 15",
  '  Table of Contents Financing and Capital Structure Capital needs in 2017 were satisfied primarily with cash from operations. At the end of 2017, total debt outstanding was $3,962.8 compared to $5,210.9 at t