# 1. Introductions

# 2. Import Library

In [1]:
# for data manipulation 
import pandas as pd 

# for data labeling
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# 3. Load Data

In [2]:
# Load clean Data
df_raw = pd.read_csv('data/consumer_complaints_cleaned.csv')
df_raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2221 entries, 0 to 2220
Data columns (total 18 columns):
 #   Column                        Non-Null Count  Dtype 
---  ------                        --------------  ----- 
 0   complaint_id                  2221 non-null   int64 
 1   date_received                 2221 non-null   object
 2   date_sent_to_company          2221 non-null   object
 3   product                       2221 non-null   object
 4   sub_product                   2221 non-null   object
 5   issue                         2221 non-null   object
 6   sub_issue                     2221 non-null   object
 7   consumer_complaint_narrative  2221 non-null   object
 8   company_public_response       2221 non-null   object
 9   company                       2221 non-null   object
 10  state                         2221 non-null   object
 11  zip_code                      2221 non-null   object
 12  tags                          2221 non-null   object
 13  consumer_consent_p

# 4. Select text value 

In [3]:
# Select column for labeling
data = df_raw['issue'] + ' ' + df_raw['sub_issue'] + ' ' + df_raw['consumer_complaint_narrative']
data

0       Can't stop charges to bank account Can't stop ...
1       Cont'd attempts collect debt not owed Debt is ...
2       Disclosure verification of debt Right to dispu...
3       Cont'd attempts collect debt not owed Debt is ...
4       Cont'd attempts collect debt not owed Debt is ...
                              ...                        
2216    Disclosure verification of debt Not given enou...
2217    Cont'd attempts collect debt not owed Debt was...
2218    Taking/threatening an illegal action Threatene...
2219    False statements or representation Attempted t...
2220    Cont'd attempts collect debt not owed Debt res...
Length: 2221, dtype: object

# 5. Get label for text

In [4]:
# Initialize empty array for storing results
results = []

# Create an instance of SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()

# Analyze each sentence in data_model
for sentence in data:
    vs = analyzer.polarity_scores(sentence)
    results.append(vs)
    print(sentence)
    print(vs)
    print()

# Initialize lists for storing sentiment categories
light = []
harsh = []
mid = []

# neutral_count = 0

# Threshold for determining sentiment categories
light_threshold = 0.1
mild_threshold = 0.75
harsh_threshold = 0.1

# Categorize each result based on sentiment scores
for result in results:
    #for negative sentiment
    if result['neg'] >= harsh_threshold:
        harsh.append(1)
    else:
        harsh.append(0)

    #for positive sentiment
    if result['pos'] >= light_threshold:
        light.append(1)
    else:
        light.append(0)

    #for neutral sentiment
    if result['neu'] >= mild_threshold:
        mid.append(1)
    else:
        mid.append(0)

# Convert data_model to a DataFrame if it is not already
data = pd.DataFrame(data, columns=['sentence'])

# Add sentiment categories to the DataFrame
data['light_complaints'] = light
data['harsh_complaints'] = harsh
data['mild_complaints'] = mid

print('light_complaints: ', data['light_complaints'].sum())
print('harsh_complaints: ', data['harsh_complaints'].sum())
print('mild_complaints:   ', data['mild_complaints'].sum())

Can't stop charges to bank account Can't stop charges to bank account I took out a Loan from Cash Central XXXX, Al for {$300.00} sense that time I lost my job and also my medications went through the roof so I was unable to pay the Loan back. Every sense that time I have been receiving phone calls everyday. Not a day pass they are calling me and they always call from a different number so I never know who it is calling. They have caused me to pay out so much money at my Bank, at the time my Bank was XXXX XXXX in XXXX, Al. They have caused me to pay out hundreds of dollars in overdraft fees, money that I could have paid them with but they would not stop trying to debit my account. They call every day starting at around XXXX I am telling them the same thing each time they call me that I do not have any money to pay them now if I did have the money I would pay them each month until I paid them up but until my income increase I am not able to pay them. I am on a fix income and I can not pa

# 6. Save the data 

In [5]:
data.to_csv('data/consumer_complaints_labeled.csv', index=False)