## Analyze trends in call content by looking for which issues are most frequent and what is the average count of issues per call.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
%matplotlib inline
import seaborn as sns
import requests as re

In [None]:
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

### Read the CSVs for NSSCrisisv1, NSSCrisisv2, CC2020, CC2021, CC2022

In [None]:
NSSCrisisv1 = pd.read_csv('../data/NSSCrisisv.1.csv')
print(NSSCrisisv1.shape)
NSSCrisisv1.head(3) 

In [None]:
# Use the row at position 1 as the column names and keep only the rows from
# position 2 onward as data; the rows above that are discarded.
header_row = NSSCrisisv1.iloc[1]
NSSCrisisv1 = pd.DataFrame(NSSCrisisv1.values[2:], columns=header_row)
# Preview the rebuilt frame.
NSSCrisisv1.head()

In [None]:
NSSCrisisv2 = pd.read_csv('../data/NSSCrisisv.2.csv')
print(NSSCrisisv2.shape) 
NSSCrisisv2.head(3)

In [None]:
# Use the row at position 1 as the column names and keep only the rows from
# position 2 onward as data; the rows above that are discarded.
header_row2 = NSSCrisisv2.iloc[1]
NSSCrisisv2 = pd.DataFrame(NSSCrisisv2.values[2:], columns=header_row2)
# Preview the rebuilt frame.
NSSCrisisv2.head()

In [None]:
CC2020 = pd.read_csv('../data/2020CallCenterVolume.csv')
CC2021 = pd.read_csv('../data/2021CallCenterVolume.csv')
CC2022 = pd.read_csv('../data/2022CallCenterVolume.csv')

In [None]:
CC2022.head()

In [None]:
# Swap 'Subjective' for 'Issues' in every v2 column name so the names carry
# the same 'CRISIS Issues' prefix that the column filters further down look for.
labels = [name.replace('Subjective', 'Issues') for name in NSSCrisisv2.columns]
NSSCrisisv2.columns = labels

In [None]:
## Reading the column names. FYI - this will only show the entire list IF you set the display options in step 2 above.
NSSCrisisv1.dtypes

### Code below removes all the columns that have no data at all.

In [None]:
NSSCrisisv1.dropna(how='all', axis=1, inplace=True)

In [None]:
NSSCrisisv2.dropna(how='all', axis=1, inplace=True)

In [None]:
### The commented-out code below would remove columns in which more than 98% of the values are null (currently disabled).

#pct_null = crisis1_df.isnull().sum() / len(crisis1_df)
#missing_features = pct_null[pct_null > 0.98].index
#crisis1_df.drop(missing_features, axis=1, inplace=True)


In [None]:
NSSCrisisv1.dtypes

In [None]:
#NSSCrisisv1['PhoneWorkerName'].value_counts()

In [None]:
NSSCrisisv2.dtypes

In [None]:
#NSSCrisisv2['PhoneWorkerName'].value_counts()

In [None]:
#NSSCrisisv1['ThirdPartyOrganization'].value_counts()

## Made two quick dfs based on a similar column.  This'll be one way to combine the datasets, but this only does a value count on the column.

# Tally callers per gender identity in each dataset, then merge the two
# tallies into one table with a single row per gender label.
NSSCrisisv1['CRISIS Demographics - Gender Identity'].value_counts()

gender1 = NSSCrisisv1['CRISIS Demographics - Gender Identity'].value_counts().rename_axis('CRISIS Demographics - Gender Identity').reset_index(name='counts')
gender1.head()

gender2 = NSSCrisisv2['CRISIS Demographics - Gender Identity'].value_counts().rename_axis('CRISIS Demographics - Gender Identity').reset_index(name='counts')
gender2.head()

# Stack both tallies; a gender label can appear twice here (once per dataset).
gendermerge = [gender1, gender2]
demogender = pd.concat(gendermerge)
demogender.head(10)

# Sum the per-dataset counts so each gender label ends up on one row.
demogenderfinal = demogender.groupby(['CRISIS Demographics - Gender Identity']).sum()
demogenderfinal.head(10)

demogenderfinal.sort_values(by='counts', ascending=False)

## we created two dfs filtering the original dfs.

In [None]:
# Keep only the v1 columns whose names start with 'CRISIS Issues' or 'Call'.
filter_col1 = [name for name in NSSCrisisv1.columns if name.startswith(('CRISIS Issues', 'Call'))]
filter1 = NSSCrisisv1[filter_col1]

In [None]:
# Keep only the v2 columns whose names start with 'CRISIS Issues' or 'Call'.
filter_col2 = [name for name in NSSCrisisv2.columns if name.startswith(('CRISIS Issues', 'Call'))]
filter2 = NSSCrisisv2[filter_col2]

In [None]:
all_ci = pd.concat([filter1, filter2])
all_ci

In [None]:
all_ci.dropna(how='all', axis=0, inplace=True)

In [None]:
all_ci

In [None]:
#all_ci['CRISIS Issues - Substances'].value_counts(ascending=False)
#all_ci['CRISIS Issues - Suicide'].value_counts(ascending=False)

### Rob shared a method to count the number of mental health issues recorded in each row.  You'll now see a new column added to the dataframe called MH_Issues_Count

In [None]:
mental_health_df = all_ci['CRISIS Issues - Mental Health'].notnull()

In [None]:
mental_health_df = all_ci[mental_health_df]

In [None]:
# Issues within a cell are separated by ';', so the number of issues in a row
# is the number of separators plus one.
count = [
    issues.count(";") + 1
    for issues in mental_health_df['CRISIS Issues - Mental Health']
]

In [None]:
# mental_health_df is a boolean-mask selection of all_ci, so assigning a column
# into it in place triggers pandas' SettingWithCopyWarning. assign() returns a
# new frame with the extra column instead.
mental_health_df = mental_health_df.assign(MH_Issues_Count=count)

In [None]:
mental_health_df.head(200)

In [None]:
# Average of the numeric columns (MH_Issues_Count is the one added above).
# The frame also holds text columns, which cannot be averaged: pandas 1.x
# warns and drops them, pandas 2.x raises TypeError, so restrict explicitly.
mental_health_df.mean(numeric_only=True)

## THIS IS PREPARATION FOR YOUR OWN DF WITH INFORMATION
## CRISIS Issues - Substances - This is what you're working on.

In [None]:
substances_df = all_ci['CRISIS Issues - Substances'].notnull()
substances_df = all_ci[substances_df]

In [None]:
# Issues within a cell are separated by ';', so the number of issues in a row
# is the number of separators plus one.
count = [
    issues.count(";") + 1
    for issues in substances_df['CRISIS Issues - Substances']
]

In [None]:
# substances_df is a boolean-mask selection of all_ci, so assigning a column
# into it in place triggers pandas' SettingWithCopyWarning. assign() returns a
# new frame with the extra column instead.
substances_df = substances_df.assign(substances_count=count)

In [None]:
substances_df.head(200)

In [None]:
#Created list count of the Crisis issues for substances. 
substancecount = substances_df['CRISIS Issues - Substances'].value_counts().rename_axis('CRISIS Issues - Substances').reset_index(name='counts')
substancecount

In [None]:
#Broke it down to the top 10  - I came back to this after adding two medication dependency rows. Had to get top 11 for the correction to be top 10.
substancecount_top10 = substancecount.head(11)
substancecount_top10

In [None]:
#Added together the medication dependency rows.
# NOTE(review): 220 appears to be a hand-computed total of the two medication
# dependency rows that the next cell drops; it goes stale if the data changes.
amendrow = {'CRISIS Issues - Substances': 'Medication Dependency', 'counts': 220}
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# concatenating a one-row frame works on every version.
substancecount_top10 = pd.concat(
    [substancecount_top10, pd.DataFrame([amendrow])],
    ignore_index=True,
)

display(substancecount_top10)

In [None]:
#Dropped the two extra rows after adding the 11th index
substancecounttop10 = substancecount_top10.drop(index=[5,7])
substancecounttop10

#I think...I could've done this all on one line above.  Whatever!  I reset the index to show the correct count amount.

In [None]:
substancecounttop10 = substancecounttop10.dropna()
substancecounttop10 = substancecounttop10.reset_index(drop=True)
substancecounttop10

In [None]:
#Sorted the counts ascending (smallest first).
substancesfinal = substancecounttop10.sort_values(by='counts', ascending=True)
substancesfinal

In [None]:
# Vertical bar chart of the amended top-10 substance categories.
subcount = substancesfinal['counts'].copy()
substances = substancesfinal['CRISIS Issues - Substances'].copy()


plt.subplots(figsize=(11, 10))
plt.bar(substances, subcount, color=['#fde725','#b5de2b','#6ece58','#35b779','#1f9e89','#26828e','#31688e','#3e4989','#482878','#440154'], edgecolor = 'black')
# No plt.legend() here: none of the bars carries a label, so the call only
# produced a "no handles with labels" warning and an empty legend.
plt.ylabel("Issue count")
plt.title("Crisis Issues - Substances")
plt.xticks(rotation = 70)
sns.set(style="darkgrid")
plt.show()

In [None]:
#charted the top 10 substances.
# Horizontal bar chart of the amended top-10 substance categories.
substances = substancesfinal['CRISIS Issues - Substances'].copy()
subcount = substancesfinal['counts'].copy()
bar_colors = [
    '#c2df23', '#86d549', '#52c569', '#2ab07f', '#1e9b8a',
    '#25858e', '#2d708e', '#38588c', '#433e85', '#440154',
]

plt.figure(figsize=(11, 10))
plt.barh(substances, subcount, color=bar_colors, edgecolor='black')
plt.xticks(rotation=0, fontsize=12)
plt.xlabel('Issues Count', fontsize=12)
plt.yticks(fontsize=12)
sns.set(style="darkgrid")
# Trailing semicolon suppresses the text repr of the title object in the notebook.
plt.title('Crisis Issues - Substances', fontsize=12);

CRISIS Issues - Relationships
Male-Female
Peer Group/Friend
Neighbor/Landlord
Family
Same-Sex Relationship
Other Relationships
Employer/Co-Worker
Marital/Divorce
Concern for Other
Custody Issues
Blended Family
Therapist/Medical Doctor
Parent-Child
Conflict With Other
Teacher/School
Inter-racial/cross-cultural issues
Codependency
Roommate
Crisis Counselor
Case Manager
Caregiver
Police
Animal/Pet
Church
Clergy
Single Parent-Child
Therapist/Medical Doctor
Animal/Pet
Peer Group/Friend
Marital/Divorce
Concern for Other
Male-Female
Conflict With Other
Family
Other Relationships
Same-Sex Relationship
Parent-Child
Neighbor/Landlord
Police
Inter-racial/cross-cultural issues
Teacher/School
Crisis Counselor
Case Manager
Roommate
Blended Family
Single Parent-Child
Church
Caregiver
Clergy
Employer/Co-Worker
Custody Issues
Runaway/Missing

In [None]:
data = pd.DataFrame(substances_df, columns = ["CRISIS Issues - Relationships"])
data

In [None]:
## well this failed spectacularly.  This is not the type of word count I'm looking for.  This looks for it in a sentence?
## I'm looking for something that counts key words in a column.
def data(str):
    """Return a dict mapping each whitespace-separated word in *str* to its count."""
    counts = {}
    for token in str.split():
        counts[token] = counts.get(token, 0) + 1
    return counts


print(data('Male-Female'))

In [None]:
#Running the melt method
substances=all_ci['CRISIS Issues - Substances'].str.split('; ', expand=True)

In [None]:
melted=substances.melt().value.value_counts()
melted

In [None]:
melted1 = pd.DataFrame(melted)
melted1

In [None]:
melted1.reset_index(inplace=True)

In [None]:
melted1

In [None]:
melted1 = melted1.sort_values(by='value', ascending=True)

## Two charts were used in the presentation.  This was the first chart.
## Note - This chart actually shows more than 1159 alcohol related issues.  When you do a .melt count on issues in other columns, it breaks it down and counts each cell with INDIVIDUAL issues.  So one cell could have more than one issue.  Because of this, you have more issues total than just 1159.

In [None]:
#charted every individual substance issue from the melted counts (not just a top 10).
# NOTE(review): the two names are swapped relative to what they hold. With the
# column names this cell expects, 'index' carries the issue labels and 'value'
# carries the counts, so `subcount` is the labels and `substances` the counts.
# The barh call below passes them in (labels, counts) order, so the chart is
# still right. Presumably written against pandas 1.x; pandas 2.x names the
# value_counts/reset_index columns differently - TODO confirm.
subcount = melted1['index'].copy()
substances = melted1['value'].copy()

plt.figure(figsize=(15, 13))
plt.barh(subcount, substances, color=['#d0e11c','#a0da39','#73d056','#4ac16d','#2db27d','#1fa187','#21918c','#277f8e','#2e6e8e','#365c8d','#3f4788','#46327e','#481b6d','#440154'])
plt.xticks(rotation = 0, fontsize=13)
plt.xlabel('Count of Calls', fontsize=15)
plt.yticks(fontsize=15)
sns.set(style="darkgrid")
# Trailing semicolon suppresses the text repr of the title object in the notebook.
plt.title('Number of Calls Mentioning Crisis Issues - Substances', fontsize=18);


## I want a correlated chart that compares all the other columns to any mention of alcohol in the substances column.

In [None]:
#Attempting to create a df that'll focus on alcohol substances and the top issue correlated with alcohol for each Crisis issue.
correlate = all_ci[['CRISIS Issues - Substances', 'CRISIS Issues - Abusive Behavior','CRISIS Issues - Emotional State','CRISIS Issues - Financial/Basic Needs','CRISIS Issues - Health/Physical','CRISIS Issues - Homicide','CRISIS Issues - Information or Services Needed','CRISIS Issues - Mental Health','CRISIS Issues - No Issue Call','CRISIS Issues - Other Description','CRISIS Issues - Relationships','CRISIS Issues - Suicide'
]].copy()
correlate

In [None]:
alcohol1 = correlate['CRISIS Issues - Substances'].str.contains('Alcohol Problem',case=False, na=False)
alcohol2 = correlate[alcohol1]

In [None]:
alcohol2

In [None]:
#CRISIS Issues - Abusive Behavior
# Split each multi-issue cell on '; ' into one column per issue, stack those
# columns with melt(), and count how often each individual issue appears in
# the rows whose substances entry mentions 'Alcohol Problem'.
abuse_behav1 = alcohol2['CRISIS Issues - Abusive Behavior'].str.split('; ', expand=True)
abuse_behav2 = abuse_behav1.melt().value.value_counts()
abuse_behav2

In [None]:
#CRISIS Issues - Emotional State
# Split each multi-issue cell on '; ' into one column per issue, stack those
# columns with melt(), and count how often each individual issue appears in
# the rows whose substances entry mentions 'Alcohol Problem'.
emotional_state1 = alcohol2['CRISIS Issues - Emotional State'].str.split('; ', expand=True)
emotional_state2 = emotional_state1.melt().value.value_counts()
emotional_state2

In [None]:
#CRISIS Issues - Financial/Basic Needs
# Split each multi-issue cell on '; ' into one column per issue, stack those
# columns with melt(), and count how often each individual issue appears in
# the rows whose substances entry mentions 'Alcohol Problem'.
financial1 = alcohol2['CRISIS Issues - Financial/Basic Needs'].str.split('; ', expand=True)
financial2 = financial1.melt().value.value_counts()
financial2

In [None]:
#CRISIS Issues - Health/Physical
health1 = alcohol2['CRISIS Issues - Health/Physical'].str.split('; ', expand=True)
health2 = health1.melt().value.value_counts()
health2

In [None]:
#CRISIS Issues - Homicide
homicide1 = alcohol2['CRISIS Issues - Homicide'].str.split('; ', expand=True)
homicide2 = homicide1.melt().value.value_counts()
homicide2

In [None]:
#CRISIS Issues - Information or Services Needed
information_ser1 = alcohol2['CRISIS Issues - Information or Services Needed'].str.split('; ', expand=True)
information_ser2 = information_ser1.melt().value.value_counts()
information_ser2

In [None]:
#CRISIS Issues - Mental Health
mental_health1 = alcohol2['CRISIS Issues - Mental Health'].str.split('; ', expand=True)
mental_health2 = mental_health1.melt().value.value_counts()
mental_health2

In [None]:
#CRISIS Issues - No Issue Call
no_issue_call1 = alcohol2['CRISIS Issues - No Issue Call'].str.split('; ', expand=True)
no_issue_call2 = no_issue_call1.melt().value.value_counts()
no_issue_call2

In [None]:
#CRISIS Issues - Other Description
other_desc1 = alcohol2['CRISIS Issues - Other Description'].str.split('; ', expand=True)
other_desc2 = other_desc1.melt().value.value_counts()
other_desc2

In [None]:
#CRISIS Issues - Relationships
relationships1 = alcohol2['CRISIS Issues - Relationships'].str.split('; ', expand=True)
relationships2 = relationships1.melt().value.value_counts()
relationships2

In [None]:
#CRISIS Issues - Suicide
suicide1 = alcohol2['CRISIS Issues - Suicide'].str.split('; ', expand=True)
suicide2 = suicide1.melt().value.value_counts()
suicide2

In [None]:
# Wrap each per-category value-count Series in a DataFrame (the *3 names).
# homicide2 and no_issue_call2 are not carried forward from here.
(
    abuse_behav3,
    emotional_state3,
    financial3,
    health3,
    information_ser3,
    mental_health3,
    other_desc3,
    relationships3,
    suicide3,
) = (
    pd.DataFrame(tally)
    for tally in (
        abuse_behav2,
        emotional_state2,
        financial2,
        health2,
        information_ser2,
        mental_health2,
        other_desc2,
        relationships2,
        suicide2,
    )
)

# Keep only the first row of each frame (the *4 names); value_counts sorts
# descending, so that row is the most frequent issue in the category.
(
    abuse_behav4,
    emotional_state4,
    financial4,
    health4,
    information_ser4,
    mental_health4,
    other_desc4,
    relationships4,
    suicide4,
) = (
    frame.head(1)
    for frame in (
        abuse_behav3,
        emotional_state3,
        financial3,
        health3,
        information_ser3,
        mental_health3,
        other_desc3,
        relationships3,
        suicide3,
    )
)

In [None]:
alcohol_cor1 = [abuse_behav4, emotional_state4, financial4, health4, information_ser4, mental_health4, other_desc4, relationships4, suicide4]
alcohol_cor2 = pd.concat(alcohol_cor1)
alcohol_cor2

In [None]:
alcohol_cor2.reset_index(inplace=True)

In [None]:
alcohol_cor2

In [None]:
alcohol_final = alcohol_cor2.sort_values(by='value', ascending=True)

In [None]:
alcohol_final

### Here I renamed index 8 to remove the caps used.

In [None]:
alcohol_final.at[8,'index']

In [None]:
alcohol_final.at[8,'index'] = 'Current Thoughts/Ideations'

In [None]:
alcohol_final

## I did it!  I created a df that had all the columns.  From there, I filtered the substances column to only show results for anytime Alcohol was entered.  After that, I created a count for each of the Crisis Issues columns and created separate dfs for that.  From there, I condensed it to only show me the top issue that was counted the most.  I created a final dataframe that combined those top issues, sorted it and graphed below.

## This was the second chart used in the presentation.

In [None]:
# Horizontal bar chart of the top issue per category among alcohol-related calls.
substances = alcohol_final['index'].copy()
subcount = alcohol_final['value'].copy()
bar_colors = [
    '#86d549', '#52c569', '#2ab07f', '#1e9b8a', '#25858e',
    '#2d708e', '#38588c', '#433e85', '#440154',
]

plt.figure(figsize=(15, 13))
plt.barh(substances, subcount, color=bar_colors)
plt.xticks(rotation=0, fontsize=13)
plt.xlabel('Count of Calls', fontsize=15)
plt.yticks(fontsize=15)
sns.set(style="darkgrid")
# Trailing semicolon suppresses the text repr of the title object in the notebook.
plt.title('Crisis Issues Correlated With Alcohol', fontsize=18);



In [None]:
# Seaborn version of the "issues correlated with alcohol" chart.
plt.subplots(figsize=(12,10))
sns.set(style="whitegrid")
sns.set(font_scale=1.2)
y=alcohol_final['index']
x=alcohol_final['value']
# Pass x by keyword: current seaborn only accepts `data` positionally, so a
# positional x collides with the data= argument and raises TypeError.
ax = sns.barplot(x=x, y=y, data=alcohol_final,
            label="Total", palette=sns.color_palette('viridis', n_colors=21), edgecolor='black')
plt.title('Crisis Issues Correlated With Alcohol', fontsize=15, fontstyle='oblique')
plt.ylabel('Concern')
sns.set(style="darkgrid")
# The plotted 'value' column holds value_counts() results, i.e. call counts,
# not percentages, so label the axis accordingly.
plt.xlabel('Count of Calls');


# Use this to pull the table results from the archived Numbeo cost-of-living page for the United States.

# read_html returns one DataFrame per HTML table found on the page; keep the
# one at position 3.
numbeo_url = "https://web.archive.org/web/201704083404/https://www.numbeo.com/cost-of-living/country_result.jsp?country=United+States"
table_17 = pd.read_html(numbeo_url)[3]