## Analyze trends in call content: find which issues are most frequent and the average number of issues per call.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import requests as re

In [None]:
# Raise pandas' display limits so long column listings and wide frames are
# printed in full instead of being truncated.
for _option_name, _option_value in (
    ('display.max_rows', 500),
    ('display.max_columns', 500),
    ('display.width', 1000),
):
    pd.set_option(_option_name, _option_value)

### Read the CSVs for NSSCrisisv1, NSSCrisisv2, CC2020, CC2021, CC2022

In [None]:
# Load the v1 crisis CSV, print its (rows, columns) shape, and preview the
# first three rows.
NSSCrisisv1 = pd.read_csv('../data/NSSCrisisv.1.csv')
print(NSSCrisisv1.shape)
NSSCrisisv1.head(3) 

In [None]:
# The row at position 1 is used as the column names; only the rows after it
# are kept as data.
header_row = NSSCrisisv1.iloc[1]
_v1_records = NSSCrisisv1.values[2:]
NSSCrisisv1 = pd.DataFrame(data=_v1_records, columns=header_row)
NSSCrisisv1.head()

In [None]:
# Load the v2 crisis CSV, print its (rows, columns) shape, and preview the
# first three rows.
NSSCrisisv2 = pd.read_csv('../data/NSSCrisisv.2.csv')
print(NSSCrisisv2.shape) 
NSSCrisisv2.head(3)

In [None]:
# The row at position 1 is used as the column names; only the rows after it
# are kept as data.
header_row2 = NSSCrisisv2.iloc[1]
_v2_records = NSSCrisisv2.values[2:]
NSSCrisisv2 = pd.DataFrame(data=_v2_records, columns=header_row2)
NSSCrisisv2.head()

In [None]:
# Load the yearly call-center volume files, one DataFrame per year.
CC2020, CC2021, CC2022 = (
    pd.read_csv(_volume_csv)
    for _volume_csv in (
        '../data/2020CallCenterVolume.csv',
        '../data/2021CallCenterVolume.csv',
        '../data/2022CallCenterVolume.csv',
    )
)

In [None]:
# Preview the first rows of the 2022 call-center volume data.
CC2022.head()

In [None]:
# Rename every 'Subjective' column label to 'Issues' in the v2 frame.
labels = [
    column_name.replace('Subjective', 'Issues')
    for column_name in NSSCrisisv2.columns
]
NSSCrisisv2.columns = labels

In [None]:
# List every column name with its dtype. FYI: the entire list is only shown
# if the display options were raised with pd.set_option in the cell near the top.
NSSCrisisv1.dtypes

### Code below removes all the columns that have no data at all.

In [None]:
# Drop, in place, every v1 column whose values are all missing.
NSSCrisisv1.dropna(axis='columns', how='all', inplace=True)

In [None]:
# Drop, in place, every v2 column whose values are all missing.
NSSCrisisv2.dropna(axis='columns', how='all', inplace=True)

In [None]:
### This would remove columns in which more than 98% of the values are null (currently commented out).

#pct_null = crisis1_df.isnull().sum() / len(crisis1_df)
#missing_features = pct_null[pct_null > 0.98].index
#crisis1_df.drop(missing_features, axis=1, inplace=True)


In [None]:
# Show the v1 columns and dtypes that remain after dropping the all-empty columns.
NSSCrisisv1.dtypes

In [None]:
#NSSCrisisv1['PhoneWorkerName'].value_counts()

In [None]:
# Show the v2 columns and dtypes that remain after dropping the all-empty columns.
NSSCrisisv2.dtypes

In [None]:
#NSSCrisisv2['PhoneWorkerName'].value_counts()

In [None]:
#NSSCrisisv1['ThirdPartyOrganization'].value_counts()

## Build two quick DataFrames from a column the two datasets share. This is one way to combine the datasets, though it only produces value counts for that column.

# Count how often each gender identity appears in each dataset, stack the two
# count tables, and sum the counts per identity across both datasets.
NSSCrisisv1['CRISIS Demographics - Gender Identity'].value_counts()

# Per-dataset count tables with columns: gender identity, counts.
gender1 = (
    NSSCrisisv1['CRISIS Demographics - Gender Identity']
    .value_counts()
    .rename_axis('CRISIS Demographics - Gender Identity')
    .reset_index(name='counts')
)
gender1.head()

gender2 = (
    NSSCrisisv2['CRISIS Demographics - Gender Identity']
    .value_counts()
    .rename_axis('CRISIS Demographics - Gender Identity')
    .reset_index(name='counts')
)
gender2.head()

# Stack the two tables; an identity present in both datasets appears twice
# here and is collapsed by the groupby below.
gendermerge = [gender1, gender2]
demogender = pd.concat(gendermerge)
demogender.head(10)

demogenderfinal = demogender.groupby(['CRISIS Demographics - Gender Identity']).sum()
demogenderfinal.head(10)

demogenderfinal.sort_values(by='counts', ascending=False)

## We created two DataFrames by filtering the original DataFrames.

In [None]:
# Keep only the v1 columns whose names start with 'CRISIS Issues' or 'Call'.
filter_col1 = [
    column_name
    for column_name in NSSCrisisv1
    if column_name.startswith(('CRISIS Issues', 'Call'))
]
filter1 = NSSCrisisv1[filter_col1]

In [None]:
# Keep only the v2 columns whose names start with 'CRISIS Issues' or 'Call'.
filter_col2 = [
    column_name
    for column_name in NSSCrisisv2
    if column_name.startswith(('CRISIS Issues', 'Call'))
]
filter2 = NSSCrisisv2[filter_col2]

In [None]:
# Stack the filtered v1 and v2 frames into one frame of call/issue columns.
# Each frame keeps its own row labels, so index values can repeat in all_ci.
all_ci = pd.concat([filter1, filter2])
all_ci

In [None]:
# Drop, in place, every row in which all columns are missing.
all_ci.dropna(axis='index', how='all', inplace=True)

In [None]:
# Display the combined frame after the fully-empty rows were removed.
all_ci

In [None]:
#all_ci['CRISIS Issues - Substances'].value_counts(ascending=False)
#all_ci['CRISIS Issues - Suicide'].value_counts(ascending=False)

### Rob shared a method that counts the mental health issues listed in each row. The result is stored in a new DataFrame column called MH_Issues_Count.

In [None]:
# Boolean mask: True for rows that have a 'CRISIS Issues - Mental Health' value.
# (Despite the name, this is a Series of booleans until the next cell applies it.)
mental_health_df = all_ci['CRISIS Issues - Mental Health'].notna()

In [None]:
# Apply the boolean mask to keep only rows with a mental-health entry.
# .copy() makes this an independent frame, so adding the MH_Issues_Count
# column later does not trigger pandas' SettingWithCopyWarning.
mental_health_df = all_ci[mental_health_df].copy()

In [None]:
# Number of issues per row: entries are ';'-separated, so the number of
# issues is the number of separators plus one.
count = [
    entry.count(";") + 1
    for entry in mental_health_df['CRISIS Issues - Mental Health']
]

In [None]:
# Attach the per-row issue counts; `count` was built by iterating the same
# rows in order, so the list lines up positionally with the frame.
mental_health_df['MH_Issues_Count'] = count

In [None]:
# Preview up to the first 200 rows, including the new MH_Issues_Count column.
mental_health_df.head(200)

In [None]:
# Average of the numeric columns (i.e. the mean MH_Issues_Count per call).
# numeric_only=True is required: the other columns hold text, and pandas 2.x
# raises TypeError instead of silently skipping non-numeric columns.
mental_health_df.mean(numeric_only=True)

## THIS IS PREPARATION FOR YOUR OWN DF WITH INFORMATION
## CRISIS Issues - Substances - This is what you're working on.

In [None]:
# Keep only the rows that have a 'CRISIS Issues - Substances' value.
# The first assignment is a boolean mask; the second applies it.
substances_df = all_ci['CRISIS Issues - Substances'].notnull()
# .copy() makes this an independent frame, so adding the substances_count
# column later does not trigger pandas' SettingWithCopyWarning.
substances_df = all_ci[substances_df].copy()

In [None]:
# Number of substances per row: entries are ';'-separated, so the number of
# substances is the number of separators plus one.
count = [
    entry.count(";") + 1
    for entry in substances_df['CRISIS Issues - Substances']
]

In [None]:
# Attach the per-row substance counts; `count` was built by iterating the same
# rows in order, so the list lines up positionally with the frame.
substances_df['substances_count'] = count

In [None]:
# Preview up to the first 200 rows, including the new substances_count column.
substances_df.head(200)

In [None]:
# Frequency table of the distinct 'CRISIS Issues - Substances' entries,
# as a two-column frame: the entry text and its 'counts'.
_substance_frequencies = substances_df['CRISIS Issues - Substances'].value_counts()
substancecount = (
    _substance_frequencies
    .rename_axis('CRISIS Issues - Substances')
    .reset_index(name='counts')
)
substancecount

In [None]:
# Take the 11 most frequent entries rather than 10: two medication-dependency
# rows are later merged into one, which brings the final table back to 10.
substancecount_top10 = substancecount.iloc[:11]
substancecount_top10

In [None]:
# Add one combined "Medication Dependency" row to the table.
# NOTE(review): 220 is a hand-entered total of the medication dependency rows;
# re-check it whenever the underlying data changes.
amendrow = {'CRISIS Issues - Substances': 'Medication Dependency', 'counts': 220}
# DataFrame.append was removed in pandas 2.0; pd.concat with a one-row frame
# is the supported equivalent and gives the same result on older versions.
substancecount_top10 = pd.concat(
    [substancecount_top10, pd.DataFrame([amendrow])],
    ignore_index=True,
)

display(substancecount_top10)

In [None]:
# Remove the rows labelled 5 and 7 now that the combined row has been added
# at index 11.
substancecounttop10 = substancecount_top10.drop(labels=[5, 7], axis=0)
substancecounttop10

In [None]:
# Drop any rows with missing values and renumber the index from 0 so the
# row labels reflect the actual number of rows.
substancecounttop10 = substancecounttop10.dropna().reset_index(drop=True)
substancecounttop10

In [None]:
# Order the table from the most to the least frequent entry.
substancesfinal = substancecounttop10.sort_values('counts', ascending=False)
substancesfinal

In [None]:
# Bar chart of the top substance entries: one bar per entry, height = count.
substances = substancesfinal['CRISIS Issues - Substances'].copy()
subcount = substancesfinal['counts'].copy()

# One colour per bar, in the same order as the sorted table.
_bar_colors = [
    'indigo', 'red', 'green', 'blue', 'cyan',
    'orange', 'purple', 'yellow', 'turquoise', 'mediumspringgreen',
]

plt.subplots(figsize=(15, 10))
plt.bar(substances, subcount, color=_bar_colors, edgecolor='black')
# No plt.legend() here: the bars carry no labels, so a legend would be empty
# and matplotlib would only emit a "no artists with labels" warning.
plt.ylabel("Count")
plt.title("Crisis Issues - Substances")
plt.xticks(rotation=70)
plt.show()