# Family and Children's Services Crisis Project

1. Examine call volume and identify surge times, seasons, or events.
2. Analyze trends in call content: identify which issues are most frequent and the average number of issues per call.
3. Assess regional and demographic trends in call content and call volume.
4. Determine the count and success rate of imminent risk calls.

In [None]:
import pandas as pd
# Raise pandas' display limits so long/wide frames are shown in full.
for _option, _limit in (
    ('display.max_rows', 500),
    ('display.max_columns', 500),
    ('display.width', 1000),
):
    pd.set_option(_option, _limit)
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import requests as re  # NOTE(review): this alias reuses the name of the stdlib regex module `re` — confirm later usages before renaming

In [None]:
# Load the two main Excel files, one per year (2021-2022)
df1, df2 = (
    pd.read_excel(workbook)
    for workbook in (
        '../data/NSSCrisisv_1.xlsx',  # 2021
        '../data/NSSCrisisv_2.xlsx',  # 2022
    )
)

In [None]:
# Rob: Remove, in place, every column that contains nothing but NaN
for _frame in (df1, df2):
    _frame.dropna(axis='columns', how='all', inplace=True)

In [None]:
# Rob: Drop the df1 columns in which more than 98% of the values are NaN
# NOTE(review): only df1 is filtered here; df2 keeps its sparse columns — confirm this is intended
pct_null = df1.isna().mean()  # fraction of missing values per column
missing_features = pct_null[pct_null.gt(0.98)].index
df1.drop(columns=missing_features, inplace=True)

In [None]:
# Load the call center Excel files, one per year (2020-2022)
df_2020, df_2021, df_2022 = (
    pd.read_excel(workbook)
    for workbook in (
        '../data/2020callcenter.xlsx',
        '../data/2021callcenter.xlsx',
        '../data/2022callcenter.xlsx',
    )
)

In [None]:
# Maggie: Give the ethnicity column the same name ('Ethnicity') in both frames
for _frame, _old_name in (
    (df1, 'CRISIS Demographics - Ethnicity'),
    (df2, 'CRISIS Demographics - Race/Ethnicity'),
):
    _frame.rename({_old_name: 'Ethnicity'}, axis='columns', inplace=True)

## EDA

In [None]:
# Rudy: List every column name in df1 together with its dtype
df1.dtypes

In [None]:
# Maggie: Collect the column labels present in both df1 and df2 and show them as a frame
_shared_columns = df1.columns.intersection(df2.columns)
samecolumnsdf = pd.DataFrame(_shared_columns)
samecolumnsdf

#### Most common states that calls originate from:

In [None]:
# Labels of the n most frequent values in df1's ThirdPartyStateProvince column
n = 10
df1['ThirdPartyStateProvince'].value_counts().head(n).index.to_list()

In [None]:
# 2021: ten most frequent ThirdPartyStateProvince values, highest count first
(
    df1['ThirdPartyStateProvince']
    .value_counts()
    .head(10)
    .sort_values(ascending=False)
)

In [None]:
# 2022: ten most frequent ThirdPartyStateProvince values, highest count first
(
    df2['ThirdPartyStateProvince']
    .value_counts()
    .head(10)
    .sort_values(ascending=False)
)

#### Most common zip codes (column `PostalCode`) that calls originate from:

In [None]:
# Ten most frequent PostalCode values in df1, highest count first.
# .head(10) replaces [:10]: an integer slice through Series[...] may be
# interpreted by label instead of by position when the index is numeric
# (PostalCode may load as int/float — TODO confirm dtype), whereas .head()
# always selects the first rows by position.
df1['PostalCode'].value_counts().head(10).sort_values(ascending=False)