In [1]:
import pandas as pd
import plotly.express as px

In [2]:
# Load your Excel file
file_path = 'Cases_Report___General_Analysis - 2024-09-16.xlsx'
df = pd.read_excel(file_path)
# Convert 'Date Closed' to datetime and extract the year
df['Date Closed'] = pd.to_datetime(df['Date Closed'])
df['Year Closed'] = df['Date Closed'].dt.year

In [3]:
df.head(2)

Unnamed: 0,ID#,Client First Name,Client Last Name,Disposition,Legal Problem,Special Legal Problem Code,Intake Date,Date Opened,Date Closed,Case Close Reason,Funding Code(s),Outcome,Age at Intake,Gender,Race,AMI Category,Program Name,Total Number Helped,Office Name,Year Closed
0,0.0,Phyllis,Coe,Closed,32 Divorce/Sep./Annul.,,01/08/2002,,2007-01-01,G Negotiated Settlement (with Litigation),40 General,,59,Female,Black/African American,,Staff,1,Adams Street,2007
1,0.0,Jennifer,Pavlosky,Closed,32 Divorce/Sep./Annul.,,04/14/2004,,2007-01-01,Xa Client Withdrew before Legal Assistance,15 State CLA,,32,Female,White,,Staff,7,Adams Street,2007


In [4]:
bins = [0, 17, 24, 34, 44, 54, 64, 80, 120]
labels = ['1-17', '18-24', '25-34', '35-44', '45-54', '55-64', '65-80', '80+']

In [5]:
# Bin the data
df['Age Group'] = pd.cut(df['Age at Intake'], bins=bins, labels=labels, right=True)

In [6]:
df['Case Close Reason'].value_counts()

Case Close Reason
A Counsel and Advice                                             81784
H Administrative Agency Decision                                 18976
B Limited Action (was Brief Service)                             18064
G Negotiated Settlement (with Litigation)                         3823
Xa Client Withdrew before Legal Assistance                        2872
F Negotiated Settlement (without Litigation)                      2832
Ia Uncontested Court Decisions                                    2730
K Other                                                           2714
L Extensive Service (not FGHI)                                    2391
Ib Contested Court Decisions                                      1884
C Referred After Legal Assessment(Pre2009)                        1614
I Court Decision(Pre2009)                                          854
Xe Not timely closed                                               284
D Insufficient Merit to Proceed(Pre2009)                   

In [7]:
# List of values to filter by
case_close_reasons = [
    'A Counsel and Advice',
    'H Administrative Agency Decision',
    'B Limited Action (was Brief Service)',
    'G Negotiated Settlement (with Litigation)',
    'F Negotiated Settlement (without Litigation)',
    'Ia Uncontested Court Decisions',
    'L Extensive Service (not FGHI)',
    'Ib Contested Court Decisions',
    'C Referred After Legal Assessment(Pre2009)',
    'I Court Decision(Pre2009)',
    'D Insufficient Merit to Proceed(Pre2009)',
    'J Change in Eligibility Status(Pre2009)',
    'Ic Appeals'
]

# Filter the DataFrame
new_df = df[df['Case Close Reason'].isin(case_close_reasons)]

# Count the occurrences of each 'Case Close Reason'
case_close_reason_counts = new_df['Case Close Reason'].value_counts()
print(case_close_reason_counts)

Case Close Reason
A Counsel and Advice                            81784
H Administrative Agency Decision                18976
B Limited Action (was Brief Service)            18064
G Negotiated Settlement (with Litigation)        3823
F Negotiated Settlement (without Litigation)     2832
Ia Uncontested Court Decisions                   2730
L Extensive Service (not FGHI)                   2391
Ib Contested Court Decisions                     1884
C Referred After Legal Assessment(Pre2009)       1614
I Court Decision(Pre2009)                         854
D Insufficient Merit to Proceed(Pre2009)          231
J Change in Eligibility Status(Pre2009)            49
Ic Appeals                                         32
Name: count, dtype: int64


In [8]:
new_df.head()

Unnamed: 0,ID#,Client First Name,Client Last Name,Disposition,Legal Problem,Special Legal Problem Code,Intake Date,Date Opened,Date Closed,Case Close Reason,...,Outcome,Age at Intake,Gender,Race,AMI Category,Program Name,Total Number Helped,Office Name,Year Closed,Age Group
0,0.0,Phyllis,Coe,Closed,32 Divorce/Sep./Annul.,,01/08/2002,,2007-01-01,G Negotiated Settlement (with Litigation),...,,59,Female,Black/African American,,Staff,1,Adams Street,2007,55-64
2,0.0,Nancy Jo,Pierce,Closed,32 Divorce/Sep./Annul.,,09/14/2004,,2007-01-01,I Court Decision(Pre2009),...,,36,Female,White,,Staff,5,Adams Street,2007,35-44
3,0.0,Karen,Pike,Closed,32 Divorce/Sep./Annul.,,04/26/2005,,2007-01-01,G Negotiated Settlement (with Litigation),...,,34,Female,Native American,,Staff,5,Adams Street,2007,25-34
4,0.0,Roy James,Aguilar,Closed,32 Divorce/Sep./Annul.,320 Divorce-Served Papers,06/21/2006,,2007-01-02,I Court Decision(Pre2009),...,,45,Male,White,,Staff,1,Clay County,2007,45-54
5,0.0,Marilyne M.,Alexander,Closed,32 Divorce/Sep./Annul.,320 Divorce-Served Papers,10/07/2005,,2007-01-02,I Court Decision(Pre2009),...,,39,Female,Black/African American,,Staff,8,Clay County,2007,35-44


In [9]:
# Group by Close Reason and Year
grouped = new_df.groupby(['Date Closed',
                      'Year Closed',
                      'Program Name',
                      'Legal Problem',
                      'Special Legal Problem Code',
                      'Funding Code(s)',
                      'Case Close Reason',
                      'Age Group',
                      'Gender',
                      'Race',
                      'Total Number Helped',
                      'Office Name'], 
                      observed=True)['ID#'].count().reset_index()

# Save the data to JSON format
grouped.to_json('data_9-16.json', orient='records')

In [10]:
# Get counts for each age group
age_distribution = new_df['Age Group'].value_counts().sort_index()

# Convert to a dictionary suitable for Chart.js
age_distribution_dict = age_distribution.to_dict()

# Example output for use in Chart.js
print(age_distribution_dict)

{'1-17': 4602, '18-24': 10311, '25-34': 30078, '35-44': 29739, '45-54': 26712, '55-64': 17878, '65-80': 12003, '80+': 2372}


In [11]:
# Group by Close Reason and calculate percentages per year
test_year = 2021  # Example year to filter
df_filtered = new_df[new_df['Year Closed'] == test_year]

In [12]:
# Group by Close Reason
test_close_reason_counts = df_filtered.groupby('Case Close Reason')['ID#'].count().reset_index(name='Count')

# Generate pie chart
fig = px.pie(test_close_reason_counts, values='Count', names='Case Close Reason', title='TEST - Close Reasons for 2021')
fig.show()

In [13]:
# Group by Gender
test_gender_counts = df_filtered.groupby('Gender')['ID#'].count().reset_index(name='Count')

# Generate pie chart
fig = px.pie(test_gender_counts, values='Count', names='Gender', title='TEST - Gender Split for 2021')
fig.show()

In [15]:
# Group by Intake Office
intake_office_counts = df_filtered.groupby('Office Name')['ID#'].count().reset_index(name='Count')

# Generate pie chart
fig = px.pie(intake_office_counts, values='Count', names='Office Name', title='TEST - Intake Office Distribution for 2021')
fig.show()