In [1]:
# Dropouts and number of barriers.
# Considers the average number of barriers for those who have dropped out,
# and compares it with the average number of barriers for those who have reached a positive destination (PD).
# Only those who are new engagements since 08 April (new system) are considered.

In [2]:
import pandas as pd
# Switch off pandas' chained-assignment warning for the rest of the notebook.
pd.options.mode.chained_assignment = None

# Load the raw export; the file is decoded as latin-1.
OFN = pd.read_csv('OFN.csv', encoding='latin-1')

In [3]:
# Keep only the four form types used in this analysis:
# Destination Achieved, Drop Out, Continuous Support and New Engagement.
desired_record_types = ['Destination Achieved', 'Drop Out', 'Continuous Support', 'New Engagement']
filtered_OFN = OFN.loc[OFN['Record Type'].isin(desired_record_types)]

In [4]:
# Split the filtered data into one data frame per Record Type (four in total).
form_type_of_row = filtered_OFN['Record Type']
destination_achieved = filtered_OFN.loc[form_type_of_row.eq('Destination Achieved')]
continuous_support = filtered_OFN.loc[form_type_of_row.eq('Continuous Support')]
new_engagement = filtered_OFN.loc[form_type_of_row.eq('New Engagement')]
drop_out = filtered_OFN.loc[form_type_of_row.eq('Drop Out')]

In [5]:
# Order the Continuous Support forms by 'Created Date', then keep the first
# row of each 'Full Name' group, i.e. one form per person.
# NOTE(review): 'Created Date' is sorted exactly as loaded; if it is still text
# rather than a datetime, the order is lexicographic -- confirm.
continuous_support_by_date = continuous_support.sort_values(by='Created Date')
continuous_support = continuous_support_by_date.groupby('Full Name').head(1)

In [6]:
# Destination Achieved: list every Full Name that appears on more than one form,
# with the number of forms held for that name.
is_repeated_da = destination_achieved.duplicated(subset='Full Name', keep=False)
destination_achieved_duplicates = destination_achieved.loc[is_repeated_da]
destination_achieved_duplicates_count = destination_achieved_duplicates['Full Name'].value_counts()

print("Destination Achieved Duplicates Value Counts:", destination_achieved_duplicates_count, sep="\n")


Destination Achieved Duplicates Value Counts:
EACC194               2
RUTS-2023-Hig90837    2
RUTS-2023-Mac92043    2
926207859             2
926207878             2
926207899             2
926207914             2
Name: Full Name, dtype: int64


In [7]:
# Order the Destination Achieved forms by 'Created Date'.
destination_achieved = destination_achieved.sort_values('Created Date')

# One row per Full Name: keep='last' retains the final row in the sorted order
# and drops every earlier row for the same name.
# NOTE(review): confirm that keeping the last (rather than the first) form is intended.
is_superseded_da = destination_achieved.duplicated(subset='Full Name', keep='last')
destination_achieved_no_duplicates = destination_achieved[~is_superseded_da]

In [8]:
# Continuous Support: list every Full Name that appears on more than one form,
# with the number of forms held for that name.
is_repeated_cs = continuous_support.duplicated(subset='Full Name', keep=False)
continuous_support_duplicates = continuous_support.loc[is_repeated_cs]
continuous_support_duplicates_count = continuous_support_duplicates['Full Name'].value_counts()

print("Continuous Support Duplicates Value Counts:", continuous_support_duplicates_count, sep="\n")


Continuous Support Duplicates Value Counts:
Series([], Name: Full Name, dtype: int64)


In [9]:
# New Engagement: list every Full Name that appears on more than one form,
# with the number of forms held for that name.
is_repeated_ne = new_engagement.duplicated(subset='Full Name', keep=False)
new_engagement_duplicates = new_engagement.loc[is_repeated_ne]
new_engagement_duplicates_count = new_engagement_duplicates['Full Name'].value_counts()

print("New Engagements Duplicates Value Counts:", new_engagement_duplicates_count, sep="\n")

New Engagements Duplicates Value Counts:
OFNAch35310                 4
AF2804531                   2
AF2850645                   2
AFC-331740                  2
ENA-OFN-NS-ENG2023107013    2
FAR-2023-HYND0009           2
S54                         2
OA20231121                  2
OFNMer36459                 2
Tull430                     2
Name: Full Name, dtype: int64


In [10]:
# Order the New Engagement forms by 'Created Date'.
new_engagement = new_engagement.sort_values('Created Date')

# One row per Full Name: keep='first' retains the first row in the sorted order
# and drops every later row for the same name.
is_repeat_ne = new_engagement.duplicated(subset='Full Name', keep='first')
new_engagement_no_duplicates = new_engagement[~is_repeat_ne]

In [11]:
# Drop Out: list every Full Name that appears on more than one form,
# with the number of forms held for that name.
is_repeated_do = drop_out.duplicated(subset='Full Name', keep=False)
drop_out_duplicates = drop_out.loc[is_repeated_do]
drop_out_duplicates_count = drop_out_duplicates['Full Name'].value_counts()

print("Drop Out Duplicates Value Counts:", drop_out_duplicates_count, sep="\n")

Drop Out Duplicates Value Counts:
AFC/265994           4
AFC/268445           4
AFC/269194           3
AFC/256436           3
AFC/256097           3
                    ..
AFC/267849           2
AFC/267792           2
AFC/267754           2
AFC/266913           2
FAR-2021-ALLS0057    2
Name: Full Name, Length: 82, dtype: int64


In [12]:
# Order the Drop Out forms by 'Created Date'.
drop_out = drop_out.sort_values('Created Date')

# One row per Full Name: keep='last' retains the final row in the sorted order
# and drops every earlier row for the same name.
# NOTE(review): confirm that keeping the last (rather than the first) form is intended.
is_superseded_do = drop_out.duplicated(subset='Full Name', keep='last')
drop_out_no_duplicates = drop_out[~is_superseded_do]

In [13]:
# Stack the four per-form tables (one row per Full Name in each) into a single data frame.
forms_to_combine = [
    destination_achieved_no_duplicates,
    drop_out_no_duplicates,
    continuous_support,
    new_engagement_no_duplicates,
]
OFN_four_forms = pd.concat(forms_to_combine)

In [14]:
# Write the combined forms to an Excel workbook; the row index is not written.
combined_forms_path = 'combined_forms.xlsx'
OFN_four_forms.to_excel(combined_forms_path, index=False)

In [15]:
# Two working subsets of the combined forms:
#   PD_dataframe      -> Destination Achieved + Continuous Support + New Engagement
#   Dropout_dataframe -> Drop Out + Continuous Support + New Engagement
pd_record_types = ['Destination Achieved', 'Continuous Support', 'New Engagement']
dropout_record_types = ['Drop Out', 'Continuous Support', 'New Engagement']
PD_dataframe = OFN_four_forms.loc[OFN_four_forms['Record Type'].isin(pd_record_types)]
Dropout_dataframe = OFN_four_forms.loc[OFN_four_forms['Record Type'].isin(dropout_record_types)]

# Full Names that occur exactly three times within a subset.
pd_forms_per_name = PD_dataframe['Full Name'].value_counts()
dropout_forms_per_name = Dropout_dataframe['Full Name'].value_counts()
full_names_with_all_forms_PD = pd_forms_per_name[pd_forms_per_name == 3].index
full_names_with_all_forms_Dropout = dropout_forms_per_name[dropout_forms_per_name == 3].index

# Select those Full Names from the combined frame (OFN_four_forms), so every
# record type held for a selected person is carried over, not only the three
# used to pick them.
PD_final = OFN_four_forms.loc[OFN_four_forms['Full Name'].isin(full_names_with_all_forms_PD)]
Dropout_final = OFN_four_forms.loc[OFN_four_forms['Full Name'].isin(full_names_with_all_forms_Dropout)]

In [16]:
# Write both final datasets to CSV (an existing file of the same name is
# overwritten); the row index is not written.
for final_frame, csv_name in ((PD_final, 'PD_final.csv'), (Dropout_final, 'Dropout_final.csv')):
    final_frame.to_csv(csv_name, index=False)

In [17]:
# Count the rows of each Record Type present in the two final datasets.
pd_final_record_counts = PD_final['Record Type'].value_counts()
dropout_final_record_counts = Dropout_final['Record Type'].value_counts()

print("PD_final record counts:", pd_final_record_counts, sep="\n")
print("\nDropout_final record counts:", dropout_final_record_counts, sep="\n")

PD_final record counts:
Destination Achieved    495
Continuous Support      495
New Engagement          495
Drop Out                 10
Name: Record Type, dtype: int64

Dropout_final record counts:
Drop Out                47
Continuous Support      47
New Engagement          47
Destination Achieved    10
Name: Record Type, dtype: int64


In [18]:
# Exclude from PD_final every Full Name that also has a Drop Out form,
# so those people are not counted in both groups.
names_with_drop_out = drop_out_no_duplicates['Full Name']
PD_final = PD_final.loc[~PD_final['Full Name'].isin(names_with_drop_out)]

In [19]:
# Re-count the Record Types in PD_final to check the result of the filtering.
PD_final_record_counts = PD_final['Record Type'].value_counts()
# The heading names PD_final, which is the dataset actually being counted here.
print("\nPD_final record counts:")
print(PD_final_record_counts)


Dropout_final record counts:
Destination Achieved    485
Continuous Support      485
New Engagement          485
Name: Record Type, dtype: int64


In [20]:
# Print the names of the two datasets used in the analysis below.
for heading in ("\nFinal Datasets to Analyse:", "\nDropout_final", "\nPD_final"):
    print(heading)


Final Datasets to Analyse:

Dropout_final

PD_final


In [21]:
# Comparative analyses: barriers of those who drop out and those who reach PD.
# Considering number of barriers for those who have dropped out (even if they later achieved a PD) and those who have reached PD (removing those who dropped out before reaching PD)


In [22]:
# PD_final
# Continuous Support forms only, taken as an independent copy so that columns
# assigned to this frame later are not written into a selection of PD_final.
PD_final_continuous_support = PD_final[PD_final['Record Type'] == 'Continuous Support'].copy()

In [23]:
# Remove white spaces from the "Barriers" column so each barrier label is a
# single token (labels are separated by semicolons).
PD_final_continuous_support['Barriers'] = PD_final_continuous_support['Barriers'].str.replace(' ', '')

# Collect every distinct barrier label that appears in any row.
unique_barriers = set()
for row in PD_final_continuous_support['Barriers'].dropna():
    unique_barriers.update(row.split(';'))

# A leading, trailing or doubled ';' yields an empty token; it is not a barrier
# and must not become a column with an empty name.
unique_barriers.discard('')

# Split each row once; rows without a string value stay missing (not a list).
barrier_tokens = PD_final_continuous_support['Barriers'].str.split(';')

# One 0/1 column per barrier. Iterating in sorted order keeps the column order
# the same on every run (plain set iteration order of strings is not stable
# between Python processes).
for barrier in sorted(unique_barriers):
    PD_final_continuous_support[barrier] = barrier_tokens.apply(
        lambda tokens: 1 if isinstance(tokens, list) and barrier in tokens else 0
    )

In [24]:
# Write the PD Continuous Support frame to an Excel workbook; the row index is not written.
excel_filename = 'PD_final_continuous_support.xlsx'
PD_final_continuous_support.to_excel(excel_filename, index=False)


In [25]:
# Barrier columns that make up the per-person total.
# Unemployment removed as not a barrier in this dataset, add in accordingly for future.
columns_to_count = [
    'MentalHealthChallenges', 'Refugeeorasylumseeker', 'Homeless',
    'LookedafterYP/CareExperienced', 'Criminalconviction',
    'Physicalhealthissues/disability', 'Transportationissues', 'FamilyIssues',
    'Carer', 'AddictionIssues', 'Literacy/Numeracyissues', 'Learningchallenges'
]

# Only sum the listed columns that exist in this frame: a listed barrier with
# no column here contributes zero instead of raising a KeyError.
present_barrier_columns = [
    column for column in columns_to_count
    if column in PD_final_continuous_support.columns
]

# Create 'barriers_all' column: the row-wise sum of the barrier columns.
PD_final_continuous_support['barriers_all'] = (
    PD_final_continuous_support[present_barrier_columns].sum(axis=1).astype(int)
)

In [26]:
# Show the per-row barrier totals for the PD group.
print(PD_final_continuous_support['barriers_all'])

5332    1
5243    1
6389    1
6354    2
5951    1
       ..
2099    2
2177    1
2122    1
2119    3
2219    2
Name: barriers_all, Length: 485, dtype: int64


In [27]:
# Mean of the 'barriers_all' totals across the PD group.
pd_barrier_totals = PD_final_continuous_support['barriers_all']
average_value = pd_barrier_totals.mean()

print(f"The average barriers for those achieving PD is: {average_value}")

The average barriers for those achieving PD is: 1.5195876288659793


In [28]:
# Dropout_final
# Continuous Support forms only, taken as an independent copy so that columns
# assigned to this frame later are not written into a selection of Dropout_final.
Dropout_final_continuous_support = Dropout_final[Dropout_final['Record Type'] == 'Continuous Support'].copy()

In [29]:
# Remove white spaces from the "Barriers" column so each barrier label is a
# single token (labels are separated by semicolons).
Dropout_final_continuous_support['Barriers'] = Dropout_final_continuous_support['Barriers'].str.replace(' ', '')

# Collect every distinct barrier label that appears in any row.
unique_barriers = set()
for row in Dropout_final_continuous_support['Barriers'].dropna():
    unique_barriers.update(row.split(';'))

# A leading, trailing or doubled ';' yields an empty token; it is not a barrier
# and must not become a column with an empty name.
unique_barriers.discard('')

# Split each row once; rows without a string value stay missing (not a list).
barrier_tokens = Dropout_final_continuous_support['Barriers'].str.split(';')

# One 0/1 column per barrier. Iterating in sorted order keeps the column order
# the same on every run (plain set iteration order of strings is not stable
# between Python processes).
for barrier in sorted(unique_barriers):
    Dropout_final_continuous_support[barrier] = barrier_tokens.apply(
        lambda tokens: 1 if isinstance(tokens, list) and barrier in tokens else 0
    )


In [30]:
# Barrier columns that make up the per-person total.
# 'Unemployment' removed as not a barrier in this dataset, add in accordingly for future.
# 'Refugeeorasylumseeker' is listed; it is only counted when that column exists
# in this frame (see the presence check below), so it no longer has to be
# removed from the list by hand.
columns_to_count = [
    'MentalHealthChallenges', 'Refugeeorasylumseeker', 'Homeless',
    'LookedafterYP/CareExperienced', 'Criminalconviction',
    'Physicalhealthissues/disability', 'Transportationissues', 'FamilyIssues',
    'Carer', 'AddictionIssues', 'Literacy/Numeracyissues', 'Learningchallenges'
]

# Only sum the listed columns that exist in this frame: a listed barrier with
# no column here contributes zero instead of raising a KeyError.
present_barrier_columns = [
    column for column in columns_to_count
    if column in Dropout_final_continuous_support.columns
]

# Create 'barriers_all' column: the row-wise sum of the barrier columns.
Dropout_final_continuous_support['barriers_all'] = (
    Dropout_final_continuous_support[present_barrier_columns].sum(axis=1).astype(int)
)

In [31]:
# Mean of the 'barriers_all' totals across the Dropout group.
dropout_barrier_totals = Dropout_final_continuous_support['barriers_all']
average_value_two = dropout_barrier_totals.mean()

print(f"The average barriers for Dropouts is: {average_value_two}")

The average barriers for Dropouts is: 1.9787234042553192


In [32]:
# How often each 'School support statements' value appears in the Dropout group.
dropout_school_support = Dropout_final_continuous_support['School support statements']
Dropout_schoolsupport_record_counts = dropout_school_support.value_counts()
print("\nSchool Support Statement counts:", Dropout_schoolsupport_record_counts, sep="\n")


School Support Statement counts:
Mostly bad           24
Mostly good          12
Not yet disclosed    10
Currently Unknown     1
Name: School support statements, dtype: int64


In [33]:
# How often each 'School support statements' value appears in the PD
# (positive destination achieved) group.
pd_school_support = PD_final_continuous_support['School support statements']
PD_schoolsupport_record_counts = pd_school_support.value_counts()
print("\nSchool Support Statement counts:", PD_schoolsupport_record_counts, sep="\n")


School Support Statement counts:
Mostly good          185
Mostly bad           181
Not yet disclosed    112
Name: School support statements, dtype: int64


In [34]:
# Wellbeing for PD - from first continuous support form.
# Mean of the 'General wellbeing/happiness rating' column.
pd_wellbeing_ratings = PD_final_continuous_support['General wellbeing/happiness rating']
average_value_wellbeing_PD = pd_wellbeing_ratings.mean()

print(f"The average wellbeing for YP reaching PD is: {average_value_wellbeing_PD}")

The average wellbeing for YP reaching PD is: 6.414432989690722


In [35]:
# SHANARRI ratings for Dropouts - from first continuous support form.
# Each value below is the mean of one rating column.
dropout_cs = Dropout_final_continuous_support
average_value_wellbeing = dropout_cs['General wellbeing/happiness rating'].mean()
average_value_safe = dropout_cs['Extent YP feels safe'].mean()
average_value_healthy = dropout_cs['Extent YP feels healthy'].mean()
average_value_achieving = dropout_cs['Extent YP feels they are achieving'].mean()
average_value_nurtured = dropout_cs['Extent YP feels nurtured'].mean()
average_value_active = dropout_cs['Extent YP feels active'].mean()
average_value_included = dropout_cs['Extent YP feels included'].mean()
average_value_respected = dropout_cs['Extent YP feels respected'].mean()
average_value_responsible = dropout_cs['Extent YP feels responsible'].mean()

# Report one line per rating.
print(
    f"The average wellbeing for Dropouts is: {average_value_wellbeing}",
    f"The average extent YP feels safe for Dropouts is: {average_value_safe}",
    f"The average extent YP feels healthy for Dropouts is: {average_value_healthy}",
    f"The average extent YP feels achieving for Dropouts is: {average_value_achieving}",
    f"The average extent YP feels nurtured for Dropouts is: {average_value_nurtured}",
    f"The average extent YP feels active for Dropouts is: {average_value_active}",
    f"The average extent YP feels included for Dropouts is: {average_value_included}",
    f"The average extent YP feels respected for Dropouts is: {average_value_respected}",
    f"The average extent YP feels responsible for Dropouts is: {average_value_responsible}",
    sep="\n",
)

The average wellbeing for Dropouts is: 5.297872340425532
The average extent YP feels safe for Dropouts is: 6.340425531914893
The average extent YP feels healthy for Dropouts is: 5.319148936170213
The average extent YP feels achieving for Dropouts is: 5.0
The average extent YP feels nurtured for Dropouts is: 5.9787234042553195
The average extent YP feels active for Dropouts is: 5.446808510638298
The average extent YP feels included for Dropouts is: 5.9361702127659575
The average extent YP feels respected for Dropouts is: 5.787234042553192
The average extent YP feels responsible for Dropouts is: 5.829787234042553


In [36]:
# SHANARRI ratings for PD - from first continuous support form.
# Each value below is the mean of one rating column.
pd_cs = PD_final_continuous_support
average_value_wellbeing_PD = pd_cs['General wellbeing/happiness rating'].mean()
average_value_safe_PD = pd_cs['Extent YP feels safe'].mean()
average_value_healthy_PD = pd_cs['Extent YP feels healthy'].mean()
average_value_achieving_PD = pd_cs['Extent YP feels they are achieving'].mean()
average_value_nurtured_PD = pd_cs['Extent YP feels nurtured'].mean()
average_value_active_PD = pd_cs['Extent YP feels active'].mean()
average_value_included_PD = pd_cs['Extent YP feels included'].mean()
average_value_respected_PD = pd_cs['Extent YP feels respected'].mean()
average_value_responsible_PD = pd_cs['Extent YP feels responsible'].mean()

# Report one line per rating.
print(
    f"The average wellbeing for PD is: {average_value_wellbeing_PD}",
    f"The average extent YP feels safe for PD is: {average_value_safe_PD}",
    f"The average extent YP feels healthy for PD is: {average_value_healthy_PD}",
    f"The average extent YP feels achieving for PD is: {average_value_achieving_PD}",
    f"The average extent YP feels nurtured for PD is: {average_value_nurtured_PD}",
    f"The average extent YP feels active for PD is: {average_value_active_PD}",
    f"The average extent YP feels included for PD is: {average_value_included_PD}",
    f"The average extent YP feels respected for PD is: {average_value_respected_PD}",
    f"The average extent YP feels responsible for PD is: {average_value_responsible_PD}",
    sep="\n",
)

The average wellbeing for PD is: 6.414432989690722
The average extent YP feels safe for PD is: 7.082474226804123
The average extent YP feels healthy for PD is: 6.507216494845361
The average extent YP feels achieving for PD is: 6.078350515463917
The average extent YP feels nurtured for PD is: 6.680412371134021
The average extent YP feels active for PD is: 6.424742268041237
The average extent YP feels included for PD is: 6.688659793814433
The average extent YP feels respected for PD is: 6.645360824742268
The average extent YP feels responsible for PD is: 6.802061855670103


In [37]:
# Mean of 'Age at point of engagement' for the PD group.
pd_engagement_ages = PD_final_continuous_support['Age at point of engagement']
average_value_age_PD = pd_engagement_ages.mean()

print(f"The average age for YP reaching PD is: {average_value_age_PD}")



The average age for YP reaching PD is: 17.31875


In [38]:
# Mean of 'Age at point of engagement' for the Dropout group.
dropout_engagement_ages = Dropout_final_continuous_support['Age at point of engagement']
average_value_age_Dropout = dropout_engagement_ages.mean()

print(f"The average age for YP Dropping out is: {average_value_age_Dropout}")

The average age for YP Dropping out is: 18.431818181818183
