In [1]:
# Dropouts and number of barriers.
# Compare the average number of barriers for those who have dropped out
# with the average number of barriers for those who have reached PD.
# Only those who are new engagements since 08 April (new system) are considered.

In [2]:
import pandas as pd
# Load the raw OFN export; the file is decoded as Latin-1.
OFN = pd.read_csv(filepath_or_buffer='OFN.csv', encoding='latin-1')

In [3]:
# Keep only the four form types this analysis uses; rows with any other
# Record Type are discarded.
desired_record_types = ['Destination Achieved', 'Drop Out', 'Continuous Support', 'New Engagement']
is_desired_form = OFN['Record Type'].isin(desired_record_types)
filtered_OFN = OFN.loc[is_desired_form]

In [4]:
# Split the filtered forms into four data frames, one per Record Type.
record_type = filtered_OFN['Record Type']
destination_achieved = filtered_OFN.loc[record_type.eq('Destination Achieved')]
continuous_support = filtered_OFN.loc[record_type.eq('Continuous Support')]
new_engagement = filtered_OFN.loc[record_type.eq('New Engagement')]
drop_out = filtered_OFN.loc[record_type.eq('Drop Out')]

In [5]:
# Keep only each person's earliest Continuous Support form, by Created Date.
#
# The CSV is loaded without date parsing, so 'Created Date' is sorted through a
# datetime key rather than as raw text: a plain text sort orders day- or
# month-first dates alphabetically, not chronologically, and would pick the
# wrong "first" form. The column itself is left untouched in the result.
# NOTE(review): dayfirst=True assumes UK-style dd/mm/yyyy dates - confirm
# against the export. Values that cannot be parsed raise an error here instead
# of being silently mis-ordered.
# kind='stable' keeps the existing row order for forms that share a timestamp,
# so the chosen form is deterministic.
continuous_support = (
    continuous_support
    .sort_values(
        by='Created Date',
        key=lambda dates: pd.to_datetime(dates, dayfirst=True),
        kind='stable',
    )
    .groupby('Full Name')
    .head(1)
)

In [6]:
# Stack the four per-type frames back into a single data frame
# (rows keep their original index labels).
form_frames = [destination_achieved, drop_out, continuous_support, new_engagement]
OFN_four_forms = pd.concat(form_frames)

In [7]:
# Write the combined forms to an Excel workbook, leaving out the index column.
OFN_four_forms.to_excel(excel_writer='combined_forms.xlsx', index=False)

In [8]:
# Record Types that make up each cohort: the outcome form plus the
# Continuous Support and New Engagement forms.
pd_record_types = ['Destination Achieved', 'Continuous Support', 'New Engagement']
dropout_record_types = ['Drop Out', 'Continuous Support', 'New Engagement']


def _names_with_all_forms(cohort_forms, required_types):
    """Return the Full Names that have at least one form of every required type.

    Parameters
    ----------
    cohort_forms : pandas.DataFrame
        Forms with 'Full Name' and 'Record Type' columns.
    required_types : list of str
        Record Type values a person must hold to qualify.

    Returns
    -------
    pandas.Index
        The qualifying 'Full Name' values.
    """
    relevant = cohort_forms[cohort_forms['Record Type'].isin(required_types)]
    # Count DISTINCT record types per person. Counting rows instead would let
    # someone with, say, three New Engagement forms pass as "has all three",
    # and would reject someone who has every type plus one duplicate form.
    types_per_person = relevant.groupby('Full Name')['Record Type'].nunique()
    return types_per_person[types_per_person == len(set(required_types))].index


# Create data frames with specific combinations
PD_dataframe = OFN_four_forms[OFN_four_forms['Record Type'].isin(pd_record_types)]
Dropout_dataframe = OFN_four_forms[OFN_four_forms['Record Type'].isin(dropout_record_types)]

# Find Full Names that have all three form types in each data frame
full_names_with_all_forms_PD = _names_with_all_forms(PD_dataframe, pd_record_types)
full_names_with_all_forms_Dropout = _names_with_all_forms(Dropout_dataframe, dropout_record_types)

# Keep every combined-form row belonging to those Full Names.
# NOTE(review): the filter runs against OFN_four_forms, so PD_final also keeps
# any 'Drop Out' rows for these people (and Dropout_final any 'Destination
# Achieved' rows) - confirm this is intended rather than filtering
# PD_dataframe / Dropout_dataframe.
# NOTE(review): people are matched on 'Full Name' alone; two different people
# sharing a name would be merged - verify whether a unique ID is available.
PD_final = OFN_four_forms[OFN_four_forms['Full Name'].isin(full_names_with_all_forms_PD)]
Dropout_final = OFN_four_forms[OFN_four_forms['Full Name'].isin(full_names_with_all_forms_Dropout)]

In [10]:
# Write each cohort to its own CSV file, without the index column.
for cohort_frame, csv_name in ((PD_final, 'PD_final.csv'), (Dropout_final, 'Dropout_final.csv')):
    cohort_frame.to_csv(csv_name, index=False)