In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [4]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [5]:
df3 = pd.read_csv("processed_data1.csv")

In [6]:
df3.head(2)

Unnamed: 0,Type_of_Organisation,Was the Violence Fatal,Sex of survivor,Age of survivor,MARITAL STATUS,DATE OF INCIDENT,DATE REPORTED,LOCATION OF VIOLENCE (STATE),LOCATION OF VIOLENCE (L.G.A),LOCATION OF VIOLENCE (WARD),TIME OF THE DAY THAT INCIDENT TOOK PLACE,DOES THE SURVIVOR WANT ACCESS TO JUSTICE,OUTCOME OF PROSECUTION,DATE JUSTICE WAS RECEIVED,HAS THE CASE BEEN CLOSED,WHO CLOSED THE CASE?,DATE CASE WAS CLOSED,APPROVED BY ORG. SUPERVISOR_DATE,APPROVED BY LGA SUPERVISOR_DATE,APPROVED BY STATE SUPERVISOR_DATE,Location of Organisation,WHO REPORTED THE INCIDENT,PARENT/GUARDIAN EMPLOYMENT STATUS,SURVIVOR/VICTIM EMPLOYMENT STATUS,WHO SURVIVOR/VICTIM LIVE WITH,SEX OF PERPETRATOR,AGE OF PERPETRATOR,RELATIONSHIP WITH PERPETRATOR,VULNERABLE POPULATION,TYPE OF VIOLENCE
0,CSO,Non-fatal,Female,9,Unknown,1/25/2020,8/25/2020,Ebonyi,Abakaliki,Unknown,Unknown,Wants Access to Justice,Unknown,1/1/1900,Case Open,Unknown,1/1/1900,1/1/1900,5/6/2021,1/1/1900,Abuja FCT,Family,Self-employed,Other,PARENT/GUARDIAN,Male,18,Stranger,Child/Youth Vulnerability,Sexual Violence
1,CSO,Non-fatal,Female,4,Unknown,6/1/2020,8/25/2020,Cross River,Calabar Municipal,Unknown,Evening,Wants Access to Justice,Unknown,1/1/1900,Case Open,Unknown,1/1/1900,1/1/1900,5/6/2021,3/11/2021,Abuja FCT,Family,Self-employed,Other,PARENT/GUARDIAN,Male,73,Extended Family,Child/Youth Vulnerability,Sexual Violence


In [8]:
def determine_vulnerability_safe(row, threshold=3):
    score = 0
    reasons = []
    
    raw_age = row.get('Age of survivor', None)
    try:
        age = float(raw_age) if raw_age not in (None, '', float('nan')) else None
    except (ValueError, TypeError):
        age = None

    def norm(x):
        return '' if pd.isna(x) else str(x).strip().lower()

    employment = norm(row.get('SURVIVOR/VICTIM EMPLOYMENT STATUS'))
    marital_status = norm(row.get('MARITAL STATUS'))
    violence_type = norm(row.get('TYPE OF VIOLENCE'))
    living_with = norm(row.get('WHO SURVIVOR/VICTIM LIVE WITH'))
    vulnerability_category = norm(row.get('VULNERABLE POPULATION'))

    # sets with possible synonyms
    unemp_set = {'unemployed', 'informal sector', 'informal', 'not employed'}
    high_risk_violence = {'sexual violence', 'emotional/psychological abuse', 'emotional abuse', 'psychological abuse'}
    living_risk = {'lives alone', 'alone', 'lives with abuser', 'with abuser', 'living with abuser'}

    if age is not None and (age < 18 or age > 65):
        score += 2
        reasons.append('age')

    if employment in unemp_set:
        score += 1
        reasons.append('employment')

    if marital_status in {'widowed', 'separated', 'divorced'}:
        score += 1
        reasons.append('marital_status')

    if violence_type in high_risk_violence:
        score += 2
        reasons.append('violence_type')

    if living_with in living_risk:
        score += 2
        reasons.append('living_with')

    if vulnerability_category not in {'', 'no vulnerability', 'unknown', 'none'}:
        score += 2
        reasons.append('vulnerable_category')

    label = 'VULNERABLE' if score >= threshold else 'NOT_VULNERABLE'
    return {'label': label, 'score': score, 'reasons': reasons}

In [10]:
# Apply the function to each row
results = df3.apply(lambda row: determine_vulnerability_safe(row), axis=1)

results_df1 = pd.DataFrame(results.tolist())

# Joining back to your original df2
df3 = pd.concat([df3, results_df1], axis=1)

In [12]:
df3.head(2)

Unnamed: 0,Type_of_Organisation,Was the Violence Fatal,Sex of survivor,Age of survivor,MARITAL STATUS,DATE OF INCIDENT,DATE REPORTED,LOCATION OF VIOLENCE (STATE),LOCATION OF VIOLENCE (L.G.A),LOCATION OF VIOLENCE (WARD),TIME OF THE DAY THAT INCIDENT TOOK PLACE,DOES THE SURVIVOR WANT ACCESS TO JUSTICE,OUTCOME OF PROSECUTION,DATE JUSTICE WAS RECEIVED,HAS THE CASE BEEN CLOSED,WHO CLOSED THE CASE?,DATE CASE WAS CLOSED,APPROVED BY ORG. SUPERVISOR_DATE,APPROVED BY LGA SUPERVISOR_DATE,APPROVED BY STATE SUPERVISOR_DATE,Location of Organisation,WHO REPORTED THE INCIDENT,PARENT/GUARDIAN EMPLOYMENT STATUS,SURVIVOR/VICTIM EMPLOYMENT STATUS,WHO SURVIVOR/VICTIM LIVE WITH,SEX OF PERPETRATOR,AGE OF PERPETRATOR,RELATIONSHIP WITH PERPETRATOR,VULNERABLE POPULATION,TYPE OF VIOLENCE,label,score,reasons,label.1,score.1,reasons.1
0,CSO,Non-fatal,Female,9,Unknown,1/25/2020,8/25/2020,Ebonyi,Abakaliki,Unknown,Unknown,Wants Access to Justice,Unknown,1/1/1900,Case Open,Unknown,1/1/1900,1/1/1900,5/6/2021,1/1/1900,Abuja FCT,Family,Self-employed,Other,PARENT/GUARDIAN,Male,18,Stranger,Child/Youth Vulnerability,Sexual Violence,VULNERABLE,6,"[age, violence_type, vulnerable_category]",VULNERABLE,6,"[age, violence_type, vulnerable_category]"
1,CSO,Non-fatal,Female,4,Unknown,6/1/2020,8/25/2020,Cross River,Calabar Municipal,Unknown,Evening,Wants Access to Justice,Unknown,1/1/1900,Case Open,Unknown,1/1/1900,1/1/1900,5/6/2021,3/11/2021,Abuja FCT,Family,Self-employed,Other,PARENT/GUARDIAN,Male,73,Extended Family,Child/Youth Vulnerability,Sexual Violence,VULNERABLE,6,"[age, violence_type, vulnerable_category]",VULNERABLE,6,"[age, violence_type, vulnerable_category]"


In [7]:
#df3.to_csv("GBVV_data.csv", index = False)