---
toc: true
comments: true
layout: notebook
author: Tirth Thakkar 
title: Survivorship Lab
description: Survivorship Lab for APES Data analysis
categories: [week05, APES]

---

In [9]:
import pandas as pd


def analyze_deaths(csv_file, stats_csv_file):
    # Function to calculate age range for a given age
    def calculate_age_range(age):
        return int(age // 5) * 5

    # Load the CSV file into a DataFrame
    df = pd.read_csv(csv_file)

    # Create a new column 'Age Range' to store the age range for each person
    df['Age Range'] = df['Age of Death'].apply(calculate_age_range)

    # Initialize variables to store statistics
    age_range_stats = {}
    total_people = len(df)
    surviving_members = total_people
    # Iterate through each unique age range
    for age_range in sorted(df['Age Range'].unique()):
        # Filter the DataFrame for the current age range
        age_range_df = df[df['Age Range'] == age_range]
        
        # Calculate the number of deaths in the current and the previous age range
        deaths_in_range = len(age_range_df)
        deaths_in_previous_range = len(df[df['Age Range'] == age_range - 5])
        
        # Calculate the number of surviving members
        surviving_members = total_people - deaths_in_range
        
        # Calculate the mortality rate
        mortality_rate = (deaths_in_range + deaths_in_previous_range) / total_people
        
        # Store the statistics in a dictionary
        age_range_stats[age_range] = {
            'Deaths in Range': deaths_in_range,
            'Surviving Members': surviving_members,
            'Mortality Rate': mortality_rate
        }

    # Print the statistics
    for age_range, stats in age_range_stats.items():
        print(f"Age Range {age_range}-{age_range + 4.9} years:")
        print(f"  Deaths in Range: {stats['Deaths in Range']}")
        print(f"  Surviving Members: {stats['Surviving Members']}")
        print(f"  Mortality Rate: {stats['Mortality Rate']:.2%}")
        print()

    # Create a DataFrame for the statistics
    stats_df = pd.DataFrame.from_dict(age_range_stats, orient='index')
    stats_df.index.name = 'Age Range'
    stats_df.reset_index(inplace=True)

    # Save the statistics to a new CSV file
    stats_df.to_csv(stats_csv_file, index=False)

    print(f"Statistics saved to {stats_csv_file}")


if __name__ == '__main__':
    analyze_deaths('/home/tirth/vscode/Mort-Pages-Personal/_data/19th_Cent_NJ_Burials_Women.csv', '/home/tirth/vscode/Mort-Pages-Personal/_data/age_range_statistics_women_19th_cent.csv')


Age Range 0-4.9 years:
  Deaths in Range: 8
  Surviving Members: 142
  Mortality Rate: 5.33%

Age Range 5-9.9 years:
  Deaths in Range: 2
  Surviving Members: 148
  Mortality Rate: 6.67%

Age Range 10-14.9 years:
  Deaths in Range: 4
  Surviving Members: 146
  Mortality Rate: 4.00%

Age Range 15-19.9 years:
  Deaths in Range: 2
  Surviving Members: 148
  Mortality Rate: 4.00%

Age Range 20-24.9 years:
  Deaths in Range: 9
  Surviving Members: 141
  Mortality Rate: 7.33%

Age Range 25-29.9 years:
  Deaths in Range: 5
  Surviving Members: 145
  Mortality Rate: 9.33%

Age Range 30-34.9 years:
  Deaths in Range: 7
  Surviving Members: 143
  Mortality Rate: 8.00%

Age Range 35-39.9 years:
  Deaths in Range: 4
  Surviving Members: 146
  Mortality Rate: 7.33%

Age Range 40-44.9 years:
  Deaths in Range: 1
  Surviving Members: 149
  Mortality Rate: 3.33%

Age Range 45-49.9 years:
  Deaths in Range: 8
  Surviving Members: 142
  Mortality Rate: 6.00%

Age Range 50-54.9 years:
  Deaths in Range: 