In [1]:
import pandas as pd

# ------------------------------------------------------------------
# 1) Load the two datasets
# ------------------------------------------------------------------
# Both CSVs are fetched directly from the project's GitHub repository
# (raw.githubusercontent.com, `main` branch), so this cell needs network
# access. low_memory=False tells pandas not to parse the file in chunks,
# which avoids mixed-dtype inference within a column.
POP_CSV_URL = "https://raw.githubusercontent.com/Ramil-cyber/Research_Linking_Analyzing_Deaths_US_Prisons/refs/heads/main/Data_Manipulation/Prison_population_2015_2022_updated.csv"
DEATHS_CSV_URL = "https://raw.githubusercontent.com/Ramil-cyber/Research_Linking_Analyzing_Deaths_US_Prisons/refs/heads/main/Data_Manipulation/Cleaned_BJA_BJS_prison_deaths_by_state_year_2015_2022.csv"

df_pop = pd.read_csv(POP_CSV_URL, low_memory=False)
df_deaths = pd.read_csv(DEATHS_CSV_URL, low_memory=False)



In [2]:
# ------------------------------------------------------------------
# 2) Standardize merge keys on pop dataset
# ------------------------------------------------------------------
# Add 'State' (upper-cased state name) and 'Year' key columns to the
# population frame so it can be joined to the deaths frame on those names.
# NOTE(review): only the population side is normalized here; this assumes
# df_deaths['State'] is already upper-case without stray whitespace —
# confirm against the deaths CSV.
df_pop['State'] = df_pop['state_name'].str.upper()
df_pop['Year']  = df_pop['year']

# ------------------------------------------------------------------
# 3) Rename 'Deaths' to lowercase 'deaths' in deaths dataset
# ------------------------------------------------------------------
# DataFrame.rename ignores missing labels by default, so re-running this
# cell after the column has already been renamed is harmless.
df_deaths = df_deaths.rename(columns={'Deaths': 'deaths'})

# ------------------------------------------------------------------
# 4) Merge on Year and State
# ------------------------------------------------------------------
# Inner join: only (Year, State) pairs present in BOTH frames are kept.
merge_keys = ['Year', 'State']
combined = pd.merge(
    df_pop,
    df_deaths,
    on=merge_keys,
    how='inner'
)

# ------------------------------------------------------------------
# 4b) Audit the join: report key pairs the inner merge discarded
# ------------------------------------------------------------------
# An inner join drops non-matching rows silently. Compare the distinct key
# pairs of both frames with an outer merge; `indicator=True` adds a '_merge'
# column marking each pair as 'left_only', 'right_only' or 'both'.
# This is diagnostic only — `combined` is not modified.
key_audit = pd.merge(
    df_pop[merge_keys].drop_duplicates(),
    df_deaths[merge_keys].drop_duplicates(),
    on=merge_keys,
    how='outer',
    indicator=True,
)
unmatched_keys = key_audit[key_audit['_merge'] != 'both']
if not unmatched_keys.empty:
    n_pop_only = int((unmatched_keys['_merge'] == 'left_only').sum())
    n_deaths_only = int((unmatched_keys['_merge'] == 'right_only').sum())
    print(
        f"WARNING: inner merge dropped {n_pop_only} population-only and "
        f"{n_deaths_only} deaths-only (Year, State) combinations; first rows:"
    )
    print(unmatched_keys.head(20).to_string(index=False))

# ------------------------------------------------------------------
# 5) Drop the temporary merge keys
# ------------------------------------------------------------------
# Only the helper key columns 'State' and 'Year' are removed; the population
# frame's original 'year' and 'state_name' columns remain in the output.
combined = combined.drop(columns=merge_keys)

# ------------------------------------------------------------------
# 6) Save the cleaned, merged dataset
# ------------------------------------------------------------------
# Written relative to the notebook's working directory, without the index.
output_path = 'Combined_death_pop.csv'
combined.to_csv(output_path, index=False)

# ------------------------------------------------------------------
# 7) Confirmation
# ------------------------------------------------------------------
print(f"Saved merged dataset without 'State'/'Year' and with 'deaths' lowercase to:\n{output_path}")
print(combined.head())


Saved merged dataset without 'State'/'Year' and with 'deaths' lowercase to:
Combined_death_pop.csv
   year state_abbr state_name  state_fips region division  total_pop_15to64  \
0  2022         AK     Alaska           2   West  Pacific            484376   
1  2021         AK     Alaska           2   West  Pacific            486916   
2  2020         AK     Alaska           2   West  Pacific            488623   
3  2019         AK     Alaska           2   West  Pacific            493016   
4  2018         AK     Alaska           2   West  Pacific            497876   

   female_pop_15to64  male_pop_15to64  aapi_pop_15to64  ...  \
0             224281           260095            44427  ...   
1             225591           261325            44109  ...   
2             226819           261804            43920  ...   
3             229285           263731            43819  ...   
4             231938           265938            43449  ...   

   male_jail_pop_rate  total_prison_pop  female