In [1]:
import pandas as pd  
import os


In [2]:
# Read both source CSV files into dataframes: first the homicide counts,
# then the homicide convictions (same order as the file names below)
homicide_count_df, homicide_conviction_df = (
    pd.read_csv(csv_name)
    for csv_name in (
        "global_intentional_homicide_count.csv",
        "global_homicide_conviction_count.csv",
    )
)

In [3]:
# Build the "justice rate" table: for every country present in both inputs,
# divide convictions by homicide count per column and cap the result at 1.

# Columns to process: everything after the first column of the conviction table
# (assumes the first column is 'Country' -- TODO confirm against the CSV)
year_columns = homicide_conviction_df.columns[1:]

justice_rate_rows = []

for _, conviction_row in homicide_conviction_df.iterrows():
    country = conviction_row['Country']

    # Look the country up in the homicide count table; skip it when absent
    matching_counts = homicide_count_df[homicide_count_df['Country'] == country]
    if matching_counts.empty:
        continue

    # When several count rows share the name, only the first one is used
    count_row = matching_counts.iloc[0]

    justice_rate_row = {'Country': country}

    for column in year_columns:
        # A column missing from the count table (or holding None) is left out
        homicides = count_row[column] if column in count_row else None
        if homicides is None:
            continue

        # A zero count is replaced by 1 so the division never hits zero
        divisor = 1 if homicides == 0 else homicides
        ratio = conviction_row[column] / divisor

        # Ratios above 1 are capped at 1; anything else is stored unchanged
        justice_rate_row[column] = 1 if ratio > 1 else ratio

    justice_rate_rows.append(justice_rate_row)

# One dataframe row per collected dictionary
homicide_justice_rate_df = pd.DataFrame(justice_rate_rows)

In [4]:
# Preview the last 60 rows of the computed justice-rate table
homicide_justice_rate_df.tail(n=60)

Unnamed: 0,Country,2003,2004,2005,2006,2007,2008,2009,2010,2011,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
59,Latvia,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.955752,...,1.0,0.80597,1.0,1.0,1.0,1.0,0.971429,1.0,1.0,1.0
60,Liechtenstein,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
61,Lithuania,0.920932,0.502753,0.706989,0.965278,0.758123,0.926174,0.845238,1.0,1.0,...,1.0,0.755814,0.947712,1.0,1.0,1.0,0.633663,1.0,1.0,1.0
62,Luxembourg,0.722222,1.0,0.541667,0.309524,0.309524,0.270833,0.433333,0.216667,0.541667,...,0.541667,0.433333,0.4,0.5,0.0,0.541667,0.333333,1.0,0.5,0.47619
63,Malaysia,0.354867,0.403421,0.400662,0.270408,0.307515,0.335846,0.357398,0.371296,0.378302,...,0.393137,0.40261,0.439693,0.529024,0.620743,0.646774,0.79249,0.825103,0.408088,0.408088
64,Maldives,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
65,Malta,0.37037,0.571429,0.5,1.0,1.0,0.166667,1.0,0.75,1.0,...,0.0,0.75,0.526316,0.111111,0.875,1.0,0.285714,0.5,0.125,0.877193
66,Mauritius,0.033175,0.232227,0.165877,0.331754,0.265403,0.630332,0.394737,0.30303,0.21875,...,0.842105,0.857143,0.571429,0.352814,0.314672,0.431217,0.332653,0.342437,0.401478,0.386256
67,Mexico,0.336175,0.452353,0.444108,0.407865,0.47908,0.311295,0.20204,0.179291,0.162643,...,0.169082,0.162958,0.060874,0.074659,0.047131,0.052863,0.04468,0.052745,0.088653,0.104903
68,Monaco,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.166667,0.0,0.166667,0.166667,0.166667,0.166667,0.166667,0.166667,0.166667,0.166667


In [5]:
# Step 1: Calculate the mean of every column after 'Country' across all rows
mean_values = homicide_justice_rate_df.iloc[:, 1:].mean()

# Step 2: Build a 'United States' row filled with those means
# NOTE(review): these are cross-country averages, not measured values for the
# United States -- confirm this imputation is intended.
us_row = ['United States'] + mean_values.tolist()

# Step 3: Append the row only when it is not already in the table, so that
# re-running this cell does not stack duplicate 'United States' rows
if not (homicide_justice_rate_df['Country'] == 'United States').any():
    homicide_justice_rate_df.loc[len(homicide_justice_rate_df)] = us_row

# Step 4: Sort the DataFrame by the 'Country' column to maintain order
homicide_justice_rate_df = homicide_justice_rate_df.sort_values(by='Country').reset_index(drop=True)


In [6]:
# Shorten two official country names to their common forms
homicide_justice_rate_df['Country'] = homicide_justice_rate_df['Country'].replace({
    'Russian Federation': 'Russia',
    'Republic of Korea': 'South Korea',
})

# Re-sort after renaming: the table was sorted by 'Country' under the old
# names, so without this a renamed row such as 'South Korea' would stay at its
# former position and the saved file would no longer be alphabetical.
# kind='stable' keeps rows with equal names in their existing relative order.
homicide_justice_rate_df = (
    homicide_justice_rate_df
    .sort_values(by='Country', kind='stable')
    .reset_index(drop=True)
)

In [7]:
# Write the justice-rate table to a new CSV file, leaving out the row index
homicide_justice_rate_df.to_csv(path_or_buf="homicide_justice_rate.csv", index=False)