In [None]:
import pandas as pd

# Homicide records, reduced to the columns needed for a per-country total.
# NOTE(review): `Year` and `Unit of measurement` are dropped without filtering,
# so the sum below spans every year/unit present in the file -- confirm the
# file only contains comparable rows.
df = pd.read_csv('Homicides.csv').drop(
    columns=['Region', 'Subregion', 'Dimension', 'Category', 'Year', 'Unit of measurement', 'Source']
)
# Evaluated but not rendered: only the cell's last expression is displayed.
df.head()

# Total of VALUE per country, as a Series indexed by Country.
country = df.groupby('Country')['VALUE'].sum()
country

In [None]:
# Religion
import numpy as np

# Religious-composition table, restricted to the rows for 2020.
religion = (
    pd.read_csv('religion.csv')
    .drop(columns=['Region', 'Level', 'Countrycode'])
    .query('Year == 2020')
    .copy()
)

# Both count columns are rendered as text and have their thousands separators
# removed; they stay strings and are cast to int where they are used.
for count_col in ('Population', 'Religiously_unaffiliated'):
    religion[count_col] = religion[count_col].astype(str).str.replace(',', '', regex=False)

# Religion density = share of the population that is not religiously unaffiliated.
unaffiliated_share = religion['Religiously_unaffiliated'].astype(int) / religion['Population'].astype(int)
religion['Religion Density'] = 1 - unaffiliated_share
religion

In [None]:
# Sex
# Per-country total of VALUE from the sexual-violence file, as a Series
# indexed by Country.
# NOTE(review): no filter on year or unit is applied before summing -- confirm
# the file only contains comparable rows.
sex = (
    pd.read_csv('Sex.csv')
    .drop(columns=['Iso3_code', 'Region', 'Subregion', 'Indicator', 'Dimension', 'Category'])
    .groupby('Country')['VALUE']
    .sum()
)
sex

In [None]:
# Corruption 
# Per-country total of VALUE from the corruption file, using only the rows
# whose unit of measurement is "Counts".
corruption = (
    pd.read_csv('Corruption.csv')
    .query('`Unit of measurement` == "Counts"')
    .groupby('Country')['VALUE']
    .sum()
)
corruption

In [None]:
# Merging
# Attach the three per-country totals to the religion table. Inner joins keep
# only the countries that appear in every input.
merged_df = religion.merge(country, how='inner', on=['Country'])
# The homicide and sexual-violence totals are both named VALUE, hence the suffixes.
merged_df = merged_df.merge(sex, how='inner', on=['Country'], suffixes=('_hom', '_sex'))
merged_df = (
    merged_df
    .merge(corruption, how='inner', on=['Country'])
    .rename(columns={'VALUE': 'VALUES_corr'})
)

# Each density is the total divided by the population, times 100.
population = merged_df['Population'].astype(int)
for density_col, total_col in (
    ('Homicide Density', 'VALUE_hom'),
    ('Sex Assault Density', 'VALUE_sex'),
    ('Corruption Density', 'VALUES_corr'),
):
    merged_df[density_col] = merged_df[total_col].astype(int) / population * 100

# NOTE(review): later cells read 'final_1.csv', which no visible cell writes --
# presumably an export of this frame; confirm and add the export if so.
merged_df



In [None]:
# Quick inspection of `df`. Only the last expression of a cell is rendered, so
# the `head()` preview is evaluated but not shown; the dtypes are the output.
df.head()
df.dtypes


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Mirrored horizontal bar chart for countries above a population threshold:
# homicide totals on the left panel, religious vs. non-religious population
# (stacked) on the right panel, sharing one country axis.
df = pd.read_csv('final_1.csv')

# These columns may hold text with thousands separators; strip the commas and
# coerce to numbers (entries that cannot be parsed become NaN).
cols_to_clean = ['Christians', 'Muslims', 'Buddhists', 'Hindus', 'Jews',
                 'Other_religions', 'VALUE_hom']

for col in cols_to_clean:
    if col in df.columns:
        df[col] = pd.to_numeric(df[col].astype(str).str.replace(',', '', regex=False), errors='coerce')

# Only countries with more than two million inhabitants are charted.
df_filtered = df[df['Population'] > 2000000].copy()

df_filtered['Non_Religious_Count'] = df_filtered['Religiously_unaffiliated']
df_filtered['Religious_Count'] = df_filtered['Population'] - df_filtered['Non_Religious_Count']

df_sorted = df_filtered.sort_values(by='Population', ascending=True)

# Half an inch per country, with a floor so that an empty or very small
# selection still produces a valid (non-zero) figure height.
fig_height = max(len(df_sorted) * 0.5, 2)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, fig_height), sharey=True)

# Left panel: homicide totals.
ax1.barh(df_sorted['Country'], df_sorted['VALUE_hom'], color='#FF0000', edgecolor='black', linewidth=0.5)

# Reverse the x-axis so the bars grow leftwards, away from the shared axis.
# Series.max() skips the NaN values that the coercion above can introduce;
# the builtin max() may return NaN, which set_xlim rejects.
hom_max = df_sorted['VALUE_hom'].max()
if pd.notna(hom_max) and hom_max > 0:
    ax1.set_xlim(hom_max * 1.1, 0)
else:
    ax1.invert_xaxis()
ax1.set_xlabel('Number of Homicides')
ax1.set_title('Homicides', fontsize=14, fontweight='bold', color='#FF0000')
ax1.grid(axis='x', linestyle='--', alpha=0.5)

# Right panel: religious population with the non-religious part stacked on top.
p1 = ax2.barh(df_sorted['Country'], df_sorted['Religious_Count'], color='#87CEFA', label='Religious', edgecolor='black', linewidth=0.5)
p2 = ax2.barh(df_sorted['Country'], df_sorted['Non_Religious_Count'], left=df_sorted['Religious_Count'], color='#E6F3FF', label='Non-Religious', edgecolor='black', linewidth=0.5)

ax2.set_xlabel('Population')
ax2.set_title('Population Distribution', fontsize=14, fontweight='bold', color='#2e86de')
ax2.legend()
ax2.grid(axis='x', linestyle='--', alpha=0.5)

# Show population ticks as plain numbers rather than scientific notation.
ax2.ticklabel_format(style='plain', axis='x')

fig.suptitle('Comparison: Homicides vs. Religious Composition', fontsize=16, y=1.005)

# tight_layout recomputes the subplot spacing, so the zero gap between the two
# panels has to be applied after it, not before.
fig.tight_layout()
fig.subplots_adjust(wspace=0.0)
plt.show()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import requests
from io import BytesIO
from PIL import Image
from matplotlib.offsetbox import OffsetImage, AnnotationBbox


# Scatter plot of religion density against homicide density for countries
# above a population threshold. Country markers (flags or names) are added by
# the code that follows on the current axes.
df = pd.read_csv('final_1.csv')

# These columns may hold text with thousands separators; strip the commas and
# coerce to numbers (entries that cannot be parsed become NaN).
cols_to_clean = ['Christians', 'Muslims', 'Buddhists', 'Hindus', 'Jews',
                 'Other_religions', 'VALUE_hom']

for col in cols_to_clean:
    if col in df.columns:
        df[col] = pd.to_numeric(df[col].astype(str).str.replace(',', '', regex=False), errors='coerce')

# Only countries with more than two million inhabitants are plotted.
df_filtered = df[df['Population'] > 2000000].copy()

x_data = df_filtered['Religion Density']
y_data = df_filtered['Homicide Density']

fig, ax = plt.subplots(figsize=(16, 12))

# No x_jitter: jitter shifts each marker by unseeded random noise, while the
# flags/labels are anchored at the true coordinates, so markers and
# annotations would disagree and the figure would differ on every run.
sns.regplot(
    x=x_data,
    y=y_data,
    fit_reg=False,
    scatter_kws={
        'alpha': 0.6,
        's': 100,
        'edgecolor': 'w'
    },
    color='#1f77b4',
    ax=ax,
)

ax.set_title('Correlation: Religion Density vs. Homicide Density', fontsize=16)
ax.set_xlabel('Religion Density (0.0 to 1.0)', fontsize=12)
# NOTE(review): if this column was computed as total / population * 100, the
# unit is a percentage rather than "per capita" -- confirm and adjust the label.
ax.set_ylabel('Homicide Density (per capita)', fontsize=12)
ax.grid(True, linestyle='--', alpha=0.5)


# Maps a country name, spelled as in the `Country` column, to the lowercase
# two-letter code that get_flag() inserts into the flagcdn.com image URL
# (presumably ISO 3166-1 alpha-2 codes -- TODO confirm).
# Countries missing from this table get no flag and are labelled with text.
country_codes = {
    # Americas
    "United States": "us", "Canada": "ca", "Mexico": "mx", "Brazil": "br", "Argentina": "ar",
    "Colombia": "co", "Venezuela": "ve", "Chile": "cl", "Peru": "pe", "Ecuador": "ec",
    "Bolivia": "bo", "Paraguay": "py", "Uruguay": "uy", "Guatemala": "gt", "Honduras": "hn",
    "El Salvador": "sv", "Nicaragua": "ni", "Costa Rica": "cr", "Panama": "pa", "Cuba": "cu",
    "Dominican Republic": "do", "Haiti": "ht", "Jamaica": "jm", "Trinidad and Tobago": "tt",
    
    # Europe
    "United Kingdom": "gb", "France": "fr", "Germany": "de", "Italy": "it", "Spain": "es",
    "Portugal": "pt", "Netherlands": "nl", "Belgium": "be", "Switzerland": "ch", "Austria": "at",
    "Sweden": "se", "Norway": "no", "Denmark": "dk", "Finland": "fi", "Ireland": "ie",
    "Poland": "pl", "Czech Republic": "cz", "Hungary": "hu", "Romania": "ro", "Bulgaria": "bg",
    "Greece": "gr", "Russia": "ru", "Ukraine": "ua", "Belarus": "by", "Turkey": "tr",
    "Albania": "al", "Armenia": "am", "Serbia": "rs", "Croatia": "hr", "Bosnia and Herzegovina": "ba",
    
    # Asia & Oceania
    "China": "cn", "Japan": "jp", "India": "in", "South Korea": "kr", "North Korea": "kp",
    "Indonesia": "id", "Pakistan": "pk", "Bangladesh": "bd", "Philippines": "ph", "Vietnam": "vn",
    "Thailand": "th", "Myanmar": "mm", "Malaysia": "my", "Singapore": "sg", "Australia": "au",
    "New Zealand": "nz", "Afghanistan": "af", "Iran": "ir", "Iraq": "iq", "Saudi Arabia": "sa",
    "Israel": "il", "Syria": "sy", "Yemen": "ye", "Jordan": "jo", "Lebanon": "lb", "Kazakhstan": "kz",
    "Azerbaijan": "az", "Sri Lanka": "lk", "Nepal": "np",
    
    # Africa
    "Nigeria": "ng", "Egypt": "eg", "South Africa": "za", "Kenya": "ke", "Ethiopia": "et",
    "Ghana": "gh", "Tanzania": "tz", "Algeria": "dz", "Morocco": "ma", "Sudan": "sd",
    "Uganda": "ug", "Congo": "cg", "Democratic Republic of the Congo": "cd", "Cameroon": "cm",
    "Tunisia": "tn", "Libya": "ly"
}

def get_flag(name):
    """Fetch a small flag image for a country from flagcdn.com.

    Args:
        name: Country name; looked up in ``country_codes``.

    Returns:
        The decoded PNG as returned by ``plt.imread``, or ``None`` when the
        country has no entry in ``country_codes`` or the download/decoding
        fails. Failures are best-effort: a warning is emitted and the caller
        is expected to fall back to a text label.
    """
    import warnings

    code = country_codes.get(name)
    if not code:
        return None

    url = f"https://flagcdn.com/w40/{code}.png"
    try:
        response = requests.get(url, timeout=3)
        # Fail on 4xx/5xx so an error page is never handed to the PNG decoder.
        response.raise_for_status()
        return plt.imread(BytesIO(response.content), format='png')
    except Exception as exc:
        # Deliberately broad: a missing flag must not abort the plot. Report
        # it instead of swallowing it so offline/failed runs are explainable.
        warnings.warn(f"Could not load flag for {name!r} from {url}: {exc}")
        return None


# Mark every plotted country at its (religion density, homicide density)
# position: a flag image when one is available, otherwise the country name.
axes = plt.gca()
for country_name, rel_density, hom_density in zip(
    df_filtered['Country'],
    df_filtered['Religion Density'],
    df_filtered['Homicide Density'],
):
    flag = get_flag(country_name)

    if flag is None:
        # No flag available: fall back to a small text label.
        plt.text(
            rel_density,
            hom_density,
            country_name,
            fontsize=8,
            fontweight='bold',
            ha='right',
            alpha=0.7
        )
        continue

    flag_box = AnnotationBbox(
        OffsetImage(flag, zoom=0.4),
        (rel_density, hom_density),
        frameon=False,
        pad=0,
    )
    axes.add_artist(flag_box)

plt.tight_layout()
plt.show()


In [None]:
# Display the column dtypes of `df` as it currently stands in the kernel.
df.dtypes   

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np


# Scatter plot of religion density against sexual-assault density for
# countries above a population threshold. Country labels are added by the
# code that follows on the current axes.
df = pd.read_csv('final_1.csv')

# These columns may hold text with thousands separators; strip the commas and
# coerce to numbers (entries that cannot be parsed become NaN).
cols_to_clean = ['Christians', 'Muslims', 'Buddhists', 'Hindus', 'Jews',
                 'Other_religions', 'VALUE_hom', 'Sex Assault Density']

for col in cols_to_clean:
    if col in df.columns:
        df[col] = pd.to_numeric(df[col].astype(str).str.replace(',', '', regex=False), errors='coerce')

# Only countries with more than two million inhabitants are plotted.
df_filtered = df[df['Population'] > 2000000].copy()

x_data = df_filtered['Religion Density']
y_data = df_filtered['Sex Assault Density']

fig, ax = plt.subplots(figsize=(16, 12))

# No x_jitter: jitter shifts each marker by unseeded random noise, while the
# country labels are anchored at the true coordinates, so markers and labels
# would disagree and the figure would differ on every run.
sns.regplot(
    x=x_data,
    y=y_data,
    fit_reg=False,
    scatter_kws={
        'alpha': 0.6,
        's': 100,
        'edgecolor': 'w'
    },
    color='#e74c3c',
    ax=ax,
)

ax.set_title('Correlation: Religion Density vs. Sexual Assault Density', fontsize=16)
ax.set_xlabel('Religion Density (0.0 to 1.0)', fontsize=12)
# NOTE(review): if this column was computed as total / population * 100, the
# unit is a percentage rather than "per capita" -- confirm and adjust the label.
ax.set_ylabel('Sexual Assault Density (per capita)', fontsize=12)
ax.grid(True, linestyle='--', alpha=0.5)


# Write each country's name at its (religion density, sexual-assault density)
# position on the current axes.
label_style = {'fontsize': 9, 'fontweight': 'bold', 'ha': 'right', 'alpha': 0.8}
for country_name, rel_density, assault_density in zip(
    df_filtered['Country'],
    df_filtered['Religion Density'],
    df_filtered['Sex Assault Density'],
):
    plt.text(rel_density, assault_density, country_name, **label_style)

plt.tight_layout()
plt.show()