In [None]:
#dependencies 
import matplotlib.pyplot as plt 
import pandas as pd 
import numpy as np
import requests 
import time 

In [None]:
#Open CSV file 
gun_violence = "gun_violence_data.csv"
gun_violence_pd = pd.read_csv(gun_violence)

# Raw CSV column name -> display name, listed in the column order wanted for
# the reduced frame. One mapping drives both the column selection and the
# rename, so a column cannot be selected without a label or labelled without
# being selected, and no key can be listed twice.
column_labels = {
    "incident_id": "Incident ID",
    "date": "Date",
    "city_or_county": "City/County",
    "n_killed": "Killed",
    "n_injured": "Injured",
    "participant_age_group": "Age Group",
    "participant_gender": "Gender",
    "n_guns_involved": "Number of Guns involved",
    "participant_status": "Participant Status",
    "participant_type": "Associated with Participant",
    "state": "State",
    "congressional_district": "Congressional District",
}

#Keep only the columns of interest and rename them
# (.rename returns a new frame, so gun_violence_pd keeps its raw column names)
gun_violence_red = gun_violence_pd[list(column_labels)].rename(columns=column_labels)

gun_violence_red.head()

In [None]:
# counting for genders:

def countGender(genderStr, ismale=True):
    """Count the male or female entries in a '||'-delimited gender string.

    The string is split on '||' and every entry is matched
    case-insensitively: an entry containing 'female' counts as female, an
    entry containing 'male' but not 'female' counts as male. Entries that
    match neither (blank or unrecognised text) are not counted at all, so an
    empty string yields 0 for both genders.

    Parameters
    ----------
    genderStr : str or any
        Raw cell value. Anything that is not a string (for example the NaN
        pandas uses for a missing cell) is returned unchanged.
    ismale : bool, default True
        True to return the male count, False to return the female count.

    Returns
    -------
    int
        The requested count when ``genderStr`` is a string; otherwise
        ``genderStr`` itself.
    """
    # Explicit type check instead of a bare ``except``: missing values still
    # pass through untouched, but genuine errors are no longer swallowed.
    if not isinstance(genderStr, str):
        return genderStr

    entries = [entry.lower() for entry in genderStr.split('||')]
    females = sum('female' in entry for entry in entries)
    # 'male' is a substring of 'female', so females must be excluded here.
    males = sum('male' in entry and 'female' not in entry for entry in entries)

    return males if ismale else females
# Per-incident head-counts derived from the '||'-delimited "Gender" column.
gender_field = gun_violence_red['Gender']
# Female
gun_female = gender_field.apply(countGender, ismale=False)
# Male
gun_male = gender_field.apply(countGender)

# NOTE: this is a plain name binding, not a copy -- the 'Female' and 'Male'
# columns added below therefore also appear on gun_violence_red.
gun_violence_df = gun_violence_red
gun_violence_df['Female'] = gun_female
gun_violence_df['Male'] = gun_male

# .drop returns a new frame, so from here on gun_violence_df is independent
# of gun_violence_red (which keeps its "Gender" column).
gun_violence_df = gun_violence_df.drop(columns=["Gender"])
display(gun_violence_df.head())
def countType(raw_str, str_type):
    """Count the '||'-delimited entries of ``raw_str`` that contain ``str_type``.

    Matching is a case-insensitive substring test applied to each entry
    obtained by splitting ``raw_str`` on '||'.

    Parameters
    ----------
    raw_str : str or any
        Raw cell value. Anything that is not a string (for example the NaN
        pandas uses for a missing cell) is returned unchanged.
    str_type : str
        Text to look for inside each entry; compared case-insensitively.

    Returns
    -------
    int
        Number of matching entries when ``raw_str`` is a string; otherwise
        ``raw_str`` itself.
    """
    # Explicit type check instead of a bare ``except``: missing values still
    # pass through untouched, but genuine errors are no longer swallowed.
    if not isinstance(raw_str, str):
        return raw_str

    # Entries are lower-cased before comparison, so the needle must be too.
    needle = str_type.lower()
    return sum(needle in entry.lower() for entry in raw_str.split('||'))

In [None]:
#Creating/separating the individual variables for each age group
age_field = gun_violence_df['Age Group']
# Adult
adult_group = age_field.apply(countType, str_type='adult')
# Teen
teen_group = age_field.apply(countType, str_type='teen')
# Child
child_group = age_field.apply(countType, str_type='child')

# Outcome counts are read from the raw frame's 'participant_status' column;
# the resulting Series are aligned to gun_violence_df by index on assignment.
status_field = gun_violence_pd['participant_status']
# Killed
gun_kill = status_field.apply(countType, str_type='killed')
# Injured 
gun_injured = status_field.apply(countType, str_type='injured')
# Unharmed
gun_unharmed = status_field.apply(countType, str_type='unharmed')
# Arrested
gun_arrested = status_field.apply(countType, str_type='arrested')

# Attach the derived counts. "Killed" and "Injured" already exist on the
# frame, so those two assignments overwrite the existing columns.
derived_columns = {
    "Children (0-11)": child_group,
    "Teens (12-17)": teen_group,
    "Adults (18+)": adult_group,
    "Killed": gun_kill,
    "Injured": gun_injured,
    "Unharmed": gun_unharmed,
    "Arrested": gun_arrested,
}
for column_label, counts in derived_columns.items():
    gun_violence_df[column_label] = counts

# The raw delimited text columns are no longer needed once counted.
gun_violence_df = gun_violence_df.drop(
    columns=["Age Group", "Associated with Participant", "Participant Status"]
)



In [None]:
#Reading in and creating a dataframe for congressional partisan lean
partisan_csv = "partisan_data.csv"
partisan_read = pd.read_csv(partisan_csv)
# Wrap the parsed CSV in a second DataFrame object; columns added to
# partisan_df later are not added to partisan_read.
partisan_df = pd.DataFrame(data=partisan_read)

In [None]:
#Creating and populating columns to house state and district
# "District" is split at its LAST space: everything before it is the state
# name (which may itself contain spaces), everything after it is the district
# label. This is done column-wise with direct column assignment rather than a
# row loop using chained indexing (df[col][row] = ...), which pandas warns
# about and which does not update the frame when Copy-on-Write is enabled.
district_parts = partisan_df["District"].str.rsplit(" ", n=1)
# rstrip removes any padding left between the state name and the district label
partisan_df["State"] = district_parts.str[0].str.rstrip()
partisan_df["Congressional District"] = district_parts.str[-1]

In [None]:
#Creating a column to format partisan lean data
# Starts out as a straight copy of the raw "PVI" values.
partisan_df = partisan_df.assign(partisan_lean=partisan_df["PVI"])

In [None]:
#converting "R+" and "D+" into positive (for Republican) and negative (for Democrat) numbers
def _pvi_to_lean(pvi):
    """Convert one raw PVI value into a signed integer lean.

    "R+<n>" becomes +n, "D+<n>" becomes -n and "EVEN" becomes 0.

    Parameters
    ----------
    pvi : str or any
        Raw value from the "PVI" column.

    Returns
    -------
    int
        The signed lean for a recognised string. A non-string value (such as
        NaN for a missing cell) is reported with the "??!!" marker and
        returned unchanged.

    Raises
    ------
    ValueError
        If ``pvi`` is a string in none of the recognised forms, or its
        numeric part cannot be parsed as an integer.
    """
    if not isinstance(pvi, str):
        print("??!!")
        return pvi
    if pvi == "EVEN":
        return 0
    party, margin = pvi[0:2], pvi[2:]
    if party == "R+":
        return int(margin)
    if party == "D+":
        return -int(margin)
    # Fail loudly rather than silently treating unknown text as Democratic.
    raise ValueError(f"Unrecognized PVI value: {pvi!r}")


# Whole-column assignment (no chained df[col][row] = ... indexing), always
# derived from the raw "PVI" column so re-running the cell is safe.
partisan_df["partisan_lean"] = partisan_df["PVI"].map(_pvi_to_lean)

In [None]:
# Keep only the merge keys and the numeric lean.
partisan_keep_columns = ['State', 'Congressional District', 'partisan_lean']
partisan_df_clean = partisan_df.loc[:, partisan_keep_columns]

In [None]:
#Removing 'At Large' districts in preparation to merge.
# Rows whose district label is exactly "AL" are excluded; everything else stays.
is_not_at_large = partisan_df_clean["Congressional District"].ne("AL")
partisan_merge_ready = partisan_df_clean.loc[is_not_at_large]

In [None]:
# Discard every incident that has a missing value in ANY column of the frame.
# NOTE(review): no `subset=` is given, so a gap in a column unrelated to the
# merge also removes the row -- confirm that is intended.
gun_violence_red = gun_violence_red.dropna(axis="index", how="any")

In [None]:
# Normalise the district number to an integer-formatted string (1.0 -> "1")
# so it can be matched against the string labels in the partisan data.
# The conversion is done on the column itself so the frame's index is kept:
# wrapping a list in a fresh pd.Series gives it a 0..n-1 index, and assigning
# that to a frame whose index has gaps (rows removed by dropna) aligns values
# to the wrong rows and leaves NaN where no label matches.
gun_violence_red = gun_violence_red.assign(
    **{
        "Congressional District": gun_violence_red["Congressional District"]
        .astype(int)
        .astype(str)
    }
)

In [None]:
# Left join: every gun-violence row is kept, and partisan_lean is filled in
# wherever the (State, Congressional District) pair has a match.
merge_keys = ["State", "Congressional District"]
gun_partisan_merge_df = pd.merge(
    left=gun_violence_red,
    right=partisan_merge_ready,
    how='left',
    on=merge_keys,
)

In [None]:
#inspect the district column of the combined partisan lean / gun violence dataframe
gun_partisan_merge_df.loc[:, "Congressional District"]

In [None]:
#group by partisan lean
# numeric_only=True restricts the totals to numeric columns. Without it,
# recent pandas versions also try to "sum" the text columns of the merged
# frame (e.g. Date, State) instead of leaving them out of the result.
df_by_partisan_lean = gun_partisan_merge_df.groupby(by="partisan_lean").sum(numeric_only=True)

In [None]:
#prepping x and y axes
# x: the distinct partisan-lean values (the group keys); y: totals per lean.
x_axis = df_by_partisan_lean.index
killed, injured = df_by_partisan_lean["Killed"], df_by_partisan_lean["Injured"]



In [None]:
#creating plot
# A dedicated figure/axes pair is used instead of pyplot's implicit "current
# figure", so this chart never shares a canvas with any other plot.
lean_fig, lean_ax = plt.subplots()

# Stacked bars: injured totals sit on top of killed totals for each lean value.
# Labels are attached to the bars themselves so the legend cannot get out of
# step with the plotting order.
lean_ax.bar(x_axis, killed, label="Killed")
lean_ax.bar(x_axis, injured, bottom=killed, label="Injured")

lean_ax.legend()
lean_ax.set_xlabel("<--- More Democratic                   More Republican--->")
lean_ax.set_ylabel("Injured and Killed")
lean_ax.set_title("Gun Violence by Partisan Lean of District")


# No file extension is given, so matplotlib's default output format applies.
lean_fig.savefig("gun_violence_by_partisan_lean")

In [None]:
#Reading in csv with data on the density and income of congressional districts
density_and_income_csv = "density_and_income.csv"
density_and_income = pd.read_csv(density_and_income_csv)

In [None]:
#Creating density and income plot
density_rank = density_and_income["National Density Rank"]
income_rank = density_and_income["Med Income Rank"]

# A dedicated figure/axes pair is used instead of pyplot's implicit "current
# figure", so the scatter is never drawn on top of a previously created chart.
density_fig, density_ax = plt.subplots()
density_ax.scatter(density_rank, income_rank)
density_ax.grid()
density_ax.set_xlabel("Population Density Rank out of 435 Districts")
density_ax.set_ylabel("Median Income Rank out of 435 Districts")
# NOTE(review): only density rank and income rank are plotted here; nothing
# from the gun-violence data is drawn -- confirm the title is what is wanted.
density_ax.set_title("Gun Violence by Density and Income", fontsize=16)

# Quadrant captions as (x, y, text, colour), positioned in data coordinates.
quadrant_captions = [
    (30, 102, "Rich, Dense", 'blue'),
    (30, 310, "Poor, Dense", 'cyan'),
    (260, 310, "Poor, Sparse", 'red'),
    (260, 102, "Rich, Sparse", 'magenta'),
]
for caption_x, caption_y, caption_text, caption_color in quadrant_captions:
    density_ax.text(caption_x, caption_y, caption_text, fontsize=14, color=caption_color)

# No file extension is given, so matplotlib's default output format applies.
density_fig.savefig("gun_violence_by_density_income")