# Crime Data Analysis: Tableau Data Fix

### Importing Libraries

In [1]:
# Importing Libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib
import os

In [2]:
# Define path
# The project folder can be overridden through the CRIME_DATA_PATH environment
# variable so the notebook is not tied to one user's machine; when the variable
# is unset the original location is used, so existing runs behave as before.
path = os.environ.get('CRIME_DATA_PATH', r'/Users/ColinLynt/Documents/CareerFoundry/01-22 Crime Data Analysis')

In [3]:
# Importing homicide report data from the prepared-data folder of the project
# NOTE(review): unpickling can run arbitrary code, so this file should only ever
# come from a trusted source.
homicide_pickle = os.path.join(path, '02 Data', 'Prepared data', 'Homicide_df.pkl')
df_hr = pd.read_pickle(homicide_pickle)

In [4]:
# Setting max rows/columns to see 50 rows/columns
# Both display limits share the same value, so they are applied in one loop.
for _display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(_display_option, 50)

### Data Cleaning and Preparation

In [5]:
# Checking Data: preview the first five rows of the loaded homicide frame
df_hr.head()

Unnamed: 0,City,State,Agency,Agentype,Solved,Year,Month,ActionType,Homicide,Situation,VicAge,VicSex,VicRace,VicEthnic,OffAge,OffSex,OffRace,OffEthnic,Weapon,Relationship,Circumstance,VicCount,OffCount
0,"Autauga, AL",Alabama,Autauga County,Sheriff,No,1976,September,Normal update,Murder and non-negligent manslaughter,Single victim/unknown offender(s),30.0,Male,Black,Unknown or not reported,31.138045,Unknown,Unknown,Unknown or not reported,Other or type unknown,Relationship not determined,Other,0,0
1,"Autauga, AL",Alabama,Autauga County,Sheriff,Yes,1977,January,Normal update,Murder and non-negligent manslaughter,Single victim/single offender,65.0,Female,Black,Unknown or not reported,62.0,Male,Black,Unknown or not reported,Other or type unknown,Acquaintance,Brawl due to influence of alcohol,0,0
2,"Autauga, AL",Alabama,Autauga County,Sheriff,Yes,1977,March,Normal update,Murder and non-negligent manslaughter,Single victim/multiple offenders,48.0,Male,White,Unknown or not reported,52.0,Male,White,Unknown or not reported,"Handgun - pistol, revolver, etc",Acquaintance,Other arguments,0,1
3,"Autauga, AL",Alabama,Prattville,Municipal police,Yes,1977,March,Normal update,Murder and non-negligent manslaughter,Single victim/single offender,27.0,Male,Black,Unknown or not reported,22.0,Female,Black,Unknown or not reported,Shotgun,Husband,Other arguments,0,0
4,"Autauga, AL",Alabama,Autauga County,Sheriff,Yes,1977,August,Normal update,Murder and non-negligent manslaughter,Single victim/single offender,17.0,Female,Black,Unknown or not reported,21.0,Male,Black,Unknown or not reported,Knife or cutting instrument,Acquaintance,Circumstances undetermined,0,0


In [6]:
# Assigning numerical values for months of the year
def nmonth(row):
    """Translate the month name stored under row['Month'] into a month number.

    Parameters
    ----------
    row : mapping-like
        Anything indexable by the key 'Month' (for example a DataFrame row).

    Returns
    -------
    str or None
        The two-character, zero-padded month number ('01' through '12'), or
        None when the value is not one of the twelve English month names.
    """
    month_numbers = {
        'January': '01',
        'February': '02',
        'March': '03',
        'April': '04',
        'May': '05',
        'June': '06',
        'July': '07',
        'August': '08',
        'September': '09',
        'October': '10',
        'November': '11',
        'December': '12',
    }
    return month_numbers.get(row['Month'])

In [7]:
# Adding new column for numerical months of the year (nmonth is evaluated once per row)
df_hr['NMonth'] = df_hr.apply(nmonth, axis = 1)

In [8]:
# Changing datatype of NMonth to int64 (the digit strings from nmonth become integers)
df_hr = df_hr.astype({'NMonth': 'int64'})

In [9]:
# Building the Date text as '<Year>-<NMonth>' from the two integer columns
# NOTE(review): NMonth is an integer at this point, so the month is not
# zero-padded (e.g. '1976-9') — confirm this is the format Tableau should receive.
df_hr['Date'] = df_hr['Year'].astype(str) + '-' + df_hr['NMonth'].astype(str)

In [10]:
# Checking the Date column: preview the first 20 rows, which now carry NMonth and Date
df_hr.head(20)

Unnamed: 0,City,State,Agency,Agentype,Solved,Year,Month,ActionType,Homicide,Situation,VicAge,VicSex,VicRace,VicEthnic,OffAge,OffSex,OffRace,OffEthnic,Weapon,Relationship,Circumstance,VicCount,OffCount,NMonth,Date
0,"Autauga, AL",Alabama,Autauga County,Sheriff,No,1976,September,Normal update,Murder and non-negligent manslaughter,Single victim/unknown offender(s),30.0,Male,Black,Unknown or not reported,31.138045,Unknown,Unknown,Unknown or not reported,Other or type unknown,Relationship not determined,Other,0,0,9,1976-9
1,"Autauga, AL",Alabama,Autauga County,Sheriff,Yes,1977,January,Normal update,Murder and non-negligent manslaughter,Single victim/single offender,65.0,Female,Black,Unknown or not reported,62.0,Male,Black,Unknown or not reported,Other or type unknown,Acquaintance,Brawl due to influence of alcohol,0,0,1,1977-1
2,"Autauga, AL",Alabama,Autauga County,Sheriff,Yes,1977,March,Normal update,Murder and non-negligent manslaughter,Single victim/multiple offenders,48.0,Male,White,Unknown or not reported,52.0,Male,White,Unknown or not reported,"Handgun - pistol, revolver, etc",Acquaintance,Other arguments,0,1,3,1977-3
3,"Autauga, AL",Alabama,Prattville,Municipal police,Yes,1977,March,Normal update,Murder and non-negligent manslaughter,Single victim/single offender,27.0,Male,Black,Unknown or not reported,22.0,Female,Black,Unknown or not reported,Shotgun,Husband,Other arguments,0,0,3,1977-3
4,"Autauga, AL",Alabama,Autauga County,Sheriff,Yes,1977,August,Normal update,Murder and non-negligent manslaughter,Single victim/single offender,17.0,Female,Black,Unknown or not reported,21.0,Male,Black,Unknown or not reported,Knife or cutting instrument,Acquaintance,Circumstances undetermined,0,0,8,1977-8
5,"Autauga, AL",Alabama,Autauga County,Sheriff,Yes,1977,October,Normal update,Murder and non-negligent manslaughter,Single victim/single offender,62.0,Male,Asian,Unknown or not reported,80.0,Male,Black,Unknown or not reported,Shotgun,Stranger,Robbery,0,0,10,1977-10
6,"Autauga, AL",Alabama,Autauga County,Sheriff,Yes,1978,May,Normal update,Murder and non-negligent manslaughter,Single victim/multiple offenders,54.0,Male,Black,Unknown or not reported,54.0,Female,Black,Unknown or not reported,Knife or cutting instrument,Husband,Circumstances undetermined,0,2,5,1978-5
7,"Autauga, AL",Alabama,Autauga County,Sheriff,Yes,1978,December,Normal update,Murder and non-negligent manslaughter,Single victim/single offender,48.0,Female,White,Unknown or not reported,26.0,Male,White,Unknown or not reported,"Blunt object - hammer, club, etc",Acquaintance,Other,0,0,12,1978-12
8,"Autauga, AL",Alabama,Autauga County,Sheriff,No,1979,February,Normal update,Murder and non-negligent manslaughter,Single victim/unknown offender(s),33.04365,Female,Unknown,Unknown or not reported,31.138045,Unknown,Unknown,Unknown or not reported,Other or type unknown,Relationship not determined,Circumstances undetermined,0,0,2,1979-2
9,"Autauga, AL",Alabama,Prattville,Municipal police,Yes,1979,December,Normal update,Murder and non-negligent manslaughter,Single victim/single offender,51.0,Male,Black,Unknown or not reported,24.0,Female,Black,Unknown or not reported,Knife or cutting instrument,Common-law husband,Other arguments,0,0,12,1979-12


In [11]:
# Dropping NMonth now that it has been folded into the Date column
df_hr = df_hr.drop('NMonth', axis = 'columns')

In [12]:
# Checking for dropped column: NMonth should no longer appear in the preview
df_hr.head()

Unnamed: 0,City,State,Agency,Agentype,Solved,Year,Month,ActionType,Homicide,Situation,VicAge,VicSex,VicRace,VicEthnic,OffAge,OffSex,OffRace,OffEthnic,Weapon,Relationship,Circumstance,VicCount,OffCount,Date
0,"Autauga, AL",Alabama,Autauga County,Sheriff,No,1976,September,Normal update,Murder and non-negligent manslaughter,Single victim/unknown offender(s),30.0,Male,Black,Unknown or not reported,31.138045,Unknown,Unknown,Unknown or not reported,Other or type unknown,Relationship not determined,Other,0,0,1976-9
1,"Autauga, AL",Alabama,Autauga County,Sheriff,Yes,1977,January,Normal update,Murder and non-negligent manslaughter,Single victim/single offender,65.0,Female,Black,Unknown or not reported,62.0,Male,Black,Unknown or not reported,Other or type unknown,Acquaintance,Brawl due to influence of alcohol,0,0,1977-1
2,"Autauga, AL",Alabama,Autauga County,Sheriff,Yes,1977,March,Normal update,Murder and non-negligent manslaughter,Single victim/multiple offenders,48.0,Male,White,Unknown or not reported,52.0,Male,White,Unknown or not reported,"Handgun - pistol, revolver, etc",Acquaintance,Other arguments,0,1,1977-3
3,"Autauga, AL",Alabama,Prattville,Municipal police,Yes,1977,March,Normal update,Murder and non-negligent manslaughter,Single victim/single offender,27.0,Male,Black,Unknown or not reported,22.0,Female,Black,Unknown or not reported,Shotgun,Husband,Other arguments,0,0,1977-3
4,"Autauga, AL",Alabama,Autauga County,Sheriff,Yes,1977,August,Normal update,Murder and non-negligent manslaughter,Single victim/single offender,17.0,Female,Black,Unknown or not reported,21.0,Male,Black,Unknown or not reported,Knife or cutting instrument,Acquaintance,Circumstances undetermined,0,0,1977-8


In [13]:
# Assigning numerical value to VicSex - 0 is Unknown / 1 is Male / 2 is Female
def categories_3(row):
    """Encode the victim's sex stored under row['VicSex'] as a digit string.

    Parameters
    ----------
    row : mapping-like
        Anything indexable by the key 'VicSex'.

    Returns
    -------
    str or None
        '0' for 'Unknown', '1' for 'Male', '2' for 'Female'; None for any
        other value.
    """
    sex_codes = {'Unknown': '0', 'Male': '1', 'Female': '2'}
    return sex_codes.get(row['VicSex'])

In [14]:
# Adding new column NVicSex - Gives numerical value to VicSex column (categories_3 is evaluated once per row)
df_hr['NVicSex'] = df_hr.apply(categories_3, axis = 1)

In [15]:
# Assigning numerical value to OffSex - 0 is Unknown / 1 is Male / 2 is Female
def categories_4(row):
    """Encode the offender's sex stored under row['OffSex'] as a digit string.

    Parameters
    ----------
    row : mapping-like
        Anything indexable by the key 'OffSex'.

    Returns
    -------
    str or None
        '0' for 'Unknown', '1' for 'Male', '2' for 'Female'; None for any
        other value.
    """
    sex_codes = {'Unknown': '0', 'Male': '1', 'Female': '2'}
    return sex_codes.get(row['OffSex'])

In [16]:
# Adding new column NOffSex - Gives numerical value to OffSex column (categories_4 is evaluated once per row)
df_hr['NOffSex'] = df_hr.apply(categories_4, axis = 1)

In [17]:
# Assigning numerical value to VicRace
def categories_5(row):
    """Encode the victim's race stored under row['VicRace'] as a digit string.

    Parameters
    ----------
    row : mapping-like
        Anything indexable by the key 'VicRace'.

    Returns
    -------
    str or None
        The code listed in the table below ('0' through '5'), or None when
        the value is not one of the listed categories.
    """
    race_codes = {
        'Unknown': '0',
        'White': '1',
        'Black': '2',
        'Asian': '3',
        'American Indian or Alaskan Native': '4',
        'Native Hawaiian or Pacific Islander': '5',
    }
    return race_codes.get(row['VicRace'])

In [18]:
# Adding new column NVicRace - Gives numerical value to VicRace column (categories_5 is evaluated once per row)
df_hr['NVicRace'] = df_hr.apply(categories_5, axis = 1)

In [19]:
# Assigning numerical value to OffRace
def categories_6(row):
    """Encode the offender's race stored under row['OffRace'] as a digit string.

    Parameters
    ----------
    row : mapping-like
        Anything indexable by the key 'OffRace'.

    Returns
    -------
    str or None
        The code listed in the table below ('0' through '5'), or None when
        the value is not one of the listed categories.
    """
    race_codes = {
        'Unknown': '0',
        'White': '1',
        'Black': '2',
        'Asian': '3',
        'American Indian or Alaskan Native': '4',
        'Native Hawaiian or Pacific Islander': '5',
    }
    return race_codes.get(row['OffRace'])

In [20]:
# Adding new column NOffRace - Gives numerical value to OffRace column (categories_6 is evaluated once per row)
df_hr['NOffRace'] = df_hr.apply(categories_6, axis = 1)

In [21]:
# Assigning numerical value to closeness of victim and offender
def closeness(row):
    """Rank how close victim and offender were, based on row['Relationship'].

    Parameters
    ----------
    row : mapping-like
        Anything indexable by the key 'Relationship'.

    Returns
    -------
    str or None
        The rating ('1' through '4') of the group that lists the relationship
        below, or None when the relationship appears in no group (for example
        'Relationship not determined').
    """
    # Relationships grouped by the rating they receive.
    rating_groups = (
        ('1', ('Stranger',)),
        ('2', ('Acquaintance', 'Other - known to victim', 'Neighbor',
               'Employer', 'Employee')),
        ('3', ('Friend', 'Girlfriend', 'Boyfriend', 'Common-law wife',
               'Common-law husband', 'Ex-wife', 'Homosexual relationship',
               'Ex-husband')),
        ('4', ('Wife', 'Son', 'Other family', 'Husband', 'Daughter',
               'Brother', 'Father', 'Mother', 'In-law', 'Stepfather',
               'Sister', 'Stepson', 'Stepdaughter', 'Stepmother')),
    )
    relationship = row['Relationship']
    for rating, members in rating_groups:
        if relationship in members:
            return rating
    return None

In [22]:
# Adding new column Closeness_Rating - Gives ranked value to Relationships (closeness is evaluated once per row)
df_hr['Closeness_Rating'] = df_hr.apply(closeness, axis = 1)

In [23]:
# Assigning numerical value to Weapon
def categories_7(row):
    """Encode the weapon stored under row['Weapon'] as a numeric string.

    Parameters
    ----------
    row : mapping-like
        Anything indexable by the key 'Weapon'.

    Returns
    -------
    str or None
        The code listed in the table below ('1' through '17'), or None when
        the weapon description is not one of the listed values.
    """
    weapon_codes = {
        'Handgun - pistol, revolver, etc': '1',
        'Knife or cutting instrument': '2',
        'Firearm, type not stated': '3',
        'Personal weapons, includes beating': '4',
        'Other or type unknown': '5',
        'Shotgun': '6',
        'Blunt object - hammer, club, etc': '7',
        'Rifle': '8',
        'Strangulation - hanging': '9',
        'Fire': '10',
        'Asphyxiation - includes death by gas': '11',
        'Other gun': '12',
        'Narcotics or drugs, sleeping pills': '13',
        'Drowning': '14',
        'Poison - does not include gas': '15',
        'Explosives': '16',
        'Pushed or thrown out window': '17',
    }
    return weapon_codes.get(row['Weapon'])

In [24]:
# Adding new column NWeapon - Gives numerical value to Weapon column (categories_7 is evaluated once per row)
df_hr['NWeapon'] = df_hr.apply(categories_7, axis = 1)

In [25]:
# Assigning numerical value to yes or no in Solved column - No is 0 / Yes is 1
def categories(row):
    """Encode the case status stored under row['Solved'] as a digit string.

    Parameters
    ----------
    row : mapping-like
        Anything indexable by the key 'Solved'.

    Returns
    -------
    str or None
        '0' for 'No', '1' for 'Yes'; None for any other value.
    """
    solved_codes = {'No': '0', 'Yes': '1'}
    return solved_codes.get(row['Solved'])

In [26]:
# Adding new column NSolved - Gives numerical value to Solved column (categories is evaluated once per row)
df_hr['NSolved'] = df_hr.apply(categories, axis = 1)

In [27]:
# Changing NSolved to int64 (digit strings become integers)
df_hr = df_hr.astype({'NSolved': 'int64'})

In [28]:
# Changing NVicSex to int64 (digit strings become integers)
df_hr = df_hr.astype({'NVicSex': 'int64'})

In [29]:
# Changing NOffSex to int64 (digit strings become integers)
df_hr = df_hr.astype({'NOffSex': 'int64'})

In [30]:
# Changing NVicRace to int64 (digit strings become integers)
df_hr = df_hr.astype({'NVicRace': 'int64'})

In [31]:
# Changing NOffRace to int64 (digit strings become integers)
df_hr = df_hr.astype({'NOffRace': 'int64'})

In [32]:
# Changing NWeapon to int64 (digit strings become integers)
df_hr = df_hr.astype({'NWeapon': 'int64'})

In [33]:
# Checking changes: preview the first five rows with the newly added numeric columns
df_hr.head()

Unnamed: 0,City,State,Agency,Agentype,Solved,Year,Month,ActionType,Homicide,Situation,VicAge,VicSex,VicRace,VicEthnic,OffAge,OffSex,OffRace,OffEthnic,Weapon,Relationship,Circumstance,VicCount,OffCount,Date,NVicSex,NOffSex,NVicRace,NOffRace,Closeness_Rating,NWeapon,NSolved
0,"Autauga, AL",Alabama,Autauga County,Sheriff,No,1976,September,Normal update,Murder and non-negligent manslaughter,Single victim/unknown offender(s),30.0,Male,Black,Unknown or not reported,31.138045,Unknown,Unknown,Unknown or not reported,Other or type unknown,Relationship not determined,Other,0,0,1976-9,1,0,2,0,,5,0
1,"Autauga, AL",Alabama,Autauga County,Sheriff,Yes,1977,January,Normal update,Murder and non-negligent manslaughter,Single victim/single offender,65.0,Female,Black,Unknown or not reported,62.0,Male,Black,Unknown or not reported,Other or type unknown,Acquaintance,Brawl due to influence of alcohol,0,0,1977-1,2,1,2,2,2.0,5,1
2,"Autauga, AL",Alabama,Autauga County,Sheriff,Yes,1977,March,Normal update,Murder and non-negligent manslaughter,Single victim/multiple offenders,48.0,Male,White,Unknown or not reported,52.0,Male,White,Unknown or not reported,"Handgun - pistol, revolver, etc",Acquaintance,Other arguments,0,1,1977-3,1,1,1,1,2.0,1,1
3,"Autauga, AL",Alabama,Prattville,Municipal police,Yes,1977,March,Normal update,Murder and non-negligent manslaughter,Single victim/single offender,27.0,Male,Black,Unknown or not reported,22.0,Female,Black,Unknown or not reported,Shotgun,Husband,Other arguments,0,0,1977-3,1,2,2,2,4.0,6,1
4,"Autauga, AL",Alabama,Autauga County,Sheriff,Yes,1977,August,Normal update,Murder and non-negligent manslaughter,Single victim/single offender,17.0,Female,Black,Unknown or not reported,21.0,Male,Black,Unknown or not reported,Knife or cutting instrument,Acquaintance,Circumstances undetermined,0,0,1977-8,2,1,2,2,2.0,2,1


In [34]:
# Dropping Relationship Undetermined as it does not apply to mapping
# The index labels of the matching rows are selected first, then removed in place.
df_hr.drop(index = df_hr.index[df_hr['Relationship'] == 'Relationship not determined'], inplace = True)

In [35]:
# Checking the shape of df after drop: (rows, columns) once the undetermined-relationship rows are gone
df_hr.shape

(504551, 31)

In [36]:
# Dropping District of Columbia as it's not a state
# The index labels of the matching rows are selected first, then removed in place.
df_hr.drop(index = df_hr.index[df_hr['State'] == 'District of Columbia'], inplace = True)

In [37]:
# Checking what PAPSP8 is: display every row whose State holds the unexpected value 'PAPSP8'
df_hr.loc[df_hr['State'] == 'PAPSP8']

Unnamed: 0,City,State,Agency,Agentype,Solved,Year,Month,ActionType,Homicide,Situation,VicAge,VicSex,VicRace,VicEthnic,OffAge,OffSex,OffRace,OffEthnic,Weapon,Relationship,Circumstance,VicCount,OffCount,Date,NVicSex,NOffSex,NVicRace,NOffRace,Closeness_Rating,NWeapon,NSolved
661383,"Somerset, PA",PAPSP8,"State Police, Somerset",Primary state LE,Yes,2020,March,Normal update,Manslaughter by negligence,Single victim/single offender,12.0,Female,White,Not of Hispanic origin,52.0,Female,White,Not of Hispanic origin,"Narcotics or drugs, sleeping pills",Other family,All other manslaughter by negligence,0,0,2020-3,2,2,1,1,4,13,1


In [38]:
# Changing PAPSP8 to Pennsylvania
# The affected row(s) are selected by their State value rather than by a
# hard-coded index label: a label-based .at assignment silently appends a new
# row when that label is absent, and would edit the wrong record if the index
# ever changed. With a value-based mask the cell is a no-op when nothing matches.
df_hr.loc[df_hr['State'] == 'PAPSP8', 'State'] = 'Pennsylvania'

In [39]:
# Changing Rhodes Island to Rhode Island
# Only the State column is corrected. A frame-wide replace would scan every
# column and could rewrite a matching value in an unrelated column.
df_hr['State'] = df_hr['State'].replace('Rhodes Island', 'Rhode Island')

In [40]:
# Checking for Rhode Island: display the rows whose State is 'Rhode Island' to confirm the rename took effect
df_hr.loc[df_hr['State'] == 'Rhode Island']

Unnamed: 0,City,State,Agency,Agentype,Solved,Year,Month,ActionType,Homicide,Situation,VicAge,VicSex,VicRace,VicEthnic,OffAge,OffSex,OffRace,OffEthnic,Weapon,Relationship,Circumstance,VicCount,OffCount,Date,NVicSex,NOffSex,NVicRace,NOffRace,Closeness_Rating,NWeapon,NSolved
662901,"Bristol, RI",Rhode Island,Warren,Municipal police,Yes,1977,April,Normal update,Murder and non-negligent manslaughter,Single victim/single offender,41.0,Male,White,Unknown or not reported,39.0,Male,White,Unknown or not reported,"Handgun - pistol, revolver, etc",Acquaintance,Circumstances undetermined,0,0,1977-4,1,1,1,1,2,1,1
662903,"Bristol, RI",Rhode Island,Barrington,Municipal police,Yes,1985,May,Normal update,Murder and non-negligent manslaughter,Single victim/single offender,38.0,Female,White,Not of Hispanic origin,33.0,Female,White,Not of Hispanic origin,"Handgun - pistol, revolver, etc",Stranger,Circumstances undetermined,0,0,1985-5,2,2,1,1,1,1,1
662904,"Bristol, RI",Rhode Island,Barrington,Municipal police,Yes,1991,November,Normal update,Murder and non-negligent manslaughter,Multiple victims/single offender,53.0,Male,White,Unknown or not reported,42.0,Male,White,Unknown or not reported,Knife or cutting instrument,Acquaintance,Other - not specified,2,0,1991-11,1,1,1,1,2,2,1
662905,"Bristol, RI",Rhode Island,Barrington,Municipal police,Yes,1991,November,Normal update,Murder and non-negligent manslaughter,Multiple victims/single offender,46.0,Female,White,Unknown or not reported,42.0,Male,White,Unknown or not reported,Knife or cutting instrument,Acquaintance,Other - not specified,2,0,1991-11,2,1,1,1,2,2,1
662906,"Bristol, RI",Rhode Island,Barrington,Municipal police,Yes,1991,November,Normal update,Murder and non-negligent manslaughter,Multiple victims/single offender,8.0,Female,White,Unknown or not reported,42.0,Male,White,Unknown or not reported,Knife or cutting instrument,Acquaintance,Other - not specified,2,0,1991-11,2,1,1,1,2,2,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
664388,"Washington, RI",Rhode Island,Narragansett,Municipal police,Yes,2013,May,Normal update,Murder and non-negligent manslaughter,Single victim/multiple offenders,65.0,Female,White,Unknown or not reported,45.0,Male,White,Unknown or not reported,Other or type unknown,Mother,Circumstances undetermined,0,1,2013-5,2,1,1,1,4,5,1
664390,"Washington, RI",Rhode Island,North Kingstown,Municipal police,Yes,2017,June,Normal update,Murder and non-negligent manslaughter,Single victim/single offender,69.0,Female,White,Not of Hispanic origin,38.0,Male,White,Unknown or not reported,Other or type unknown,Stranger,Other,0,0,2017-6,2,1,1,1,1,5,1
664391,"Washington, RI",Rhode Island,Westerly,Municipal police,Yes,2018,January,Normal update,Murder and non-negligent manslaughter,Single victim/single offender,50.0,Male,White,Not of Hispanic origin,50.0,Male,White,Unknown or not reported,Knife or cutting instrument,Acquaintance,Other arguments,0,0,2018-1,1,1,1,1,2,2,1
664392,"Washington, RI",Rhode Island,Westerly,Municipal police,Yes,2019,December,Normal update,Murder and non-negligent manslaughter,Single victim/single offender,47.0,Female,White,Not of Hispanic origin,66.0,Male,White,Unknown or not reported,"Handgun - pistol, revolver, etc",Other - known to victim,Circumstances undetermined,0,0,2019-12,2,1,1,1,2,1,1


In [41]:
# Checking value counts in Closeness_Rating; dropna = False makes any missing ratings show up as their own entry
df_hr['Closeness_Rating'].value_counts(dropna = False)

2    205235
1    119778
4    107261
3     69809
Name: Closeness_Rating, dtype: int64

In [42]:
# Checking data: preview the first five rows after the row drops and State corrections
df_hr.head()

Unnamed: 0,City,State,Agency,Agentype,Solved,Year,Month,ActionType,Homicide,Situation,VicAge,VicSex,VicRace,VicEthnic,OffAge,OffSex,OffRace,OffEthnic,Weapon,Relationship,Circumstance,VicCount,OffCount,Date,NVicSex,NOffSex,NVicRace,NOffRace,Closeness_Rating,NWeapon,NSolved
1,"Autauga, AL",Alabama,Autauga County,Sheriff,Yes,1977,January,Normal update,Murder and non-negligent manslaughter,Single victim/single offender,65.0,Female,Black,Unknown or not reported,62.0,Male,Black,Unknown or not reported,Other or type unknown,Acquaintance,Brawl due to influence of alcohol,0,0,1977-1,2,1,2,2,2,5,1
2,"Autauga, AL",Alabama,Autauga County,Sheriff,Yes,1977,March,Normal update,Murder and non-negligent manslaughter,Single victim/multiple offenders,48.0,Male,White,Unknown or not reported,52.0,Male,White,Unknown or not reported,"Handgun - pistol, revolver, etc",Acquaintance,Other arguments,0,1,1977-3,1,1,1,1,2,1,1
3,"Autauga, AL",Alabama,Prattville,Municipal police,Yes,1977,March,Normal update,Murder and non-negligent manslaughter,Single victim/single offender,27.0,Male,Black,Unknown or not reported,22.0,Female,Black,Unknown or not reported,Shotgun,Husband,Other arguments,0,0,1977-3,1,2,2,2,4,6,1
4,"Autauga, AL",Alabama,Autauga County,Sheriff,Yes,1977,August,Normal update,Murder and non-negligent manslaughter,Single victim/single offender,17.0,Female,Black,Unknown or not reported,21.0,Male,Black,Unknown or not reported,Knife or cutting instrument,Acquaintance,Circumstances undetermined,0,0,1977-8,2,1,2,2,2,2,1
5,"Autauga, AL",Alabama,Autauga County,Sheriff,Yes,1977,October,Normal update,Murder and non-negligent manslaughter,Single victim/single offender,62.0,Male,Asian,Unknown or not reported,80.0,Male,Black,Unknown or not reported,Shotgun,Stranger,Robbery,0,0,1977-10,1,1,3,2,1,6,1


In [43]:
# Exporting df_hr to CSV in the prepared-data folder for use in Tableau
df_hr.to_csv(path_or_buf = os.path.join(path, '02 Data', 'Prepared data', 'Homicide_Tableau_df.csv'))

In [44]:
# Checking the shape (rows, columns) of the frame that was exported
df_hr.shape

(502083, 31)

In [47]:
# Collecting the distinct state names (a NumPy array, not a DataFrame) to confirm the State corrections
StateV = df_hr['State'].unique()
StateV

array(['Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California',
       'Colorado', 'Connecticut', 'Delaware', 'Florida', 'Georgia',
       'Hawaii', 'Idaho', 'Illinois', 'Indiana', 'Iowa', 'Kansas',
       'Kentucky', 'Louisiana', 'Maine', 'Maryland', 'Massachusetts',
       'Michigan', 'Minnesota', 'Mississippi', 'Missouri', 'Montana',
       'Nebraska', 'Nevada', 'New Hampshire', 'New Jersey', 'New Mexico',
       'New York', 'North Carolina', 'North Dakota', 'Ohio', 'Oklahoma',
       'Oregon', 'Pennsylvania', 'Rhode Island', 'South Carolina',
       'South Dakota', 'Tennessee', 'Texas', 'Utah', 'Vermont',
       'Virginia', 'Washington', 'West Virginia', 'Wisconsin', 'Wyoming'],
      dtype=object)