In [1]:
# Import Dependencies
import pandas as pd
import os
import csv

crime_file = os.path.join("crime_incident_data2017.csv")


In [2]:
# Reference the file where the CSV is located

# Import the data into a Pandas DataFrame

portland_crime_df = pd.read_csv(crime_file,encoding="ISO-8859-1")


In [3]:
# look for missing values

portland_crime_df.head()

Unnamed: 0,Address,Case Number,Crime Against,Neighborhood,Number of Records,Occur Date,Occur Month Year,Occur Time,Offense Category,Offense Count,Offense Type,Open Data Lat,Open Data Lon,Open Data X,Open Data Y,Report Date,Report Month Year
0,,17-X4762181,Person,,1,1/1/96,1/1/96,800,Sex Offenses,1,Rape,,,,,1/26/17,1/1/17
1,,17-X4757824,Property,Centennial,1,1/20/00,1/1/00,1615,Fraud Offenses,1,Identity Theft,,,,,1/20/17,1/1/17
2,200 BLOCK OF SE 78TH AVE,17-900367,Property,Montavilla,1,12/1/03,12/1/03,800,Fraud Offenses,1,False Pretenses/Swindle/Confidence Game,45.5207,-122.583,7668150.0,682825.0,1/9/17,1/1/17
3,,17-X4748982,Property,Southwest Hills,1,1/1/10,1/1/10,0,Fraud Offenses,1,Identity Theft,,,,,1/5/17,1/1/17
4,,17-X4748982,Property,Southwest Hills,1,1/1/10,1/1/10,0,Larceny Offenses,1,All Other Larceny,,,,,1/5/17,1/1/17


In [4]:
# drop null rows
cleaned_df = portland_crime_df.dropna()

In [5]:
# verify counts

cleaned_df.count()

Address              36146
Case Number          36146
Crime Against        36146
Neighborhood         36146
Number of Records    36146
Occur Date           36146
Occur Month Year     36146
Occur Time           36146
Offense Category     36146
Offense Count        36146
Offense Type         36146
Open Data Lat        36146
Open Data Lon        36146
Open Data X          36146
Open Data Y          36146
Report Date          36146
Report Month Year    36146
dtype: int64

In [6]:
# Check to see if there are any values with mispelled or similar values in "Offense Type"
cleaned_df["Offense Type"].value_counts()

Theft From Motor Vehicle                       6947
Motor Vehicle Theft                            4689
All Other Larceny                              4558
Vandalism                                      3863
Burglary                                       2824
Shoplifting                                    2259
Identity Theft                                 1794
Simple Assault                                 1216
Drug/Narcotic Violations                       1095
Theft of Motor Vehicle Parts or Accessories    1073
Intimidation                                    900
Theft From Building                             895
False Pretenses/Swindle/Confidence Game         870
Aggravated Assault                              839
Robbery                                         608
Counterfeiting/Forgery                          448
Weapons Law Violations                          266
Credit Card/ATM Fraud                           226
Arson                                           200
Prostitution

In [7]:
# Combine similar offenses
cleaned_df['Offense Type'] = cleaned_df['Offense Type'].replace(
    {'Theft From Motor Vehicle':'Motor Vehicle Theft',
    'Theft of Motor Vehicle Parts or Accessories':'Motor Vehicle Theft'})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


In [8]:
# Check to see if you comnbined similar offenses correctly in "Offense Type".
cleaned_df["Offense Type"].value_counts()

Motor Vehicle Theft                           12709
All Other Larceny                              4558
Vandalism                                      3863
Burglary                                       2824
Shoplifting                                    2259
Identity Theft                                 1794
Simple Assault                                 1216
Drug/Narcotic Violations                       1095
Intimidation                                    900
Theft From Building                             895
False Pretenses/Swindle/Confidence Game         870
Aggravated Assault                              839
Robbery                                         608
Counterfeiting/Forgery                          448
Weapons Law Violations                          266
Credit Card/ATM Fraud                           226
Arson                                           200
Prostitution                                    145
Pocket-Picking                                   94
Purse-Snatch

In [9]:
cleaned_df.head()

Unnamed: 0,Address,Case Number,Crime Against,Neighborhood,Number of Records,Occur Date,Occur Month Year,Occur Time,Offense Category,Offense Count,Offense Type,Open Data Lat,Open Data Lon,Open Data X,Open Data Y,Report Date,Report Month Year
2,200 BLOCK OF SE 78TH AVE,17-900367,Property,Montavilla,1,12/1/03,12/1/03,800,Fraud Offenses,1,False Pretenses/Swindle/Confidence Game,45.5207,-122.583,7668150.0,682825.0,1/9/17,1/1/17
5,5400 BLOCK OF NE MALLORY AVE,17-900129,Property,King,1,11/28/10,11/1/10,1612,Fraud Offenses,1,Identity Theft,45.5625,-122.664,7647987.0,698581.0,1/3/17,1/1/17
6,5000 BLOCK OF NE 19TH AVE,17-901079,Property,Vernon,1,11/8/13,11/1/13,1200,Fraud Offenses,1,False Pretenses/Swindle/Confidence Game,45.5594,-122.646,7652567.0,697337.0,1/26/17,1/1/17
7,5000 BLOCK OF NE 19TH AVE,17-901079,Property,Vernon,1,11/8/13,11/1/13,1200,Fraud Offenses,1,Identity Theft,45.5594,-122.646,7652567.0,697337.0,1/26/17,1/1/17
8,12000 BLOCK OF SE PINE ST,17-900253,Property,Hazelwood,1,1/6/14,1/1/14,805,Fraud Offenses,1,Credit Card/ATM Fraud,45.5204,-122.539,7679522.0,682404.0,1/6/17,1/1/17


In [10]:
# Create a new DataFrame that looks into a specific neighborhood

nb_df = cleaned_df.set_index("Neighborhood")

nb_df.head()

Unnamed: 0_level_0,Address,Case Number,Crime Against,Number of Records,Occur Date,Occur Month Year,Occur Time,Offense Category,Offense Count,Offense Type,Open Data Lat,Open Data Lon,Open Data X,Open Data Y,Report Date,Report Month Year
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Montavilla,200 BLOCK OF SE 78TH AVE,17-900367,Property,1,12/1/03,12/1/03,800,Fraud Offenses,1,False Pretenses/Swindle/Confidence Game,45.5207,-122.583,7668150.0,682825.0,1/9/17,1/1/17
King,5400 BLOCK OF NE MALLORY AVE,17-900129,Property,1,11/28/10,11/1/10,1612,Fraud Offenses,1,Identity Theft,45.5625,-122.664,7647987.0,698581.0,1/3/17,1/1/17
Vernon,5000 BLOCK OF NE 19TH AVE,17-901079,Property,1,11/8/13,11/1/13,1200,Fraud Offenses,1,False Pretenses/Swindle/Confidence Game,45.5594,-122.646,7652567.0,697337.0,1/26/17,1/1/17
Vernon,5000 BLOCK OF NE 19TH AVE,17-901079,Property,1,11/8/13,11/1/13,1200,Fraud Offenses,1,Identity Theft,45.5594,-122.646,7652567.0,697337.0,1/26/17,1/1/17
Hazelwood,12000 BLOCK OF SE PINE ST,17-900253,Property,1,1/6/14,1/1/14,805,Fraud Offenses,1,Credit Card/ATM Fraud,45.5204,-122.539,7679522.0,682404.0,1/6/17,1/1/17


In [11]:
# King_df = nb_df.loc["King",:]

King_df = nb_df.loc["King",:]

In [16]:
King_df.sort_values(by="Address")

King_df.head()

Unnamed: 0_level_0,Address,Case Number,Crime Against,Number of Records,Occur Date,Occur Month Year,Occur Time,Offense Category,Offense Count,Offense Type,Open Data Lat,Open Data Lon,Open Data X,Open Data Y,Report Date,Report Month Year
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
King,5400 BLOCK OF NE MALLORY AVE,17-900129,Property,1,11/28/10,11/1/10,1612,Fraud Offenses,1,Identity Theft,45.5625,-122.664,7647987.0,698581.0,1/3/17,1/1/17
King,5500 BLOCK OF NE MARTIN LUTHER KING JR BLVD,17-9250,Property,1,12/10/16,12/1/16,0,Larceny Offenses,1,All Other Larceny,45.5631,-122.661,7648603.0,698786.0,1/10/17,1/1/17
King,900 BLOCK OF NE ALBERTA ST,17-900046,Property,1,1/1/17,1/1/17,925,Larceny Offenses,1,All Other Larceny,45.5592,-122.656,7649920.0,697323.0,1/2/17,1/1/17
King,NE 6TH AVE / NE BEECH ST,17-1842,Society,1,1/2/17,1/1/17,2144,Weapon Law Violations,1,Weapons Law Violations,45.5495,-122.66,7648922.0,693827.0,1/2/17,1/1/17
King,1400 BLOCK OF NE ALBERTA ST,17-900118,Property,1,1/1/17,1/1/17,1355,Vandalism,1,Vandalism,45.5591,-122.651,7651172.0,697285.0,1/3/17,1/1/17
