In [1]:
import pandas as pd
import numpy as np

# Read the raw city-level crime table.
df = pd.read_csv("californiaDS.csv")

# Rename all twelve columns positionally.
df.columns = [
    "City",
    "Population",
    "Violent Crime",
    "Murder and Non-negligent Manslaughter",
    "Rape",
    "Robbery",
    "Aggravated Assault",
    "Property Crime",
    "Burglary",
    "Larceny-Theft",
    "Motor Vehicle Theft",
    "Arson",
]

# Drop the final row (presumably a non-data footer row - TODO confirm), then
# remove every comma from string cells (presumably thousands separators) so
# the count columns can be cast to integers afterwards.
df = df.drop(index=df.index[-1]).replace(",", "", regex=True)
df

Unnamed: 0,City,Population,Violent Crime,Murder and Non-negligent Manslaughter,Rape,Robbery,Aggravated Assault,Property Crime,Burglary,Larceny-Theft,Motor Vehicle Theft,Arson
0,Adelanto,34491,276,1.0,20,42,213,459,136,209,114,14
1,Agoura Hills,20490,21,0.0,6,4,11,306,66,223,17,0
2,Alameda,78907,162,0.0,7,94,61,2579,218,1958,403,29
3,Albany,20083,40,0.0,8,21,11,685,105,534,46,1
4,Alhambra,84837,161,2.0,11,89,59,1749,259,1303,187,8
...,...,...,...,...,...,...,...,...,...,...,...,...
452,Yountville,2988,5,0.0,0,0,5,42,5,36,1,1
453,Yreka,7527,28,0.0,0,1,27,235,34,172,29,0
454,Yuba City,67164,241,3.0,28,52,158,1823,230,1334,259,7
455,Yucaipa,53964,200,3.0,14,31,152,641,172,365,104,0


In [2]:
# Inspect the column dtypes after loading, before any explicit conversion.
df.dtypes

City                                      object
Population                                object
Violent Crime                             object
Murder and Non-negligent Manslaughter    float64
Rape                                      object
Robbery                                   object
Aggravated Assault                        object
Property Crime                            object
Burglary                                  object
Larceny-Theft                             object
Motor Vehicle Theft                       object
Arson                                     object
dtype: object

In [3]:
# Target dtypes: "City" becomes str, every other column becomes int.
typeConversion = {
    "City": str,
    **dict.fromkeys(
        [
            "Population",
            "Violent Crime",
            "Murder and Non-negligent Manslaughter",
            "Rape",
            "Robbery",
            "Aggravated Assault",
            "Property Crime",
            "Burglary",
            "Larceny-Theft",
            "Motor Vehicle Theft",
            "Arson",
        ],
        int,
    ),
}
df = df.astype(dtype=typeConversion)

# Show the resulting dtypes to confirm the cast.
df.dtypes

City                                     object
Population                                int32
Violent Crime                             int32
Murder and Non-negligent Manslaughter     int32
Rape                                      int32
Robbery                                   int32
Aggravated Assault                        int32
Property Crime                            int32
Burglary                                  int32
Larceny-Theft                             int32
Motor Vehicle Theft                       int32
Arson                                     int32
dtype: object

In [4]:
# Preview the first rows of df after the dtype conversion.
df.head()

Unnamed: 0,City,Population,Violent Crime,Murder and Non-negligent Manslaughter,Rape,Robbery,Aggravated Assault,Property Crime,Burglary,Larceny-Theft,Motor Vehicle Theft,Arson
0,Adelanto,34491,276,1,20,42,213,459,136,209,114,14
1,Agoura Hills,20490,21,0,6,4,11,306,66,223,17,0
2,Alameda,78907,162,0,7,94,61,2579,218,1958,403,29
3,Albany,20083,40,0,8,21,11,685,105,534,46,1
4,Alhambra,84837,161,2,11,89,59,1749,259,1303,187,8


In [5]:
# Per-city total of the reported crime counts.
#
# The columns are named explicitly rather than selected as "everything except
# City and Population": other cells in this notebook add "Police Station
# Weight", "Safety Score" and "Risk Assessment" to df, and a negative mask
# would pull those derived columns into the sum (or fail on the string column)
# whenever this cell is re-run after them.
#
# NOTE(review): in the displayed sample rows, "Violent Crime" and "Property
# Crime" equal the sums of their sub-category columns, so this total counts
# those incidents twice. The behaviour is kept unchanged here because the
# rating thresholds used later operate on this scale - confirm whether the
# double counting is intended.
crimeColumns = [
    "Violent Crime",
    "Murder and Non-negligent Manslaughter",
    "Rape",
    "Robbery",
    "Aggravated Assault",
    "Property Crime",
    "Burglary",
    "Larceny-Theft",
    "Motor Vehicle Theft",
    "Arson",
]
crimeRate = df[crimeColumns].sum(axis="columns")
print(crimeRate)

0      1484
1       654
2      5511
3      1451
4      3828
       ... 
452      95
453     526
454    4135
455    1682
456     907
Length: 457, dtype: int64


In [6]:
# Turn the per-city totals into a per-resident figure.
# This rebinds crimeRate from its previous value, so running the cell a second
# time without recomputing the totals divides by the population again.
crimeRate = crimeRate.div(df["Population"])
print(crimeRate)

0      0.043026
1      0.031918
2      0.069842
3      0.072250
4      0.045122
         ...   
452    0.031794
453    0.069882
454    0.061566
455    0.031169
456    0.041495
Length: 457, dtype: float64


In [7]:
# Scale the per-resident figure to a rate per 100,000 residents.
# This rebinds crimeRate from its previous value, so running the cell a second
# time without recomputing the earlier steps scales it again.
crimeRate = crimeRate.mul(100000)
print(crimeRate)

0      4302.571685
1      3191.800878
2      6984.171240
3      7225.016183
4      4512.182185
          ...     
452    3179.384203
453    6988.175900
454    6156.571973
455    3116.892743
456    4149.510477
Length: 457, dtype: float64


In [8]:
# Load the second dataset. Its "City" column is used afterwards to count how
# many rows (per the displayed sample, police facilities) fall in each city.
df2 = pd.read_csv("Report.csv")
df2

Unnamed: 0,Name,Address,City,Zipcode,Statea
0,Brea Police Dept,1 Civic Center Cir,Brea,92821-5732,CA
1,Scotts Valley Police Dept,1 Civic Center Dr,Scotts Valley,95066-4156,CA
2,Irvine Police Dept-Traffic,1 Civic Center Plz,Irvine,92606-5207,CA
3,Ione Police Dept,1 E Main St,Ione,95640-0000,CA
4,Los Angeles County Sheriff,1 E Regent St # 122,Inglewood,90301-6053,CA
...,...,...,...,...,...
975,Madera County Sheriff's Wrk,PO Box 804,Oakhurst,93644-0804,CA
976,San Anselmo Police Dept,Ri 5 San Anselmo Ave,San Anselmo,94960-0000,CA
977,Imperial County Sheriff's Dept,S Marina Dr,Salton City,92275-0000,CA
978,Barstow Police Substation,Tanger Outlet Mall,Barstow,92311-0000,CA


In [9]:
# Number of df2 rows per city, as a plain {city: count} dictionary.
stationCounts = df2.loc[:, "City"].value_counts().to_dict()

# Spot-check a single city.
print(stationCounts["San Francisco"])

34


In [10]:
# One weight per row of df, in df's row order: the city's count from
# stationCounts (0 when the city is not listed there) plus a constant offset.
# The offset keeps every weight at least 1; the weight is later used as a
# divisor, so it must never be zero.
weight = 1
sortedCounts = [stationCounts.get(city, 0) + weight for city in df["City"]]

# The length should equal the number of rows in df.
print(len(sortedCounts))

457


In [11]:
# Store the weights as a new column; the list is positional, so it must be in
# the same row order as df.
df.loc[:, "Police Station Weight"] = sortedCounts

# Spot-check the row for one city.
df.loc[df["City"].eq("San Francisco")]

Unnamed: 0,City,Population,Violent Crime,Murder and Non-negligent Manslaughter,Rape,Robbery,Aggravated Assault,Property Crime,Burglary,Larceny-Theft,Motor Vehicle Theft,Arson,Police Station Weight
354,San Francisco,886007,5933,40,324,3055,2514,48780,4644,39887,4249,275,35


In [12]:
# Divide each city's crime rate by its police-station weight, round to the
# nearest whole number and store as integers.
safetyScore = crimeRate.div(df["Police Station Weight"]).round().astype(int)
print(safetyScore)

0      2151
1      1596
2      1746
3      3613
4      2256
       ... 
452    1590
453    2329
454    1539
455    1558
456    4150
Length: 457, dtype: int32


In [19]:
# Ascending list of all safety scores.
sortedScore = list(safetyScore)
sortedScore.sort()
print(sortedScore)

[136, 186, 209, 225, 245, 340, 354, 432, 521, 541, 590, 593, 658, 665, 674, 684, 690, 702, 704, 705, 710, 711, 712, 736, 740, 769, 777, 809, 809, 829, 835, 839, 843, 852, 861, 861, 868, 870, 876, 881, 885, 907, 911, 911, 918, 919, 919, 921, 945, 947, 964, 978, 978, 980, 980, 999, 1013, 1016, 1019, 1024, 1030, 1035, 1040, 1056, 1061, 1062, 1076, 1081, 1082, 1085, 1087, 1091, 1092, 1093, 1093, 1093, 1093, 1096, 1100, 1100, 1100, 1101, 1109, 1112, 1126, 1159, 1159, 1160, 1166, 1169, 1173, 1183, 1185, 1201, 1205, 1208, 1228, 1229, 1229, 1238, 1238, 1244, 1249, 1263, 1267, 1270, 1273, 1277, 1287, 1295, 1296, 1303, 1305, 1305, 1307, 1308, 1308, 1322, 1325, 1328, 1334, 1335, 1343, 1343, 1347, 1348, 1350, 1352, 1354, 1354, 1356, 1372, 1373, 1373, 1384, 1385, 1393, 1404, 1406, 1407, 1408, 1420, 1423, 1442, 1463, 1471, 1472, 1474, 1484, 1487, 1493, 1500, 1501, 1502, 1510, 1514, 1517, 1526, 1527, 1536, 1539, 1546, 1549, 1553, 1558, 1566, 1567, 1568, 1574, 1575, 1575, 1581, 1582, 1585, 1588, 1589,

In [20]:
import statistics as st

# Median of the safety scores; used afterwards as the boundary between the
# "Low Risk" and "Risky" ratings.
# NOTE(review): statistics.median averages the two middle values when the
# input has an even number of elements, so med is not guaranteed to be an
# integer - code consuming med should not assume one.
med = st.median(sortedScore)
print(med)

1830


In [14]:
# Attach the scores as a new column (safetyScore is a Series, so values are
# matched to df rows by index label).
df.loc[:, "Safety Score"] = safetyScore

# Preview the first five rows with the new column.
df.head(5)

Unnamed: 0,City,Population,Violent Crime,Murder and Non-negligent Manslaughter,Rape,Robbery,Aggravated Assault,Property Crime,Burglary,Larceny-Theft,Motor Vehicle Theft,Arson,Police Station Weight,Safety Score
0,Adelanto,34491,276,1,20,42,213,459,136,209,114,14,2,2151
1,Agoura Hills,20490,21,0,6,4,11,306,66,223,17,0,2,1596
2,Alameda,78907,162,0,7,94,61,2579,218,1958,403,29,4,1746
3,Albany,20083,40,0,8,21,11,685,105,534,46,1,2,3613
4,Alhambra,84837,161,2,11,89,59,1749,259,1303,187,8,2,2256


In [22]:
def riskLabel(score, median):
    """Return the risk label for a single safety score.

    Bands are checked in order (a lower score gets a safer label):

        0 <= score < 1000          -> "Safe!"
        1000 <= score <= median    -> "Low Risk"
        median < score < 5000      -> "Risky"
        5000 <= score < 10000      -> "High Risk!"
        10000 <= score < 30000     -> "Very High Risk!!"
        anything else              -> "EXTREME RISK! DO NOT BOOK!"

    Plain comparisons are used instead of ``score in range(...)`` so that the
    function also works when ``median`` is not an integer (statistics.median
    returns the mean of the two middle values for even-length input, and
    range() rejects non-integer bounds). For integer medians the bands are
    identical to the range-based ones.

    Args:
        score: integer safety score of one city.
        median: median safety score; upper bound (inclusive) of "Low Risk".

    Returns:
        The label string for the band that contains ``score``.
    """
    if 0 <= score < 1000:
        return "Safe!"
    if 1000 <= score <= median:
        return "Low Risk"
    if median < score < 5000:
        return "Risky"
    if 5000 <= score < 10000:
        return "High Risk!"
    if 10000 <= score < 30000:
        return "Very High Risk!!"
    # Negative scores and scores of 30000 or more end up here.
    return "EXTREME RISK! DO NOT BOOK!"


# One label per row of df, in row order.
safetyRating = [riskLabel(score, med) for score in df["Safety Score"]]

df["Risk Assessment"] = safetyRating
df.head()

Unnamed: 0,City,Population,Violent Crime,Murder and Non-negligent Manslaughter,Rape,Robbery,Aggravated Assault,Property Crime,Burglary,Larceny-Theft,Motor Vehicle Theft,Arson,Police Station Weight,Safety Score,Risk Assessment
0,Adelanto,34491,276,1,20,42,213,459,136,209,114,14,2,2151,Risky
1,Agoura Hills,20490,21,0,6,4,11,306,66,223,17,0,2,1596,Low Risk
2,Alameda,78907,162,0,7,94,61,2579,218,1958,403,29,4,1746,Low Risk
3,Albany,20083,40,0,8,21,11,685,105,534,46,1,2,3613,Risky
4,Alhambra,84837,161,2,11,89,59,1749,259,1303,187,8,2,2256,Risky


In [36]:
import json

# Map each city name to its risk label. If a city name occurs more than once,
# the label from its last row wins.
jsonDict = {
    city: rating
    for city, rating in df[["City", "Risk Assessment"]].itertuples(index=False, name=None)
}
print(jsonDict)

{'Adelanto': 'Risky', 'Agoura Hills': 'Low Risk', 'Alameda': 'Low Risk', 'Albany': 'Risky', 'Alhambra': 'Risky', 'Aliso Viejo': 'Safe!', 'Alturas': 'Safe!', 'American Canyon': 'Risky', 'Anaheim': 'Low Risk', 'Anderson': 'Risky', 'Angels Camp': 'Risky', 'Antioch': 'Risky', 'Apple Valley': 'Low Risk', 'Arcadia': 'Risky', 'Arcata': 'Risky', 'Arroyo Grande': 'Low Risk', 'Artesia': 'Risky', 'Arvin': 'High Risk!', 'Atascadero': 'Low Risk', 'Atherton': 'Low Risk', 'Atwater': 'Risky', 'Auburn': 'Low Risk', 'Avalon': 'Low Risk', 'Avenal': 'Safe!', 'Azusa': 'Risky', 'Bakersfield': 'Low Risk', 'Baldwin Park': 'Low Risk', 'Banning': 'Low Risk', 'Barstow': 'Low Risk', 'Bear Valley': 'Safe!', 'Beaumont': 'Risky', 'Bell': 'Low Risk', 'Bellflower': 'Risky', 'Bell Gardens': 'Risky', 'Belmont': 'Low Risk', 'Belvedere': 'Low Risk', 'Benicia': 'Low Risk', 'Berkeley': 'Risky', 'Beverly Hills': 'Risky', 'Big Bear': 'High Risk!', 'Biggs': 'Low Risk', 'Bishop': 'Risky', 'Blythe': 'Risky', 'Bradbury': 'Risky',

In [37]:
# Write the city -> risk-label mapping to disk as a single JSON object.
with open("safetyRatings.json", mode="w") as ratingsFile:
    ratingsFile.write(json.dumps(jsonDict))