In [None]:
from ucimlrepo import fetch_ucirepo 
  
# Download the "Communities and Crime" dataset (UCI repository id 183).
communities_and_crime = fetch_ucirepo(id=183)

# Feature and target tables (pandas DataFrames) exposed by the fetched object.
uci_tables = communities_and_crime.data
data, y = uci_tables.features, uci_tables.targets

# Dataset-level metadata is also available if needed:
# print(communities_and_crime.metadata)

# Print the per-variable information table.
print(communities_and_crime.variables)

In [None]:
# Remove every feature that contains a missing value, except 'state'.
# A value counts as missing when it is the literal '?' placeholder or a
# NaN/None, so the cleanup holds whichever encoding the loader produced.
candidate_cols = data.columns[data.columns != 'state']
candidate_values = data[candidate_cols]
has_missing = (candidate_values.eq('?') | candidate_values.isna()).any()

# `has_missing` is a boolean Series keyed by column name; convert it to a plain
# array so it is used as a positional mask over `candidate_cols`.
data = data.drop(columns=candidate_cols[has_missing.to_numpy()])
data


In [None]:
# Build the feature matrix by discarding the 'state', 'communityname' and
# 'fold' columns from the cleaned table.
non_feature_cols = ['state', 'communityname', 'fold']
X = data.drop(columns=non_feature_cols)
X

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Linearly rescale every feature column into the range [-1, 1].
# NOTE: despite the variable name, this is min-max scaling rather than
# z-score standardisation.
scaler = MinMaxScaler(feature_range=(-1, 1))
scaler.fit(X)
X_standardised = scaler.transform(X)

X_standardised

In [None]:
# Display the target table (taken from `communities_and_crime.data.targets`).
y

In [None]:
# Preview: for each state code, list the first five community names.
for state, group in data.groupby('state'):
    print(f"State {state}:")
    for community in group['communityname'].head(5):
        print(community)
    


In [None]:
# Lookup table from the dataset's numeric state code to a state name.
# The codes appear to follow the FIPS state numbering (verify against the
# dataset documentation); codes that are not listed here have no mapping.
state_dict = dict([
    (1, "Alabama"), (2, "Alaska"), (4, "Arizona"), (5, "Arkansas"),
    (6, "California"), (8, "Colorado"), (9, "Connecticut"), (10, "Delaware"),
    (11, "DC"), (12, "Florida"), (13, "Georgia"), (16, "Idaho"),
    (18, "Indiana"), (19, "Iowa"), (20, "Kansas"), (21, "Kentucky"),
    (22, "Louisiana"), (23, "Maine"), (24, "Maryland"), (25, "Massachusetts"),
    (27, "Minnesota"), (28, "Mississippi"), (29, "Missouri"), (32, "Nevada"),
    (33, "New Hampshire"), (34, "New Jersey"), (35, "New Mexico"), (36, "New York"),
    (37, "North Carolina"), (38, "North Dakota"), (39, "Ohio"), (40, "Oklahoma"),
    (41, "Oregon"), (42, "Pennsylvania"), (44, "Rhode Island"), (45, "South Carolina"),
    (46, "South Dakota"), (47, "Tennessee"), (48, "Texas"), (49, "Utah"),
    (50, "Vermont"), (51, "Virginia"), (53, "Washington"), (54, "West Virginia"),
    (55, "Wisconsin"), (56, "Wyoming"),
])

In [None]:
import numpy as np

# Mean of the target column for each state, keyed by state name.
#
# Rows of `y` are matched to `data['state']` by index label rather than by
# position, so the result stays correct even if `data` stops having a default
# 0..n-1 index (e.g. after rows are filtered out).
crime_by_state_code = y.iloc[:, 0].groupby(data['state']).mean()

# Translate numeric state codes to names; an unmapped code raises KeyError
# so a gap in `state_dict` is noticed instead of silently dropped.
crime_dict = {
    state_dict[state]: float(mean_crime)
    for state, mean_crime in crime_by_state_code.items()
}

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap as Basemap
from matplotlib.colors import rgb2hex
from matplotlib.patches import Polygon
from matplotlib.colors import Normalize

plt.figure(figsize=(12,6))

# Lambert Conformal Conic map framed on the lower 48 states.
m = Basemap(
    llcrnrlon=-119, llcrnrlat=22, urcrnrlon=-64, urcrnrlat=49,
    projection='lcc', lat_1=33, lat_2=45, lon_0=-95,
)

# Read the state outlines from the local 'st99_d00' shapefile and draw them.
# (Boundary data from the U.S. Census Bureau:
#  http://www.census.gov/geo/www/cob/st2000.html)
# The shapes and their attributes are read back below via m.states and
# m.states_info.
shp_info = m.readshapefile('st99_d00', 'states', drawbounds=True)

# Colour scale: a value is mapped linearly from [vmin, vmax] onto the
# reversed 'hot' colormap (plt.cm.hot_r).
cmap = plt.cm.hot_r
vmin, vmax = 0, 1

# Name attached to each shape, in the same order as m.states.
statenames = [shapedict['NAME'] for shapedict in m.states_info]

# RGB colour for every shape name that has an entry in crime_dict; names
# without an entry get no colour and are left unfilled.
colors = {
    statename: cmap((crime_dict[statename] - vmin)/(vmax-vmin))[:3]
    for statename in statenames
    if statename in crime_dict
}

# Fill each shape that has a crime value with its colour.
ax = plt.gca()
for statename, seg in zip(statenames, m.states):
    if statename not in crime_dict:
        continue
    if statename == 'Alaska':
        # Shrink Alaska to 35% of its size, then shift it by a fixed offset.
        seg = [(0.35*pt[0] + 1100000, 0.35*pt[1]-1440000) for pt in seg]
    color = rgb2hex(colors[statename])
    ax.add_patch(Polygon(seg, facecolor=color, edgecolor=color))

# Colorbar: a ScalarMappable with the same colormap and value range as above.
norm = Normalize(vmin=vmin, vmax=vmax)
sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])
cbar = plt.colorbar(sm, ax=ax, orientation='vertical')
cbar.set_label('Violent crimes')

plt.title('US States by Violent crimes per population')
plt.show()



In [None]:
# Inspect the per-state mean values that were used to colour the map.
crime_dict