In [2]:
import plotly.express as px
import pandas as pd

import pandas as pd
import plotly.graph_objects as go





In [3]:
# Load the crime records from the Excel workbook; the path is relative to the
# notebook's working directory.
data = pd.read_excel("SACrimeData.xlsx")

In [4]:
# Preview the first rows to check which columns were loaded.
data.head()

Unnamed: 0,Province,Year,Crime,Incidents,Latitude,Longitude
0,Gauteng,2021,Rape,1200,-26.2041,28.0473
1,Gauteng,2021,Murder,950,-26.2041,28.0473
2,Gauteng,2021,Hijacking,500,-26.2041,28.0473
3,Western Cape,2021,Rape,850,-33.9249,18.4241
4,Western Cape,2021,Murder,800,-33.9249,18.4241


In [5]:
def get_aggregated_data_for_crime(crime_type, df=None):
    """Aggregate incidents per province for a single crime type.

    Parameters
    ----------
    crime_type : str
        Value of the ``Crime`` column to keep.
    df : pandas.DataFrame, optional
        Frame with ``Crime``, ``Province``, ``Incidents``, ``Latitude`` and
        ``Longitude`` columns. When omitted, the notebook-level ``data``
        frame is used, so existing one-argument calls keep working.

    Returns
    -------
    pandas.DataFrame
        One row per province (ordered by province, the ``groupby`` default)
        holding the summed ``Incidents`` and the first ``Latitude`` /
        ``Longitude`` encountered for that province.
    """
    # Only fall back to the global frame when the caller did not supply one.
    source = data if df is None else df
    return (
        source[source['Crime'] == crime_type]
        .groupby("Province")
        .agg({"Incidents": "sum", "Latitude": "first", "Longitude": "first"})
        .reset_index()
    )


In [10]:
# Distinct crime categories, in order of first appearance in the data
crime_types = data['Crime'].drop_duplicates().tolist()

In [17]:


# 1. Build the initial figure from the first crime type
initial_crime = crime_types[0]
aggregated_data = get_aggregated_data_for_crime(initial_crime)

fig = px.scatter_mapbox(
    aggregated_data,
    lat='Latitude',
    lon='Longitude',
    color='Incidents',
    size='Incidents',
    hover_name='Province',
    hover_data=['Incidents'],
    color_continuous_scale="Viridis",
    size_max=50,
    zoom=6,
    center={"lat": -28.6139, "lon": 24.8622},
    mapbox_style="open-street-map",
    title=f'Crime Data for {initial_crime}'
)

# 2. Build one dropdown entry per crime type
buttons = []

for crime in crime_types:
    aggregated_data = get_aggregated_data_for_crime(crime)
    incidents = aggregated_data['Incidents'].tolist()

    button = {
        'label': crime,
        'method': 'update',
        'args': [
            {
                # Map traces are positioned by 'lon'/'lat'; 'x'/'y' are not
                # attributes of this trace type, so restyling them never
                # moved the markers.
                'lon': [aggregated_data['Longitude'].tolist()],
                'lat': [aggregated_data['Latitude'].tolist()],
                # Keep the hover names aligned with the rows of this crime's
                # aggregate (the set of provinces may differ between crimes).
                'hovertext': [aggregated_data['Province'].tolist()],
                'marker.size': [incidents],
                'marker.color': [incidents]
            },
            {
                # Address the title text explicitly rather than passing a
                # bare string as the whole title object.
                'title.text': f'Crime Data for {crime}'
            }
        ]
    }
    buttons.append(button)

fig.update_layout(
    updatemenus=[
        {
            'buttons': buttons,
            'direction': 'down',
            'showactive': True,
            'x': 1.1,  # position the dropdown slightly to the right
            'y': 1.2  # position the dropdown slightly above
        }
    ]
)

fig.show()


MODEL CREATION

In [41]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder

# Turn province names into integer codes so they can be used as a feature
le = LabelEncoder()
le.fit(data['Province'])
data['Province_encoded'] = le.transform(data['Province'])

# Feature matrix and target column
features = ['Incidents', 'Latitude', 'Longitude', 'Province_encoded']
X, y = data[features], data['Crime']

# Hold out 20% of the rows; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [42]:
# Seed the forest (same seed as the train/test split) so the fitted model and
# the probabilities derived from it are reproducible after a kernel restart.
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)


In [46]:
# Predict on the model's feature columns only. Selecting `features` keeps this
# cell re-runnable: once the *_prob columns exist on X_test, passing the whole
# frame would hand the classifier columns it was never trained on.
probs = clf.predict_proba(X_test[features])

# Attach one "<crime>_prob" column per class for visualization. `assign`
# returns a new frame instead of writing into the slice produced by
# train_test_split, which may otherwise raise chained-assignment warnings.
X_test = X_test.assign(
    **{f'{crime}_prob': probs[:, i] for i, crime in enumerate(clf.classes_)}
)


In [47]:
import plotly.express as px

# Start the map on the first class the classifier reports
initial_crime = clf.classes_[0]
initial_prob_col = f'{initial_crime}_prob'

# Province label for every test row, looked up through the shared row index
province_lookup = data['Province'].to_dict()
test_provinces = X_test.index.map(province_lookup)

fig = px.scatter_mapbox(
    X_test,
    lat='Latitude',
    lon='Longitude',
    color=initial_prob_col,
    size=initial_prob_col,
    hover_name=test_provinces,
    hover_data=[initial_prob_col],
    color_continuous_scale="Viridis",
    size_max=50,
    zoom=6,
    center={"lat": -28.6139, "lon": 24.8622},
    mapbox_style="open-street-map",
    title=f'Predicted Probabilities for {initial_crime}',
)


In [48]:
# One dropdown entry per predicted class; each entry swaps the marker size and
# colour to that class's probability column.
buttons = []

for crime in clf.classes_:
    prob_col = f'{crime}_prob'
    crime_probs = X_test[prob_col].tolist()

    button = {
        'label': crime,
        'method': 'update',
        'args': [
            {
                # Map traces are positioned by 'lon'/'lat'; 'x'/'y' are not
                # attributes of this trace type.
                'lon': [X_test['Longitude'].tolist()],
                'lat': [X_test['Latitude'].tolist()],
                'marker.size': [crime_probs],
                'marker.color': [crime_probs],
                # Relabel the hover box so it names the selected class rather
                # than the class the figure was first built with.
                'hovertemplate': (
                    '<b>%{hovertext}</b><br><br>'
                    + prob_col
                    + '=%{marker.color}<br>Latitude=%{lat}<br>Longitude=%{lon}<extra></extra>'
                )
            },
            {
                # Address the title text explicitly rather than passing a
                # bare string as the whole title object.
                'title.text': f'Predicted Probabilities for {crime}'
            }
        ]
    }
    buttons.append(button)

fig.update_layout(
    updatemenus=[
        {
            'buttons': buttons,
            'direction': 'down',
            'showactive': True,
            'x': 1.1,  # position the dropdown slightly to the right
            'y': 1.2  # position the dropdown slightly above
        }
    ]
)

fig.show()


In [54]:
# Bring the province name back onto each test row by matching on the row index
X_test_with_province = X_test.merge(
    data[['Province']],
    how='inner',
    left_index=True,
    right_index=True,
)


In [55]:
# Display the merged test rows (features, per-class probabilities, province).
X_test_with_province

Unnamed: 0,Incidents,Latitude,Longitude,Province_encoded,Hijacking_prob,Murder_prob,Rape_prob,Province
8,450,-29.6006,30.3794,3,0.21,0.55,0.24,KwaZulu-Natal
13,280,-28.7282,24.7497,7,0.59,0.01,0.4,Northern Cape
9,700,-33.9189,25.5709,0,0.09,0.71,0.2,Eastern Cape
21,650,-23.4013,29.4179,4,0.06,0.67,0.27,Limpopo
0,1200,-26.2041,28.0473,2,0.08,0.75,0.17,Gauteng
11,400,-33.9189,25.5709,0,0.39,0.47,0.14,Eastern Cape


In [59]:
# Frame that feeds the choropleth: test rows with their province names
df_for_viz = X_test_with_province

# Start on the first class the classifier reports
initial_crime = clf.classes_[0]
initial_prob_col = f'{initial_crime}_prob'

# NOTE(review): `locations` is fed province names while `locationmode` is
# "country names"; province names presumably do not resolve to any built-in
# geometry, which would leave the map empty. Confirm, and consider supplying a
# provinces GeoJSON through `geojson=` / `featureidkey=` instead.
fig = px.choropleth(
    df_for_viz,
    locations="Province",
    locationmode="country names",
    scope="africa",
    center={"lat": -28.6139, "lon": 24.8622},
    color=initial_prob_col,
    color_continuous_scale=px.colors.sequential.YlOrRd,
    hover_data=["Province", initial_prob_col],
    labels={initial_prob_col: f'Probability of {initial_crime}'},
    template="plotly_dark",
    title=f'Predicted Probabilities for {initial_crime}',
)


In [57]:

# Create the dropdown functionality: one entry per predicted class
buttons = []
for crime in clf.classes_:
    prob_label = f'Probability of {crime}'
    button = {
        'label': crime,
        # 'update' takes [trace changes, layout changes]. With 'restyle' the
        # second argument is read as trace indices, so the title never changed.
        'method': 'update',
        'args': [
            {
                'z': [df_for_viz[f'{crime}_prob'].tolist()],
                # The figure comes from Plotly Express, which sets a hover
                # template; a template takes precedence over hoverinfo, so
                # rewrite the template to relabel the hover box.
                'hovertemplate': (
                    'Province=%{location}<br>'
                    + prob_label
                    + '=%{z}<extra></extra>'
                )
            },
            {
                'title.text': f'Predicted Probabilities for {crime}',
                # Keep the colour bar caption in step with the selected class.
                'coloraxis.colorbar.title.text': prob_label
            }
        ]
    }
    buttons.append(button)

fig.update_layout(
    updatemenus=[
        {
            'buttons': buttons,
            'direction': 'down',
            'showactive': True,
            'x': 1.1,
            'y': 1.2
        }
    ]
)

fig.show()


In [61]:
import plotly.express as px

# Probability column to plot (one "<crime>_prob" column exists per class)
crime_prob = 'Hijacking_prob'
hover_columns = ['Province', crime_prob]
map_title = f'Probability of {crime_prob} by Province'

# Bubble map of the chosen probability, drawn from the merged test rows
fig = px.scatter_mapbox(
    df_for_viz,
    lat='Latitude',
    lon='Longitude',
    size=crime_prob,
    color=crime_prob,
    color_continuous_scale=px.colors.sequential.YlOrRd,
    hover_data=hover_columns,
    size_max=30,
    zoom=5,
    mapbox_style="open-street-map",
    title=map_title,
)

fig.show()
