In [1]:
import warnings

import geopandas as gps
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from dataprep.eda import *
from wordcloud import WordCloud
# Suppress every warning for the rest of the session. Note that this also
# hides deprecation notices raised by the plotting and geo libraries.
warnings.filterwarnings("ignore")

ModuleNotFoundError: No module named 'geopandas'

In [None]:
# Load the workbook into the DataFrame used by every later cell. The path is
# relative, so the file must sit in the notebook's working directory.
df = pd.read_excel("Baltimore_Crime_Data_2022.xlsx")

In [None]:
# Preview the first five rows.
df.head()

In [None]:
# (number of rows, number of columns) of the loaded frame.
df.shape

In [None]:
# Summary statistics; by default pandas restricts these to the numeric
# columns when the frame has any.
df.describe()

In [None]:
# Column dtypes, non-null counts and memory usage.
df.info()

In [None]:
# True if at least one row is a complete duplicate of an earlier row.
df.duplicated().any()

In [None]:
# Missing-value count for each column.
df.isnull().sum()

In [None]:
# Count the missing values in every column (vectorised, in column order)
null_counts = df.isnull().sum().tolist()

# Draw one bar per column on an explicit axes
fig, ax = plt.subplots()
sns.barplot(x=list(df.columns), y=null_counts, palette='rocket', ax=ax)

# Set axis labels and title
ax.set_xlabel('Columns')
ax.set_ylabel('Missing Value Counts')
ax.set_title('Missing Value Counts for Each Column')

# Column names are long, so turn them vertical to avoid overlap
plt.xticks(rotation=90)

# Display the plot
plt.show()

In [None]:
# Number of distinct non-null values in each column.
df.nunique()

In [None]:
# Number of distinct values in every column, kept in column order
column_names = list(df.columns)
value_counts = [df[name].nunique() for name in column_names]

# One bar per column, drawn on an explicit axes
fig, ax = plt.subplots()
sns.barplot(x=column_names, y=value_counts, palette='pastel', ax=ax)

# Title and axis labels
ax.set_title('Unique Value Counts for Each Column')
ax.set_xlabel('Columns')
ax.set_ylabel('Unique Value Counts')

# Turn the column names vertical so they do not overlap
plt.setp(ax.get_xticklabels(), rotation=90)

# Render the figure
plt.show()

In [None]:
# Build an automated profiling report for the whole frame.
# NOTE(review): create_report is presumably supplied by the
# `from dataprep.eda import *` star import — confirm.
create_report(df)

In [None]:
# Read the boundary shapefile (relative path) into a GeoDataFrame
baltimore_shapefile = 'CSA_NSA_tracts.shp'
baltimore_data = gps.read_file(baltimore_shapefile)

# Draw the polygons as black outlines on a white fill
fig, ax = plt.subplots(figsize=(10, 6))
baltimore_data.plot(ax=ax, color='white', edgecolor='black')

# Annotate through the axes object rather than the pyplot state machine.
# NOTE(review): the axis labels assume the shapefile's coordinates are
# longitude/latitude degrees — confirm its CRS.
ax.set_title('Baltimore City', fontsize=16)
ax.set_xlabel('Longitude', fontsize=12)
ax.set_ylabel('Latitude', fontsize=12)

plt.show()

In [None]:
# Turn each row's longitude/latitude pair into a point geometry and attach it
# to the records as the GeoDataFrame's geometry column.
# NOTE(review): no CRS is assigned here — confirm whether later steps need one.
point_geometry = gps.points_from_xy(df["longitude"], df["latitude"])
df_geo = gps.GeoDataFrame(df, geometry=point_geometry)
df_geo

In [None]:
# Load the low-resolution world boundaries that ship with geopandas.
# NOTE(review): `geopandas.datasets` is deprecated in recent geopandas
# releases — confirm the installed version still provides it.
world_data = gps.read_file(gps.datasets.get_path('naturalearth_lowres'))

# Keep only the United States outline and draw it as the base layer;
# the plot call returns the axes that the overlay below reuses.
usa_outline = world_data[world_data.iso_a3 == 'USA']
US_map = usa_outline.plot(figsize=(10, 6), color='white', edgecolor='black')

# Overlay the point records on the same axes
df_geo.plot(ax=US_map, color='red', legend=True, markersize=50)

# Title and axis labels, set on the axes directly
US_map.set_title('Locations in the United States', fontsize=16)
US_map.set_xlabel('Longitude', fontsize=12)
US_map.set_ylabel('Latitude', fontsize=12)

plt.show()

In [None]:
# Side-by-side view of the age column: histogram on the left, box plot on
# the right.
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))
hist_ax, box_ax = axs

sns.histplot(x='age', data=df, bins=100, ax=hist_ax)
hist_ax.set_title("Age Histogram")
hist_ax.set_xlabel('Age', fontsize=12)
hist_ax.set_ylabel('Count', fontsize=12)

# Age is mapped to x, so the box is drawn horizontally and the "Age" label
# belongs on the x-axis; the y-axis carries no variable.
sns.boxplot(x='age', data=df, ax=box_ax)
box_ax.set_title("Age Boxplot")
box_ax.set_xlabel('Age', fontsize=12)

fig.suptitle('Distribution of Age', fontsize=16)
plt.show()


In [None]:
# Horizontal count plot: one bar per distinct value of the ethnicity column
fig, ax = plt.subplots(figsize=(8, 6))
sns.countplot(y='ethnicity', data=df, palette='Set1', ax=ax)

# Title and axis labels, set on the axes directly
ax.set_title('Distribution of Ethnicity', fontsize=16)
ax.set_xlabel('Count', fontsize=12)
ax.set_ylabel('Ethnicity', fontsize=12)

plt.show()

In [None]:
# Count plot of the race column with the count printed above each bar
fig, ax = plt.subplots(figsize=(8, 6))
sns.countplot(x='race', data=df, palette='Set2', ax=ax)

# Set axis labels and title
ax.set_xlabel('Race', fontsize=12)
ax.set_ylabel('Count', fontsize=12)
ax.set_title('Distribution of Race', fontsize=16)

# Tilt the category labels so they do not overlap
plt.xticks(rotation=45)

# Label each bar with its count. Bar heights can come back as floats, so
# format them as whole numbers instead of printing e.g. "120.0".
for bar in ax.patches:
    height = bar.get_height()
    ax.text(bar.get_x() + bar.get_width() / 2, height + 2,
            '{:.0f}'.format(height), fontsize=11, ha='center')

plt.show()

In [None]:
# Count plot of the gender column with the count printed above each bar
fig, ax = plt.subplots(figsize=(8, 6))
sns.countplot(x='gender', data=df, palette='Set3', ax=ax)

# Set axis labels and title
ax.set_xlabel('Gender', fontsize=12)
ax.set_ylabel('Count', fontsize=12)
ax.set_title('Distribution of Gender', fontsize=16)

# Keep the category labels horizontal
plt.xticks(rotation=0)

# Label each bar with its count. Bar heights can come back as floats, so
# format them as whole numbers instead of printing e.g. "120.0".
for bar in ax.patches:
    height = bar.get_height()
    ax.text(bar.get_x() + bar.get_width() / 2, height + 0.5,
            '{:.0f}'.format(height), fontsize=10, ha='center')

plt.show()

In [None]:
# Count plot of the description column, bars ordered from most to least
# frequent, with the count printed above each bar
description_order = df['description'].value_counts().index
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(x='description', data=df, palette='crest', ax=ax, order=description_order)

# Set axis labels and title
ax.set_xlabel('Descriptions', fontsize=12)
ax.set_ylabel('Count', fontsize=12)
ax.set_title('Distribution of Descriptions', fontsize=16)

# Turn the category labels vertical so they do not overlap
plt.xticks(rotation=90)

# Label each bar with its count. Rounding a float height to two decimals
# still prints "120.0", so format it as a whole number instead.
for bar in ax.patches:
    height = bar.get_height()
    ax.text(bar.get_x() + bar.get_width() / 2, height + 0.5,
            '{:.0f}'.format(height), fontsize=10, ha='center')

plt.show()

In [None]:
# Count plot of the weapon column, bars ordered from most to least frequent,
# with the count printed above each bar
weapon_order = df['weapon'].value_counts().index
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(x='weapon', data=df, palette='flare', ax=ax, order=weapon_order)

# Set axis labels and title
ax.set_xlabel('Used Weapons', fontsize=12)
ax.set_ylabel('Count', fontsize=12)
ax.set_title('Distribution of Used Weapons', fontsize=16)

# Turn the category labels vertical so they do not overlap
plt.xticks(rotation=90)

# Label each bar with its count. Rounding a float height to two decimals
# still prints "120.0", so format it as a whole number instead.
for bar in ax.patches:
    height = bar.get_height()
    ax.text(bar.get_x() + bar.get_width() / 2, height + 0.5,
            '{:.0f}'.format(height), fontsize=10, ha='center')

plt.show()

In [None]:
# NOTE(review): this cell repeats the gender count plot that already appears
# earlier in the notebook — consider removing one of the two.

# Count plot of the gender column with the count printed above each bar
fig, ax = plt.subplots(figsize=(8, 6))
sns.countplot(x='gender', data=df, palette='Set3', ax=ax)

# Set axis labels and title
ax.set_xlabel('Gender', fontsize=12)
ax.set_ylabel('Count', fontsize=12)
ax.set_title('Distribution of Gender', fontsize=16)

# Keep the category labels horizontal
plt.xticks(rotation=0)

# Label each bar with its count. Bar heights can come back as floats, so
# format them as whole numbers instead of printing e.g. "120.0".
for bar in ax.patches:
    height = bar.get_height()
    ax.text(bar.get_x() + bar.get_width() / 2, height + 0.5,
            '{:.0f}'.format(height), fontsize=10, ha='center')

plt.show()

In [None]:
# Switch the global seaborn style and palette; this stays in effect for any
# figure drawn after this cell.
sns.set_style("whitegrid")
sns.set_palette("pastel")

# Wedge colours. matplotlib cycles through the list again when there are
# more wedges than colours.
colors = ['#ff9999','#66b3ff','#99ff99','#ffcc99','#ffb3e6']

# One wedge per district, sized by how many rows fall in it
district_counts = df["district"].value_counts()

fig, ax = plt.subplots(figsize=(8, 8))
district_counts.plot.pie(autopct='%1.1f%%', startangle=90, colors=colors, ax=ax)

# Title and labels; the y-label is blanked so pandas' default label is not shown
ax.set_title('Distribution of Districts', fontsize=16)
ax.set_xlabel('District', fontsize=12)
ax.set_ylabel('')

# Tighten the spacing around the chart
fig.tight_layout()

plt.show()


In [None]:
# Frequency of each neighborhood as a plain {name: count} mapping
neighborhood_counts = df['neighborhood'].value_counts().to_dict()

# Build the cloud from those frequencies. max_words is set to the number of
# distinct neighborhoods so the word limit never drops one.
# WordCloud comes from the third-party `wordcloud` package and has to be
# imported before this cell runs.
wordcloud = WordCloud(
    width=800,
    height=800,
    background_color='white',
    min_font_size=10,
    colormap='viridis',
    max_words=df['neighborhood'].nunique(),
).generate_from_frequencies(neighborhood_counts)

# Show the rendered image without axes
fig, ax = plt.subplots(figsize=(10, 10))
ax.imshow(wordcloud, interpolation='bilinear')
ax.axis("off")

# Add the title first ...
ax.set_title('Most Common Crime Sites', fontsize=20, pad=20, color='black')

# ... and tighten the layout afterwards, so the layout computation takes the
# title into account.
fig.tight_layout(pad=0)

# Show the plot
plt.show()
