# Exploratory Data Analysis of Los Angeles Crime Data


In [None]:
# Import the analysis libraries and load the dataset into a DataFrame

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from google.colab import drive

# Make Google Drive available under /content/drive
drive.mount('/content/drive')

# Read the crime CSV into a DataFrame.
# NOTE(review): the file is read from /content, not from the mounted
# /content/drive folder -- confirm this is the intended location.
csv_path = '/content/Crime_Data_from_2020_to_Present.csv'
df = pd.read_csv(csv_path)


In [None]:
# Preview the first five rows of the DataFrame
df.head(5)

In [None]:
# Preview the last five rows of the DataFrame
df.tail(5)

In [None]:
# Dimensions of the DataFrame as a (rows, columns) tuple
df.shape

In [None]:
# Concise summary of the DataFrame: column names, non-null counts, dtypes and memory usage
df.info()

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max) for the numeric columns
df.describe()

In [None]:
# Column names as a plain Python list
list(df.columns)

In [None]:
# Number of missing (NaN) entries in each column
df.isna().sum()

In [None]:
# Check for duplicate rows. nunique() only reports how many distinct values each
# column holds, so fully duplicated records are counted explicitly as well.
print(f"Duplicate rows: {df.duplicated().sum()}")

# Number of distinct values per column (shown as the cell output)
df.nunique()

In [None]:
# Crimes by AREA NAME: count how many records carry each AREA NAME value
area_totals = df['AREA NAME'].value_counts()

# Draw the counts as a Matplotlib bar chart on an explicit Axes
fig, ax = plt.subplots(figsize=(16, 6))
ax.bar(area_totals.index, area_totals, color='pink')
ax.set(
    title='Crimes Count by AREA NAME',
    xlabel='AREA NAME',
    ylabel='Crime Count',
)

# Tilt the x-axis tick labels
plt.xticks(rotation=25)

plt.show()

In [None]:
# Count records per 'Crm Cd Desc' value and keep the 25 most frequent
description_counts = df['Crm Cd Desc'].value_counts()
top_descriptions = description_counts.head(25)

# Bar chart of the top 25 descriptions (Matplotlib)
plt.figure(figsize=(25, 6))
plt.bar(x=top_descriptions.index, height=top_descriptions, color='lightgreen')
plt.xlabel('Crm Cd Desc (Defines the Crime Code provided)')
plt.ylabel('Crime Count')
plt.title('Crime Counts of Top 25 Crm Cd Desc')

# Turn the x-axis tick labels vertical (90 degrees)
plt.xticks(rotation=90)

# Render the figure
plt.show()

In [None]:
# Interactive version of the top-25 'Crm Cd Desc' bar chart using Plotly
crime_counts = df['Crm Cd Desc'].value_counts().head(25)

# Convert the counts into a DataFrame with explicit column names. The names
# pandas assigns to a value_counts() result and its index differ between pandas
# versions, so relying on labels={'index': ..., 'y': ...} to rename the axes is
# fragile; naming the columns directly yields the intended axis titles.
top_crimes = crime_counts.rename_axis('Crm Cd Desc').reset_index(name='Count')

# Creating an interactive bar plot with Plotly
fig = px.bar(top_crimes, x='Crm Cd Desc', y='Count',
             title='Crime Counts of Top 25 Crm Cd Desc')
fig.update_layout(xaxis_tickangle=-45)  # Tilt the x-axis tick labels by -45 degrees
fig.show()

In [None]:
# Descent Code: A - Other Asian B - Black C - Chinese D - Cambodian F - Filipino G - Guamanian
# H - Hispanic/Latin/Mexican I - American Indian/Alaskan Native J - Japanese
# K - Korean L - Laotian O - Other P - Pacific Islander S - Samoan U - Hawaiian
# V - Vietnamese W - White X - Unknown Z - Asian Indian

# Crimes by Vict Descent: count records per descent code
descent_totals = df['Vict Descent'].value_counts()

# Bar chart of the per-code counts on an explicit Matplotlib Axes
fig, ax = plt.subplots(figsize=(16, 6))
ax.bar(descent_totals.index, descent_totals, color='skyblue')
ax.set_title('Crime Count by Vict Descent')
ax.set_xlabel('Vict Descent')
ax.set_ylabel('Crime Count')

# Tilt the x-axis tick labels
plt.xticks(rotation=25)

plt.show()

In [None]:
# Countplots of the ten most frequent values of each selected categorical column
sns.set_style("darkgrid")

# Categorical columns to plot
categorical_columns = ['AREA NAME', 'Vict Sex', 'Vict Descent', 'Status Desc']

# Two plots per row; use only as many rows as the columns need so that no
# half of the figure is left empty.
n_cols = 2
n_rows = (len(categorical_columns) + n_cols - 1) // n_cols

fig, axes = plt.subplots(n_rows, n_cols, figsize=(18, n_rows * 6), squeeze=False)
flat_axes = axes.ravel()

for ax, feature in zip(flat_axes, categorical_columns):
    # Ten most frequent values of this column, most frequent first
    top_categories = df[feature].value_counts().head(10).index

    # Keep only the rows whose value is one of the top categories
    filtered_data = df[df[feature].isin(top_categories)]

    # Horizontal countplot, ordered by frequency
    sns.countplot(y=filtered_data[feature], order=top_categories, ax=ax)
    ax.set_title(f"Countplot of {feature}")

# Hide any grid cells left over when the number of columns is odd
for ax in flat_axes[len(categorical_columns):]:
    ax.set_visible(False)

# Figure-level title: plt.title() would overwrite the title of the last subplot
fig.suptitle('Bar Chart of Categorical Columns')
# Reserve a strip at the top so the figure title does not overlap the subplots
fig.tight_layout(rect=[0, 0, 1, 0.96])
plt.show()

In [None]:
# Correlation heatmap of the numeric columns, drawn with Seaborn
plt.figure(figsize=(15, 10))

# Keep only numeric columns for the correlation matrix. DR_NO is left out
# without touching df: the previous pd.to_datetime(df['DR_NO']) call rewrote the
# column in place as timestamps purely so that it would fall out of the numeric
# selection, which corrupted it for any later cell.
# NOTE(review): DR_NO appears to be a record-number identifier rather than a
# date -- confirm against the dataset's data dictionary.
numeric_df = df.select_dtypes(include=[np.number]).drop(columns=['DR_NO'], errors='ignore')

# Annotated heatmap of the pairwise correlations
sns.heatmap(numeric_df.corr(), annot=True, fmt='.2f', cmap='Pastel2', linewidths=2)
plt.title('Correlation Heatmap')
plt.show()