# PCP: Parallel Categories Plots per Cluster

## Cluster 12

In [8]:
import pandas as pd
import plotly.express as px

# Build, save, and display a parallel categories plot of victim and crime
# attributes for the incidents assigned to a single cluster.

# --- Step 1: Load the dataset and keep only the cluster of interest ---
cluster_id = 12  # Cluster to plot; also used in the output file names
file_path = 'Clustered_LA_Crime_Data.csv'  # Replace with your actual file path
cl_df = pd.read_csv(file_path)
cl_df = cl_df[cl_df['Cluster'] == cluster_id]

# --- Step 2: Select the working columns and clean them ---
# NOTE(review): dropna() considers every column listed here, so a row that is
# missing e.g. 'Weapon Desc' is removed even though only the columns in
# `dimensions` below are plotted -- confirm this filtering is intended.
columns = [
    'AREA NAME', 'Vict Age', 'Vict Sex', 'Vict Descent', 'Cluster',
    'Premis Desc', 'Status Desc', 'TIME OCC', 'Month', 'Year',
    'Crm Cd Desc', 'Weapon Desc'
]

# Categorical columns are intentionally left un-encoded so the plot shows the
# actual category names.
# 'Cluster' is coerced to numeric; values that cannot be parsed become NaN and
# those rows are dropped. .assign() returns a new frame, so nothing is written
# into the result of a selection (avoids SettingWithCopyWarning).
plot_data = (
    cl_df[columns]
    .dropna()
    .assign(Cluster=lambda frame: pd.to_numeric(frame['Cluster'], errors='coerce'))
    .dropna(subset=['Cluster'])
)

# --- Step 3: Define dimensions for the parallel categories plot ---
# 'Cluster' is not a dimension: the data was filtered to one cluster above,
# so it would contribute a single-category axis. No color mapping is applied.
dimensions = ['Vict Age', 'Vict Sex', 'Vict Descent',
              'Crm Cd Desc']

# --- Step 4: Create the parallel categories plot ---
fig = px.parallel_categories(
    plot_data,
    dimensions=dimensions,
    labels={
        'Vict Age': 'Victim Age',
        'Vict Sex': 'Victim Sex',
        'Vict Descent': 'Victim Descent',
        'Crm Cd Desc': 'Crime Code Description',
        'Cluster': 'Cluster ID'
    },
    title="Parallel Categories Plot for Crime Data"
)

# --- Step 5: Adjust the layout for readability ---
fig.update_layout(
    dragmode='select',        # Use the selection drag mode
    plot_bgcolor='white',     # Set plot background to white
    paper_bgcolor='white',    # Set paper background to white
    font=dict(size=12, color='black')  # Set font size and color
)

# --- Step 6: Save the plot as JSON and as standalone HTML ---
fig_json = fig.to_json()
# Explicit encoding so the written file does not depend on the platform's
# default text encoding.
with open('parallel_categories_plot_{}.json'.format(cluster_id), 'w',
          encoding='utf-8') as f:
    f.write(fig_json)

fig.write_html('parallel_categories_plot_{}.html'.format(cluster_id))

# --- Step 7: Display the plot ---
fig.show()

## Cluster 15

In [7]:
import pandas as pd
import plotly.express as px

# Build, save, and display a parallel categories plot of victim and crime
# attributes for the incidents assigned to a single cluster.

# --- Step 1: Load the dataset and keep only the cluster of interest ---
cluster_id = 15  # Cluster to plot; also used in the output file names
file_path = 'Clustered_LA_Crime_Data.csv'  # Replace with your actual file path
cl_df = pd.read_csv(file_path)
cl_df = cl_df[cl_df['Cluster'] == cluster_id]

# --- Step 2: Select the working columns and clean them ---
# NOTE(review): dropna() considers every column listed here, so a row that is
# missing e.g. 'Weapon Desc' is removed even though only the columns in
# `dimensions` below are plotted -- confirm this filtering is intended.
columns = [
    'AREA NAME', 'Vict Age', 'Vict Sex', 'Vict Descent', 'Cluster',
    'Premis Desc', 'Status Desc', 'TIME OCC', 'Month', 'Year',
    'Crm Cd Desc', 'Weapon Desc'
]

# Categorical columns are intentionally left un-encoded so the plot shows the
# actual category names.
# 'Cluster' is coerced to numeric; values that cannot be parsed become NaN and
# those rows are dropped. .assign() returns a new frame, so nothing is written
# into the result of a selection (avoids SettingWithCopyWarning).
plot_data = (
    cl_df[columns]
    .dropna()
    .assign(Cluster=lambda frame: pd.to_numeric(frame['Cluster'], errors='coerce'))
    .dropna(subset=['Cluster'])
)

# --- Step 3: Define dimensions for the parallel categories plot ---
# 'Cluster' is not a dimension: the data was filtered to one cluster above,
# so it would contribute a single-category axis. No color mapping is applied.
dimensions = ['Vict Age', 'Vict Sex', 'Vict Descent',
              'Crm Cd Desc']

# --- Step 4: Create the parallel categories plot ---
fig = px.parallel_categories(
    plot_data,
    dimensions=dimensions,
    labels={
        'Vict Age': 'Victim Age',
        'Vict Sex': 'Victim Sex',
        'Vict Descent': 'Victim Descent',
        'Crm Cd Desc': 'Crime Code Description',
        'Cluster': 'Cluster ID'
    },
    title="Parallel Categories Plot for Crime Data"
)

# --- Step 5: Adjust the layout for readability ---
fig.update_layout(
    dragmode='select',        # Use the selection drag mode
    plot_bgcolor='white',     # Set plot background to white
    paper_bgcolor='white',    # Set paper background to white
    font=dict(size=12, color='black')  # Set font size and color
)

# --- Step 6: Save the plot as JSON and as standalone HTML ---
fig_json = fig.to_json()
# Explicit encoding so the written file does not depend on the platform's
# default text encoding.
with open('parallel_categories_plot_{}.json'.format(cluster_id), 'w',
          encoding='utf-8') as f:
    f.write(fig_json)

fig.write_html('parallel_categories_plot_{}.html'.format(cluster_id))

# --- Step 7: Display the plot ---
fig.show()

## Cluster 18

In [None]:
import pandas as pd
import plotly.express as px

# Build, save, and display a parallel categories plot of area, victim,
# premises, status, crime, and weapon attributes for a single cluster.

# --- Step 1: Load the dataset and keep only the cluster of interest ---
cluster_id = 18  # Cluster to plot; also used in the output file names
file_path = 'Clustered_LA_Crime_Data.csv'  # Replace with your actual file path
cl_df = pd.read_csv(file_path)
cl_df = cl_df[cl_df['Cluster'] == cluster_id]

# --- Step 2: Select the working columns and clean them ---
# 'Premis Desc' must be selected here because it is one of the plotted
# dimensions below; it was previously missing, so the plot call could not
# find the column in plot_data.
# NOTE(review): dropna() considers every column listed here, including ones
# that are not plotted ('TIME OCC', 'Month', 'Year') -- confirm this filtering
# is intended.
columns = [
    'AREA NAME', 'Vict Age', 'Vict Sex', 'Vict Descent', 'Cluster',
    'Premis Desc', 'Status Desc', 'TIME OCC', 'Month', 'Year',
    'Crm Cd Desc', 'Weapon Desc'
]

# Categorical columns are intentionally left un-encoded so the plot shows the
# actual category names.
# 'Cluster' is coerced to numeric; values that cannot be parsed become NaN and
# those rows are dropped. .assign() returns a new frame, so nothing is written
# into the result of a selection (avoids SettingWithCopyWarning).
plot_data = (
    cl_df[columns]
    .dropna()
    .assign(Cluster=lambda frame: pd.to_numeric(frame['Cluster'], errors='coerce'))
    .dropna(subset=['Cluster'])
)

# --- Step 3: Define dimensions for the parallel categories plot ---
# 'Cluster' is not a dimension: the data was filtered to one cluster above,
# so it would contribute a single-category axis. No color mapping is applied.
dimensions = ['AREA NAME', 'Vict Age', 'Vict Sex', 'Vict Descent',
              'Premis Desc', 'Status Desc', 'Crm Cd Desc', 'Weapon Desc']

# Fail early with a clear message if a plotted dimension was not selected.
missing_dimensions = [dim for dim in dimensions if dim not in plot_data.columns]
assert not missing_dimensions, (
    "dimensions missing from plot_data: {}".format(missing_dimensions)
)

# --- Step 4: Create the parallel categories plot ---
fig = px.parallel_categories(
    plot_data,
    dimensions=dimensions,
    labels={
        'AREA NAME': 'Area Name',
        'Vict Age': 'Victim Age',
        'Vict Sex': 'Victim Sex',
        'Vict Descent': 'Victim Descent',
        'Premis Desc': 'Premises Description',
        'Status Desc': 'Status Description',
        'Crm Cd Desc': 'Crime Code Description',
        'Weapon Desc': 'Weapon Description',
        'Cluster': 'Cluster ID'
    },
    title="Parallel Categories Plot for Crime Data"
)

# --- Step 5: Adjust the layout for readability ---
fig.update_layout(
    dragmode='select',        # Use the selection drag mode
    plot_bgcolor='white',     # Set plot background to white
    paper_bgcolor='white',    # Set paper background to white
    font=dict(size=12, color='black')  # Set font size and color
)

# --- Step 6: Save the plot as JSON and as standalone HTML ---
fig_json = fig.to_json()
# Explicit encoding so the written file does not depend on the platform's
# default text encoding.
with open('parallel_categories_plot_{}.json'.format(cluster_id), 'w',
          encoding='utf-8') as f:
    f.write(fig_json)

fig.write_html('parallel_categories_plot_{}.html'.format(cluster_id))

# --- Step 7: Display the plot ---
fig.show()