<a href="https://colab.research.google.com/github/DilshanBotheju/CSACP-Project/blob/dilshan/DSGP_SVM_(2).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Data Preprocessing**

In [None]:
# Mount Google Drive (Colab-only API) so that the CSV files referenced under
# /content/drive/MyDrive in the next cell can be read.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import uuid
import numpy as np
import datetime
from datetime import datetime
from transformers import BertTokenizer, BertModel
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import seaborn as sns
import json

# Source CSV files on the mounted Google Drive, keyed by the dataset name used below
file_paths = {
    "criminal_profiling": "/content/drive/MyDrive/DSGP/criminal_profiling_data_fixed.csv",
    "psychological_data": "/content/drive/MyDrive/DSGP/psychological_data.csv"
}

# Load every dataset into memory. A failed read is re-raised immediately with the
# dataset name and path; storing the message under an "error" key only postponed
# the failure to a confusing KeyError on datasets["criminal_profiling"] further down.
datasets = {}
for dataset_name, csv_path in file_paths.items():
    try:
        datasets[dataset_name] = pd.read_csv(csv_path)
    except Exception as e:
        raise RuntimeError(f"Could not load dataset '{dataset_name}' from {csv_path}: {e}") from e

# Show the size and first rows of each dataset to assess preprocessing needs.
# Printed explicitly: a bare expression in the middle of a cell is evaluated and discarded.
for dataset_name, frame in datasets.items():
    print(f"--- {dataset_name}: {frame.shape[0]} rows x {frame.shape[1]} columns ---")
    print(frame.head())

# Cleaning and feature extraction for each dataset


# --- Preprocessing Criminal Profiling Data ---
criminal_profiling = datasets["criminal_profiling"]

# Standardize column names (trimmed, snake_case, lower-case) and replace every
# missing cell with the literal string "Unknown"
criminal_profiling_cleaned = (
    criminal_profiling
    .rename(columns=lambda x: x.strip().replace(" ", "_").lower())
    .fillna("Unknown")
)

# Add a unique ID column named 'suspect_id' (1-based row position)
criminal_profiling_cleaned['suspect_id'] = range(1, len(criminal_profiling_cleaned) + 1)

# Remove the name column
criminal_profiling_cleaned = criminal_profiling_cleaned.drop(columns=['name'])

# Convert date_of_birth to datetime; values that cannot be parsed (including the
# "Unknown" filler) become NaT
criminal_profiling_cleaned['date_of_birth'] = pd.to_datetime(criminal_profiling_cleaned['date_of_birth'], errors='coerce')
today = datetime.now()

# Age in whole years as of today, computed column-wise instead of row by row:
# subtract one year when this year's birthday has not happened yet.
# Rows with a NaT date of birth end up as -1.
dob = criminal_profiling_cleaned['date_of_birth']
birthday_still_ahead = (dob.dt.month > today.month) | (
    (dob.dt.month == today.month) & (dob.dt.day > today.day)
)
criminal_profiling_cleaned['age'] = (
    (today.year - dob.dt.year - birthday_still_ahead.astype(int)).fillna(-1).astype(int)
)
criminal_profiling_cleaned = criminal_profiling_cleaned.drop(columns=['date_of_birth'])

# Separate victim_info into two columns: victim_gender and victim_age
victim_info = criminal_profiling_cleaned["victim_info"]
criminal_profiling_cleaned["victim_gender"] = victim_info.str.extract(
    r"(\bMale\b|\bFemale\b|\bUnknown\b)", expand=False
).fillna("Unknown")

# The digits following "Age" are the victim's age; a missing age is stored as -1
criminal_profiling_cleaned["victim_age"] = (
    victim_info.str.extract(r"Age\s*(\d+)", expand=False).fillna(-1).astype(int)
)
criminal_profiling_cleaned = criminal_profiling_cleaned.drop(columns=["victim_info"])

# --- Preprocessing Psychological Data ---
psychological_data = datasets["psychological_data"]

# Standardize column names (trimmed, snake_case, lower-case) and replace every
# missing cell with the literal string "Unknown"
psychological_data_cleaned = (
    psychological_data
    .rename(columns=lambda x: x.strip().replace(" ", "_").lower())
    .fillna("Unknown")
)

# Add a unique ID column named 'suspect_id' (1-based row position)
psychological_data_cleaned['suspect_id'] = range(1, len(psychological_data_cleaned) + 1)

# Combine stress level and anger management issues into a "risk_factors" feature.
# "Unknown" cells are skipped; every remaining value goes through str() so that a
# non-string cell (e.g. a numeric stress level) cannot make ", ".join raise TypeError.
psychological_data_cleaned["risk_factors"] = psychological_data_cleaned[
    ["stress_level", "anger_management_issues"]
].apply(lambda row: ", ".join(str(v) for v in row if v != "Unknown"), axis=1)


# Preview of the cleaned datasets (only rendered when this is the last expression of a cell)
criminal_profiling_cleaned.head(), psychological_data_cleaned.head()

# Left-join the psychological attributes onto the criminal profiles via "suspect_id"
criminal_data_processed = pd.merge(
    criminal_profiling_cleaned,
    psychological_data_cleaned,
    how="left",
    on="suspect_id",
)

# Put "suspect_id" first and keep every other column in its existing order
id_columns = ['suspect_id']
other_columns = [col for col in criminal_data_processed.columns if col not in id_columns]
criminal_data_processed = criminal_data_processed[id_columns + other_columns]
criminal_data_processed.head()
print(criminal_data_processed.columns)












Index(['suspect_id', 'gender', 'nationality', 'criminal_record', 'residence',
       'crime_type', 'crime_date', 'crime_time', 'latitude', 'longitude',
       'evidence_collected', 'age', 'victim_gender', 'victim_age',
       'personality_traits', 'mental_health_status', 'cognitive_function',
       'substance_abuse_history', 'anger_management_issues', 'past_trauma',
       'risk_of_recidivism', 'stress_level', 'risk_factors'],
      dtype='object')


# Dummy model using **SVM**

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report, accuracy_score
from sklearn.cluster import KMeans
from sklearn.multioutput import MultiOutputClassifier

# --- Dummy Dataset  ---
# Builds a small synthetic DataFrame `df` used only to exercise the modelling code below.
# Every column is drawn independently at random, so the features carry no real
# relationship to the target columns.

# Set random seed for reproducibility
# (the generated values also depend on the order of the np.random calls below)
np.random.seed(42)

# Number of synthetic rows
num_samples = 20

# Create dummy columns
df = pd.DataFrame({
    # Crime-incident fields
    'CrimeDate': pd.to_datetime(np.random.choice(pd.date_range('2010-01-01', '2020-01-01', freq='D'), num_samples)),
    'CrimeTime': np.random.choice(['12:00', '13:00', '14:00', '15:00', '16:00'], num_samples),
    'CrimeCode': np.random.randint(100, 999, num_samples),
    'Location': np.random.choice(['Urban', 'Rural'], num_samples),
    'Description': np.random.choice(['Robbery', 'Assault', 'Burglary', 'Fraud', 'Murder', 'Vandalism'], num_samples),
    'Weapon': np.random.choice(['Knife', 'Gun', 'None'], num_samples),
    # np.random.randint excludes the upper bound, e.g. 'Total Incidents' is 1-4
    'Total Incidents': np.random.randint(1, 5, num_samples),
    # Year / Month / Day are drawn independently of each other and of 'CrimeDate',
    # so they need not form a valid calendar date
    'Year': np.random.randint(2010, 2020, num_samples),
    'Month': np.random.randint(1, 13, num_samples),
    'Day': np.random.randint(1, 32, num_samples),
    'Latitude': np.random.uniform(-90, 90, num_samples),
    'Longitude': np.random.uniform(-180, 180, num_samples),
    'crime_id': np.random.randint(1000, 9999, num_samples),
    # Suspect profile fields
    'name': np.random.choice(['John Doe', 'Jane Smith', 'Alex Brown', 'Chris White'], num_samples),
    'date_of_birth': pd.to_datetime(np.random.choice(pd.date_range('1980-01-01', '2000-01-01', freq='D'), num_samples)),
    'gender': np.random.choice(['Male', 'Female'], num_samples),
    'nationality': np.random.choice(['USA', 'UK', 'Canada', 'Australia'], num_samples),
    'criminal_record': np.random.choice(['Yes', 'No'], num_samples),
    'residence': np.random.choice(['Urban', 'Rural'], num_samples),
    'crime_type': np.random.choice(['violent', 'property', 'drug-related'], num_samples),
    'evidence_collected': np.random.choice(['Yes', 'No'], num_samples),
    'victim_info': np.random.choice(['Injured', 'No Injuries'], num_samples),
    # Random draw, so IDs are not guaranteed to be unique
    'suspect_id': np.random.randint(1000, 9999, num_samples),
    # Psychological fields
    'personality_traits': np.random.choice(['Introverted', 'Extroverted', 'Aggressive', 'Calm'], num_samples),
    'mental_health_status': np.random.choice(['unknown', 'stable', 'unstable'], num_samples),
    'cognitive_function': np.random.choice(['unknown', 'normal', 'impaired'], num_samples),
    'substance_abuse_history': np.random.choice(['unknown', 'yes', 'no'], num_samples),
    'anger_management_issues': np.random.choice(['unknown', 'yes', 'no'], num_samples),
    'past_trauma': np.random.choice(['unknown', 'yes', 'no'], num_samples),
    'risk_of_recidivism': np.random.choice(['unknown', 'high', 'low'], num_samples),
    'stress_level': np.random.choice(['low', 'medium', 'high'], num_samples),
    'risk_factors': np.random.choice(['unknown', 'yes', 'no'], num_samples),
})

# --- Encode Target Variables ---
# One integer code table per target column; each table is applied with Series.map
# and the result is stored next to the source column as '<column>_encoded'.
target_code_tables = {
    'mental_health_status': {'unknown': 0, 'stable': 1, 'unstable': 2},
    'personality_traits': {'Introverted': 0, 'Extroverted': 1, 'Aggressive': 2, 'Calm': 3},
    'substance_abuse_history': {'unknown': 0, 'yes': 1, 'no': 2},
    'cognitive_function': {'unknown': 0, 'normal': 1, 'impaired': 2},
    'anger_management_issues': {'unknown': 0, 'yes': 1, 'no': 2},
    'past_trauma': {'unknown': 0, 'yes': 1, 'no': 2},
    'risk_of_recidivism': {'unknown': 0, 'high': 1, 'low': 2},
    'stress_level': {'low': 0, 'medium': 1, 'high': 2},
    'risk_factors': {'unknown': 0, 'yes': 1, 'no': 2},
}
for target_column, code_table in target_code_tables.items():
    df[f'{target_column}_encoded'] = df[target_column].map(code_table)

# --- Feature Selection ---
# Free text comes from 'Description'; the rest are numeric and categorical inputs
numeric_columns = ['Latitude', 'Longitude', 'Total Incidents']  # Extend with further numeric features as needed
categorical_columns = ['Weapon', 'Location']  # Categorical variables
text_features = df['Description']
numeric_features = df[numeric_columns]
categorical_features = df[categorical_columns]

# --- Text Vectorization (TF-IDF) ---
# Learn the vocabulary from 'Description', then turn it into a dense TF-IDF matrix
vectorizer = TfidfVectorizer(max_features=100)
vectorizer.fit(text_features)
text_features_tfidf = vectorizer.transform(text_features).toarray()

# ---One-Hot Encode Categorical Features ---
categorical_features_encoded = pd.get_dummies(categorical_features)

# ---Combine Features ---
# Column-wise concatenation: TF-IDF columns, then numeric columns, then one-hot columns
X = np.concatenate(
    [text_features_tfidf, numeric_features.to_numpy(), categorical_features_encoded.to_numpy()],
    axis=1,
)
target_columns = [
    'mental_health_status_encoded', 'personality_traits_encoded', 'substance_abuse_history_encoded',
    'cognitive_function_encoded', 'anger_management_issues_encoded', 'past_trauma_encoded',
    'risk_of_recidivism_encoded', 'stress_level_encoded', 'risk_factors_encoded',
]
y = df[target_columns]

# ---Train-Test Split ---
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

# ---Multi-output Model with SVC ---
# MultiOutputClassifier fits one independent SVC per target column
svc_model = SVC(random_state=42)
multi_target_model = MultiOutputClassifier(svc_model, n_jobs=-1)

# --- Train the model ---
multi_target_model.fit(X_train, y_train)

# --- Predictions ---
# One column of predictions per target, in the same order as the columns of y
y_pred = multi_target_model.predict(X_test)

# --- Evaluate Model ---
# Calculate accuracy for each target variable separately
accuracies = [accuracy_score(y_test.iloc[:, i], y_pred[:, i]) for i in range(y_test.shape[1])]

# Print individual accuracies and average accuracy
print("Accuracies for each target variable:", accuracies)
average_accuracy = np.mean(accuracies)
print(f"Average Model Accuracy: {average_accuracy:.2f}")

# Print classification report for each target variable.
# zero_division=0 reports 0.00 for classes without predicted/true samples (the same
# value as before) without emitting one UndefinedMetricWarning per metric, which
# previously buried the reports under warning output on this tiny test split.
for i in range(y_test.shape[1]):
    print(f"Classification Report for target variable {y_test.columns[i]}:\n",
          classification_report(y_test.iloc[:, i], y_pred[:, i], zero_division=0))

# --- Crime Pattern Clustering ---
# Fit KMeans on the full feature matrix and read back the label assigned to each row
kmeans = KMeans(n_clusters=3, random_state=42)  # Assuming 3 clusters (adjust as needed)
kmeans.fit(X)
crime_clusters = kmeans.labels_

# Store the cluster label of every incident alongside its other attributes
df['crime_cluster'] = crime_clusters

# ---  Crime Pattern Report ---
# Count incidents per (cluster, crime type, weapon, location) combination
pattern_keys = ['crime_cluster', 'crime_type', 'Weapon', 'Location']
crime_patterns = df.groupby(pattern_keys).size().reset_index(name='Count')
print("Detected Crime Patterns:\n", crime_patterns)

# --- Crime Behavior Report ---
# Run the model once over all rows and slice the result per target; the prediction
# columns come back in the same order as the target columns the model was fitted on.
all_predictions = multi_target_model.predict(X)
prediction_columns = [
    'Predicted Mental Health Status',
    'Predicted Personality Traits',
    'Predicted Substance Abuse History',
    'Predicted Cognitive Function',
    'Predicted Anger Management Issues',
    'Predicted Past Trauma',
    'Predicted Risk of Recidivism',
    'Predicted Stress Level',
    'Predicted Risk Factors',
]

report_data = {'Crime Description': df['Description']}
for position, column in enumerate(prediction_columns):
    report_data[column] = all_predictions[:, position]
report_data['Crime Cluster'] = df['crime_cluster']

# The report (and the CSV written below) holds the integer codes, not the labels
report_df = pd.DataFrame(report_data)

# Define label mappings for each target variable (integer code -> original label)
mental_health_status_map = {0: 'unknown', 1: 'stable', 2: 'unstable'}
personality_traits_map = {0: 'Introverted', 1: 'Extroverted', 2: 'Aggressive', 3: 'Calm'}
substance_abuse_history_map = {0: 'unknown', 1: 'yes', 2: 'no'}
cognitive_function_map = {0: 'unknown', 1: 'normal', 2: 'impaired'}
anger_management_issues_map = {0: 'unknown', 1: 'yes', 2: 'no'}
past_trauma_map = {0: 'unknown', 1: 'yes', 2: 'no'}
risk_of_recidivism_map = {0: 'unknown', 1: 'high', 2: 'low'}
stress_level_map = {0: 'low', 1: 'medium', 2: 'high'}
risk_factors_map = {0: 'unknown', 1: 'yes', 2: 'no'}

# Decoder for each prediction column, listed in print order
label_decoders = list(zip(prediction_columns, [
    mental_health_status_map,
    personality_traits_map,
    substance_abuse_history_map,
    cognitive_function_map,
    anger_management_issues_map,
    past_trauma_map,
    risk_of_recidivism_map,
    stress_level_map,
    risk_factors_map,
]))

# Print predictions with original labels
print("\nPredicted Details for Each Crime Incident:")
for idx, row in report_df.iterrows():
    print(f"\nCrime Description: {row['Crime Description']}")
    print(f"Crime Cluster: {row['Crime Cluster']}")
    for column, decoder in label_decoders:
        print(f"{column}: {decoder[row[column]]}")


# Save the report to a CSV file
report_df.to_csv("crime_behavior_report.csv", index=False)
print("\nReport saved to crime_behavior_report.csv")


Accuracies for each target variable: [0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
Average Model Accuracy: 0.11
Classification Report for target variable mental_health_status_encoded:
               precision    recall  f1-score   support

           1       0.50      1.00      0.67         1
           2       0.00      0.00      0.00         1

    accuracy                           0.50         2
   macro avg       0.25      0.50      0.33         2
weighted avg       0.25      0.50      0.33         2

Classification Report for target variable personality_traits_encoded:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00         0
           2       0.00      0.00      0.00         1
           3       1.00      1.00      1.00         1

    accuracy                           0.50         2
   macro avg       0.33      0.33      0.33         2
weighted avg       0.50      0.50      0.50         2

Classification Report for target var

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize

Classification Report for target variable risk_of_recidivism_encoded:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00       1.0
           1       0.00      0.00      0.00       0.0
           2       0.00      0.00      0.00       1.0

    accuracy                           0.00       2.0
   macro avg       0.00      0.00      0.00       2.0
weighted avg       0.00      0.00      0.00       2.0

Classification Report for target variable stress_level_encoded:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00       1.0
           1       0.00      0.00      0.00       1.0
           2       0.00      0.00      0.00       0.0

    accuracy                           0.00       2.0
   macro avg       0.00      0.00      0.00       2.0
weighted avg       0.00      0.00      0.00       2.0

Classification Report for target variable risk_factors_encoded:
               precision    recall  f1

# Dummy model using SVM and BERT (Bidirectional Encoder Representations from Transformers)

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from transformers import BertTokenizer, BertModel
import torch
import matplotlib.pyplot as plt
import seaborn as sns

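# Step 1: Build the dataset in memory (hand-written sample records, not loaded from a file).
# Every list below must contain the same number of entries (20), otherwise
# pd.DataFrame(data) raises "ValueError: All arrays must be of the same length".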
data = {
    "suspect_id": list(range(1, 21)),
    "gender": ["Male", "Female", "Male", "Female", "Male", "Female", "Male", "Female", "Male", "Female",
               "Male", "Female", "Male", "Female", "Male", "Female", "Male", "Female", "Male", "Female"],
    "nationality": ["American", "British", "Canadian", "Australian", "Indian", "American", "British", "Canadian", "Australian", "Indian",
                    "American", "British", "Canadian", "Australian", "Indian", "American", "British", "Canadian", "Australian", "Indian"],
    "criminal_record": ["Yes", "No", "Yes", "Yes", "No", "No", "Yes", "No", "Yes", "Yes",
                        "No", "Yes", "No", "Yes", "No", "Yes", "Yes", "No", "Yes", "No"],
    "residence": ["New York", "London", "Toronto", "Sydney", "Mumbai", "Los Angeles", "Vancouver", "Melbourne", "Delhi", "Kolkata",
                  "San Francisco", "Paris", "Montreal", "Brisbane", "Bangalore", "Chicago", "Edinburgh", "Ottawa", "Perth", "Chennai"],
    "crime_type": ["Burglary", "Homicide", "Fraud", "Arson", "Assault", "Burglary", "Homicide", "Fraud", "Arson", "Assault",
                   "Burglary", "Homicide", "Fraud", "Arson", "Assault", "Burglary", "Homicide", "Fraud", "Arson", "Assault"],
    "crime_date": ["2024-01-10", "2024-01-15", "2024-02-01", "2024-02-20", "2024-03-01", "2024-03-05", "2024-03-07", "2024-03-10", "2024-04-01", "2024-04-10",
                   "2024-04-15", "2024-04-20", "2024-05-01", "2024-05-05", "2024-05-10", "2024-06-01", "2024-06-10", "2024-07-01", "2024-07-05", "2024-07-10"],
    "crime_time": ["22:30", "14:00", "10:15", "02:30", "19:45", "23:00", "16:30", "18:00", "12:00", "01:30",
                   "15:30", "21:00", "17:00", "03:00", "13:00", "20:00", "22:00", "16:00", "09:30", "01:00"],
    "latitude": [40.7128, 51.5074, 43.6511, -33.8688, 19.0760, 34.0522, 49.2827, -37.8136, 28.6139, 22.5726,
                 37.7749, 48.8566, 45.4215, -27.4698, 12.9716, 41.8781, 55.9533, 45.4215, -31.9505, 13.0827],
    "longitude": [-74.0060, -0.1278, -79.3470, 151.2093, 72.8777, -118.2437, -123.1216, 144.9631, 77.2090, 88.3639,
                  -122.4194, 2.3522, -75.6972, 153.0251, 77.5946, -87.6298, -3.1883, -75.6972, 115.8605, 80.2785],
    "evidence_collected": ["Fingerprints", "DNA", "Financial Records", "Burn Patterns", "CCTV Footage", "Fingerprints", "DNA", "Financial Records", "Burn Patterns", "CCTV Footage",
                           "Fingerprints", "DNA", "Financial Records", "Burn Patterns", "CCTV Footage", "Fingerprints", "DNA", "Financial Records", "Burn Patterns", "CCTV Footage"],
    "age": [32, 28, 45, 35, 30, 50, 40, 22, 35, 27, 29, 34, 42, 23, 30, 40, 33, 31, 35, 28],
    "victim_gender": ["Male", "Female", "Male", "Male", "Female", "Male", "Female", "Male", "Male", "Female",
                      "Male", "Female", "Male", "Male", "Female", "Male", "Female", "Male", "Male", "Female"],
    "victim_age": [40, 25, 50, 45, 20, 55, 30, 40, 33, 27, 35, 28, 50, 23, 28, 34, 33, 29, 30, 24],
    "personality_traits": ["Impulsive", "Organized", "Greedy", "Reckless", "Aggressive", "Impulsive", "Organized", "Greedy", "Reckless", "Aggressive",
                           "Impulsive", "Organized", "Greedy", "Reckless", "Aggressive", "Impulsive", "Organized", "Greedy", "Reckless", "Aggressive"],
    "mental_health_status": ["Stable", "Depressed", "Stable", "Anxious", "Stable", "Stable", "Depressed", "Stable", "Anxious", "Stable",
                             "Stable", "Depressed", "Stable", "Anxious", "Stable", "Stable", "Depressed", "Stable", "Anxious", "Stable"],
    "cognitive_function": ["Normal", "Normal", "Above Average", "Normal", "Below Average", "Normal", "Normal", "Above Average", "Normal", "Below Average",
                           "Normal", "Normal", "Above Average", "Normal", "Below Average", "Normal", "Normal", "Above Average", "Normal", "Below Average"],
    "substance_abuse_history": ["None", "Alcohol", "None", "Drugs", "None", "Alcohol", "None", "Drugs", "None", "Alcohol",
                                "None", "Alcohol", "None", "Drugs", "None", "Alcohol", "None", "Alcohol", "None", "Drugs"],
    "anger_management_issues": ["Yes", "No", "Yes", "Yes", "No", "No", "Yes", "No", "Yes", "Yes",
                                "No", "Yes", "No", "Yes", "No", "Yes", "Yes", "No", "Yes", "No"],
    "past_trauma": ["No", "Yes", "Yes", "Yes", "No", "Yes", "Yes", "Yes", "No", "No",
                    "Yes", "Yes", "No", "Yes", "No", "Yes", "Yes", "No", "Yes", "No"],
    "risk_of_recidivism": ["High", "Medium", "High", "High", "Low", "Medium", "High", "Low", "Medium", "High",
                           "Low", "Medium", "High", "Low", "Medium", "High", "Low", "Medium", "High", "Low"],
    "stress_level": ["High", "Medium", "Medium", "High", "Low", "Medium", "High", "Medium", "High", "Low",
                     "High", "Medium", "Low", "High", "Medium", "Low", "High", "Medium", "Low", "Medium"],
    "risk_factors": ["Anger, Stress", "Trauma", "Greed", "Drugs, Anger", "None", "Stress, Trauma", "Anger, Drugs", "None", "Stress", "Anger",
                     "Drugs, Anger", "None", "Anger, Stress", "Stress, Trauma", "Greed", "Anger", "Anger, Drugs", "Greed", "Anger, Stress", "None"],
    "description": [
        "Breaking into a house and stealing valuables.",
        "Stabbing in a park during the afternoon.",
        "Forgery of documents to embezzle funds.",
        "Setting fire to a building late at night.",
        "Physical assault in a parking lot.",
        "Breaking into an office to steal equipment.",
        "Murder in a public place during a fight.",
        "Fraudulent transactions involving company accounts.",
        "Arson in a vacant warehouse.",
        "Beating a person in a bar fight.",
        "Breaking into a car to steal valuables.",
        "Murder at a nightclub.",
        "Embezzling company funds through forged checks.",
        "Setting fire to a forest.",
        "Assaulting a person during a street altercation.",
        "Burglary of a jewelry store.",
        "Murder in a public park.",
        "Fraud involving fake insurance claims.",
        "Arson in an abandoned building.",
        "Assaulting a person during a robbery."
    ],
    "weapon": ["Crowbar", "Knife", "None", "Fire", "Fists", "Crowbar", "Knife", "None", "Fire", "Fists",
               "Crowbar", "Knife", "None", "Fire", "Fists", "Crowbar", "Knife", "None", "Fire", "Fists"],
    "location": ["Residential Area", "Public Park", "Office", "Apartment Complex", "Mall", "Office", "Public Square", "Bank", "Warehouse", "Bar",
                 "Residential Area", "Nightclub", "Office", "Forest", "Street", "Jewelry Store", "Public Park", "Insurance Company", "Abandoned Building", "Robbery"],
    "victim_report": [
        "The suspect was wearing a black hoodie and gloves.",
        "The suspect was seen fleeing the scene in a white car.",
        "The suspect had accessed financial accounts illegally.",
        "Witnesses reported seeing someone with a gasoline can.",
        "The victim described the attacker as tall with short hair.",
        "The suspect was wearing a business suit and glasses.",
        "The victim was attacked by someone with a knife.",
        "Witnesses saw the suspect transferring money from an account.",
        "The suspect was seen near the warehouse with a matchstick.",
        "The victim identified the suspect as a regular bar patron.",
        "The suspect was wearing a leather jacket and carrying a bag.",
        "The victim reported a person in dark clothes attacking.",
        "Witnesses saw a man entering the company office suspiciously.",
        "The suspect was seen in a wooded area with a gasoline container.",
        "The victim was assaulted by a person with tattoos.",
        "The suspect was wearing a mask during the robbery.",
        "Witnesses reported the victim being chased by the suspect.",
        "The victim identified the attacker as wearing a fake ID.",
        "The victim reported the suspect leaving quickly after setting fire.",
        "Witnesses saw a person escaping on foot after the assault."
    ]
}

df = pd.DataFrame(data)

# Step 2: Preprocessing

# Encode categorical variables.
# LabelEncoder numbers the classes in sorted order, so the codes follow the
# alphabetical order of the labels (e.g. "Female" -> 0, "Male" -> 1).
le_crime_type = LabelEncoder()
df["crime_type_encoded"] = le_crime_type.fit_transform(df["crime_type"])

le_gender = LabelEncoder()
df["gender_encoded"] = le_gender.fit_transform(df["gender"])

# Categorize age into right-closed bins: (0, 18], (18, 30], (30, 45], (45, 60], (60, 100]
df["age_category"] = pd.cut(df["age"], bins=[0, 18, 30, 45, 60, 100],
                            labels=["Child", "Young Adult", "Adult", "Middle Age", "Senior"])
# The encoder only knows the categories that actually occur in the data, and its
# codes follow their alphabetical order rather than the order of the bins
le_age_category = LabelEncoder()
df["age_category_encoded"] = le_age_category.fit_transform(df["age_category"])

# Combine text columns for BERT input
df["text_data"] = df["description"] + " " + df["victim_report"]

# Extract features for SVM
svm_features = ["gender_encoded", "age_category_encoded", "latitude", "longitude",
                "stress_level", "risk_of_recidivism"]

# Ordinal codes for the two ordered categorical features
stress_level_mapping = {'Low': 0, 'Medium': 1, 'High': 2}
risk_of_recidivism_mapping = {'Low': 0, 'Medium': 1, 'High': 2}

# Build the feature frame with .assign, which returns a new DataFrame. Assigning
# columns on the df[svm_features] selection itself is chained assignment and
# triggers pandas' SettingWithCopyWarning.
X_svm = df[svm_features].assign(
    stress_level=lambda frame: frame["stress_level"].map(stress_level_mapping),
    risk_of_recidivism=lambda frame: frame["risk_of_recidivism"].map(risk_of_recidivism_mapping),
)
y_svm = df["crime_type_encoded"]

# Standardize numerical features (X_svm becomes a NumPy array from here on)
scaler = StandardScaler()
X_svm = scaler.fit_transform(X_svm)

# Step 3: Prepare BERT Features

# Tokenizer and encoder are loaded from the same pretrained checkpoint
BERT_CHECKPOINT = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(BERT_CHECKPOINT)
bert_model = BertModel.from_pretrained(BERT_CHECKPOINT)

# Tokenize text data
def bert_encode(texts, tokenizer, max_len=128):
    """Tokenize a batch of texts into padded, truncated PyTorch tensors.

    Args:
        texts: A string, a list of strings, or any other container of strings
            (pandas Series/Index, NumPy array, tuple, generator, ...). A str or
            list is passed to the tokenizer untouched; anything else is first
            converted to a plain list, via its ``tolist()`` method when it has one.
        tokenizer: Callable invoked as ``tokenizer(texts, padding=True,
            truncation=True, max_length=max_len, return_tensors="pt")``.
        max_len: Maximum sequence length handed to the tokenizer as ``max_length``.

    Returns:
        Whatever the tokenizer returns for the batch.
    """
    if not isinstance(texts, (str, list)):
        # Series, Index and ndarray expose tolist(); other iterables go through list()
        to_list = getattr(texts, "tolist", None)
        texts = to_list() if callable(to_list) else list(texts)
    return tokenizer(texts, padding=True, truncation=True, max_length=max_len, return_tensors="pt")

# Encode all texts in a single forward pass; gradients are not needed for feature extraction
tokens = bert_encode(df["text_data"], tokenizer)
with torch.no_grad():
    bert_outputs = bert_model(**tokens)
bert_features = bert_outputs.pooler_output.numpy()

# Step 4: Combine SVM and BERT Features
# Scaled tabular columns first, followed by the pooled BERT output columns
X_combined = np.concatenate([X_svm, bert_features], axis=1)
y_combined = y_svm

# Step 5: Train/Test Split
X_train, X_test, y_train, y_test = train_test_split(
    X_combined,
    y_combined,
    random_state=42,
    test_size=0.2,
)

# Step 6: Train SVM Model
# fit() returns the estimator itself, so construction and training can be chained
svm_model = SVC(kernel="linear", probability=True, random_state=42).fit(X_train, y_train)

# Step 7: Predict and Evaluate
y_pred = svm_model.predict(X_test)

# Share of test rows whose predicted crime type matches the true one
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.3f}")

# Classification Report
# Restrict the report to the classes that actually occur in y_test and
# look up their readable names from the fitted crime-type encoder
unique_labels = np.unique(y_test)
target_names = list(le_crime_type.classes_[unique_labels])

report = classification_report(
    y_test,
    y_pred,
    labels=unique_labels,
    target_names=target_names,
)
print("Classification Report:\n", report)


# Confusion Matrix
# Request one row/column per encoded crime type. Without `labels`, the matrix only
# covers the classes that occur in y_test or y_pred, so on a small test split it can
# be smaller than the list of tick labels and the class names end up on the wrong cells.
class_codes = np.arange(len(le_crime_type.classes_))
cm = confusion_matrix(y_test, y_pred, labels=class_codes)
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=le_crime_type.classes_, yticklabels=le_crime_type.classes_)
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()

# Step 8: Cross-validation (Optional)
#cv_scores = cross_val_score(svm_model, X_combined, y_combined, cv=2)
#print(f"Cross-Validation Accuracy: {cv_scores.mean():.3f} ± {cv_scores.std():.3f}")
# Step 9: Predict New Data

# Example new input data.
# The codes are looked up from the fitted encoders and mappings instead of being
# typed in by hand: LabelEncoder numbers classes alphabetically, so the literals
# used before (1, 1, 2) actually meant Male / "Middle Age" / High stress, not the
# Female / Young Adult / Medium values the sample is meant to describe.
new_data = {
    "gender_encoded": le_gender.transform(["Female"])[0],
    "age_category_encoded": le_age_category.transform(["Young Adult"])[0],
    "latitude": 51.5074,
    "longitude": -0.1278,
    "stress_level": stress_level_mapping["Medium"],
    "risk_of_recidivism": risk_of_recidivism_mapping["Medium"],
    "text_data": "A knife attack in a busy market during the evening. Witnesses saw a masked person fleeing."
}

# Preprocess new data for SVM and BERT.
# The scaler was fitted on a DataFrame, so it is given a one-row DataFrame with the
# same column names and order (a bare list triggers a feature-name warning).
svm_input = scaler.transform(pd.DataFrame([new_data])[svm_features])
bert_input = bert_encode([new_data["text_data"]], tokenizer)
with torch.no_grad():
    bert_feature = bert_model(**bert_input).pooler_output.numpy()

# Combine features in the same order used for training: tabular columns, then BERT columns
combined_input = np.hstack([svm_input, bert_feature])

# Predict
predicted_crime_type = svm_model.predict(combined_input)
# The age category is decoded from the input above; it is not a model prediction
predicted_age_category = le_age_category.inverse_transform([new_data["age_category_encoded"]])
# Hard-coded placeholders: no model predicts these two values yet
predicted_mental_health = "Stable"  # Just an example, you can create a model to predict this as well
predicted_personality_trait = "Impulsive"  # Similar to above

print(f"Predicted Crime Type: {le_crime_type.inverse_transform(predicted_crime_type)}")
print(f"Predicted Age Category: {predicted_age_category}")
print(f"Predicted Mental Health Status: {predicted_mental_health}")
print(f"Predicted Personality Traits: {predicted_personality_trait}")


ValueError: All arrays must be of the same length

# **New model using SVM**

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.multioutput import MultiOutputClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
import numpy as np
# Data set
# Hand-written sample of 20 suspect/crime records, stored column-wise: each key
# maps to a list with one entry per record. The dict is passed to
# pd.DataFrame(data) below, so every list must keep the same length.
data = {
    "suspect_id": list(range(1, 21)),
    "gender": ["Male", "Female", "Male", "Female", "Male", "Female", "Male", "Female", "Male", "Female",
               "Male", "Female", "Male", "Female", "Male", "Female", "Male", "Female", "Male", "Female"],
    "nationality": ["American", "British", "Canadian", "Australian", "Indian", "American", "British", "Canadian", "Australian", "Indian",
                    "American", "British", "Canadian", "Australian", "Indian", "American", "British", "Canadian", "Australian", "Indian"],
    "criminal_record": ["Yes", "No", "Yes", "Yes", "No", "No", "Yes", "No", "Yes", "Yes",
                        "No", "Yes", "No", "Yes", "No", "Yes", "Yes", "No", "Yes", "No"],
    "residence": ["New York", "London", "Toronto", "Sydney", "Mumbai", "Los Angeles", "Vancouver", "Melbourne", "Delhi", "Kolkata",
                  "San Francisco", "Paris", "Montreal", "Brisbane", "Bangalore", "Chicago", "Edinburgh", "Ottawa", "Perth", "Chennai"],
    "crime_type": ["Burglary", "Homicide", "Fraud", "Arson", "Assault", "Burglary", "Homicide", "Fraud", "Arson", "Assault",
                   "Burglary", "Homicide", "Fraud", "Arson", "Assault", "Burglary", "Homicide", "Fraud", "Arson", "Assault"],
    "crime_date": ["2024-01-10", "2024-01-15", "2024-02-01", "2024-02-20", "2024-03-01", "2024-03-05", "2024-03-07", "2024-03-10", "2024-04-01", "2024-04-10",
                   "2024-04-15", "2024-04-20", "2024-05-01", "2024-05-05", "2024-05-10", "2024-06-01", "2024-06-10", "2024-07-01", "2024-07-05", "2024-07-10"],
    "crime_time": ["22:30", "14:00", "10:15", "02:30", "19:45", "23:00", "16:30", "18:00", "12:00", "01:30",
                   "15:30", "21:00", "17:00", "03:00", "13:00", "20:00", "22:00", "16:00", "09:30", "01:00"],
    "latitude": [40.7128, 51.5074, 43.6511, -33.8688, 19.0760, 34.0522, 49.2827, -37.8136, 28.6139, 22.5726,
                 37.7749, 48.8566, 45.4215, -27.4698, 12.9716, 41.8781, 55.9533, 45.4215, -31.9505, 13.0827],
    "longitude": [-74.0060, -0.1278, -79.3470, 151.2093, 72.8777, -118.2437, -123.1216, 144.9631, 77.2090, 88.3639,
                  -122.4194, 2.3522, -75.6972, 153.0251, 77.5946, -87.6298, -3.1883, -75.6972, 115.8605, 80.2785],
    "evidence_collected": ["Fingerprints", "DNA", "Financial Records", "Burn Patterns", "CCTV Footage", "Fingerprints", "DNA", "Financial Records", "Burn Patterns", "CCTV Footage",
                           "Fingerprints", "DNA", "Financial Records", "Burn Patterns", "CCTV Footage", "Fingerprints", "DNA", "Financial Records", "Burn Patterns", "CCTV Footage"],
    "age": [32, 28, 45, 35, 30, 50, 40, 22, 35, 27, 29, 34, 42, 23, 30, 40, 33, 31, 35, 28],
    "victim_gender": ["Male", "Female", "Male", "Male", "Female", "Male", "Female", "Male", "Male", "Female",
                      "Male", "Female", "Male", "Male", "Female", "Male", "Female", "Male", "Male", "Female"],
    "victim_age": [40, 25, 50, 45, 20, 55, 30, 40, 33, 27, 35, 28, 50, 23, 28, 34, 33, 29, 30, 24],
    "personality_traits": ["Impulsive", "Organized", "Greedy", "Reckless", "Aggressive", "Impulsive", "Organized", "Greedy", "Reckless", "Aggressive",
                           "Impulsive", "Organized", "Greedy", "Reckless", "Aggressive", "Impulsive", "Organized", "Greedy", "Reckless", "Aggressive"],
    "mental_health_status": ["Stable", "Depressed", "Stable", "Anxious", "Stable", "Stable", "Depressed", "Stable", "Anxious", "Stable",
                             "Stable", "Depressed", "Stable", "Anxious", "Stable", "Stable", "Depressed", "Stable", "Anxious", "Stable"],
    "cognitive_function": ["Normal", "Normal", "Above Average", "Normal", "Below Average", "Normal", "Normal", "Above Average", "Normal", "Below Average",
                           "Normal", "Normal", "Above Average", "Normal", "Below Average", "Normal", "Normal", "Above Average", "Normal", "Below Average"],
    "substance_abuse_history": ["None", "Alcohol", "None", "Drugs", "None", "Alcohol", "None", "Drugs", "None", "Alcohol",
                                "None", "Alcohol", "None", "Drugs", "None", "Alcohol", "None", "Alcohol", "None", "Drugs"],
    "anger_management_issues": ["Yes", "No", "Yes", "Yes", "No", "No", "Yes", "No", "Yes", "Yes",
                                "No", "Yes", "No", "Yes", "No", "Yes", "Yes", "No", "Yes", "No"],
    "past_trauma": ["No", "Yes", "Yes", "Yes", "No", "Yes", "Yes", "Yes", "No", "No",
                    "Yes", "Yes", "No", "Yes", "No", "Yes", "Yes", "No", "Yes", "No"],
    "risk_of_recidivism": ["High", "Medium", "High", "High", "Low", "Medium", "High", "Low", "Medium", "High",
                           "Low", "Medium", "High", "Low", "Medium", "High", "Low", "Medium", "High", "Low"],
    "stress_level": ["High", "Medium", "Medium", "High", "Low", "Medium", "High", "Medium", "High", "Low",
                     "High", "Medium", "Low", "High", "Medium", "Low", "High", "Medium", "Low", "Medium"],
    "risk_factors": ["Anger, Stress", "Trauma", "Greed", "Drugs, Anger", "None", "Stress, Trauma", "Anger, Drugs", "None", "Stress", "Anger",
                     "Drugs, Anger", "None", "Anger, Stress", "Stress, Trauma", "Greed", "Anger", "Anger, Drugs", "Greed", "Anger, Stress", "None"],
    "weapon": ["Gun", "Knife", "None", "Fire", "Bat", "Gun", "Knife", "None", "Fire", "Bat",
               "Gun", "Knife", "None", "Fire", "Bat", "Gun", "Knife", "None", "Fire", "Bat"],
    "location": ["Park", "Office", "Street", "Building", "Parking Lot", "Office", "Public Place", "Company", "Warehouse", "Bar",
                 "Car", "Nightclub", "Company", "Forest", "Street", "Jewelry Store", "Public Park", "Insurance Office", "Building", "Street"],
    # Free-text columns: one short narrative per record.
    "description": [
        "Breaking into a house and stealing valuables.",
        "Stabbing in a park during the afternoon.",
        "Forgery of documents to embezzle funds.",
        "Setting fire to a building late at night.",
        "Physical assault in a parking lot.",
        "Breaking into an office to steal equipment.",
        "Murder in a public place during a fight.",
        "Fraudulent transactions involving company accounts.",
        "Arson in a vacant warehouse.",
        "Beating a person in a bar fight.",
        "Breaking into a car to steal valuables.",
        "Murder at a nightclub.",
        "Embezzling company funds through forged checks.",
        "Setting fire to a forest.",
        "Assaulting a person during a street altercation.",
        "Burglary of a jewelry store.",
        "Murder in a public park.",
        "Fraud involving fake insurance claims.",
        "Arson in an abandoned building.",
        "Assaulting a person during a heated argument with the suspect."
    ],
    "victim_report": [
        "The suspect was seen breaking in through the window.",
        "The victim was stabbed multiple times in the park.",
        "The suspect forged the documents and withdrew funds.",
        "The suspect was seen setting fire to the building.",
        "The victim was beaten and left unconscious in the parking lot.",
        "The suspect broke into the office and stole the equipment.",
        "The suspect killed the victim after a heated argument.",
        "The suspect withdrew large sums from the company account fraudulently.",
        "The suspect set fire to the warehouse while it was empty.",
        "The victim was attacked in a bar by the suspect.",
        "The suspect broke into the car and stole a laptop.",
        "The suspect killed the victim with a knife in a nightclub.",
        "The suspect forged the company's checks and transferred funds.",
        "The suspect set fire to the forest, causing extensive damage.",
        "The victim was assaulted by the suspect in a street altercation.",
        "The suspect stole jewelry from a store during the night.",
        "The suspect killed the victim in a public park.",
        "The suspect filed a fake insurance claim to collect money.",
        "The suspect set fire to the building with a flare gun.",
        "The victim was assaulted during a heated argument with the suspect."
    ]
}


# Convert the data dictionary to a pandas DataFrame

df = pd.DataFrame(data)

# Encode categorical variables

# List of categorical columns to encode
categorical_columns = ['gender', 'nationality', 'criminal_record', 'residence', 'crime_type', 'victim_gender',
                       'personality_traits', 'mental_health_status', 'cognitive_function', 'substance_abuse_history',
                       'anger_management_issues', 'past_trauma', 'risk_of_recidivism', 'stress_level', 'risk_factors',
                       'weapon', 'location']

# Apply label encoding to categorical columns.
# Each column gets its OWN fitted encoder, kept in `label_encoders`, so encoded
# values (and later predictions) can be mapped back to their original labels with
# label_encoders[col].inverse_transform(...). Re-fitting one shared encoder per
# column would keep only the mapping of the last column.
label_encoders = {}
for col in categorical_columns:
    column_encoder = LabelEncoder()
    df[col] = column_encoder.fit_transform(df[col])
    label_encoders[col] = column_encoder

# Backward compatibility: `label_encoder` keeps the state it always ended up with,
# i.e. the encoder fitted on the last column in `categorical_columns`.
label_encoder = label_encoders[categorical_columns[-1]]

# Preprocess text data
text_columns = ['description', 'victim_report']

# Define a pipeline for text processing
# Standalone TF-IDF vectorizer that drops English stop words.
text_transformer = TfidfVectorizer(stop_words='english')

# Method to convert age to get age categories
def age_to_category(age, upper_bounds=(25, 35, 45, 55)):
    """Map an age to an ordinal age-category code.

    With the default bounds the categories are:
        0: 1-25, 1: 26-35, 2: 36-45, 3: 46-55, 4: 56 and above.

    Parameters
    ----------
    age : int or float
        Age in years; must be greater than zero.
    upper_bounds : sequence of numbers, optional
        Ascending, inclusive upper limits of each category except the last.
        Ages above the final bound fall into category ``len(upper_bounds)``.

    Returns
    -------
    int
        Zero-based category index.

    Raises
    ------
    ValueError
        If ``age`` is zero, negative or NaN. Previously such values were
        silently placed in category 1 (non-positive) or 4 (NaN).
    """
    # `not age > 0` is also True for NaN, which a plain `age <= 0` would miss.
    if not age > 0:
        raise ValueError(f"age must be a positive number, got {age!r}")

    for category, upper in enumerate(upper_bounds):
        if age <= upper:
            return category
    return len(upper_bounds)

df['age_category'] = df['age'].apply(age_to_category)

# Target variables to get predictions
target_columns = ['crime_type', 'mental_health_status', 'age_category']

# Define features based on type of data.
# Leakage guard: the targets themselves ('crime_type', 'mental_health_status') and
# 'age' (from which 'age_category' is computed) must not be used as inputs,
# otherwise the model just reads the answer back and the scores are meaningless.
numerical_features = ['latitude', 'longitude', 'victim_age']
categorical_features = ['gender', 'nationality', 'criminal_record', 'residence', 'evidence_collected',
                       'victim_gender', 'personality_traits', 'cognitive_function',
                       'substance_abuse_history', 'anger_management_issues', 'past_trauma',
                       'risk_of_recidivism', 'stress_level', 'risk_factors', 'weapon', 'location']
text_features = ['description', 'victim_report']

# Features
X = df[numerical_features + categorical_features + text_features]

# Targets
y = df[target_columns]

# Training and testing data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a pipeline with preprocessing and model
pipeline = Pipeline(steps=[
    ('preprocessor', ColumnTransformer(
        transformers=[
            ('num', StandardScaler(), numerical_features),  # Apply StandardScaler to numerical features only
            ('cat', OneHotEncoder(handle_unknown='ignore', sparse_output=False), categorical_features),  # OneHotEncoder for categorical
            # A bare column name (not a list) hands the vectorizer a 1-D column of strings
            ('text', TfidfVectorizer(stop_words='english'), 'description'),
            ('text_victim', TfidfVectorizer(stop_words='english'), 'victim_report')
        ])),
    ('model', MultiOutputClassifier(RandomForestClassifier(random_state=42)))  # Using RandomForest for multi-output classification
])

# Fit to the model
pipeline.fit(X_train, y_train)

# Get predictions (one column per entry of target_columns, in that order)
y_pred = pipeline.predict(X_test)

# Evaluate the model
crime_type_accuracy = accuracy_score(y_test['crime_type'], y_pred[:, 0])
mental_health_accuracy = accuracy_score(y_test['mental_health_status'], y_pred[:, 1])
age_accuracy = accuracy_score(y_test['age_category'], y_pred[:, 2])

# Calculate the average accuracy for each target variable
average_accuracy = np.mean([crime_type_accuracy, mental_health_accuracy, age_accuracy])

print("Crime Type Prediction Accuracy:", crime_type_accuracy)
print("Mental Health Status Prediction Accuracy:", mental_health_accuracy)
print("Age Range Prediction Accuracy:", age_accuracy)

# Average full model accuracy
print("Average Accuracy of the Model:", average_accuracy)

# Detailed classification report.
# zero_division=0: with a tiny test split some classes receive no predictions;
# report 0 for them instead of emitting undefined-metric warnings.
print("\nClassification Report for Crime Type Prediction:")
print(classification_report(y_test['crime_type'], y_pred[:, 0], zero_division=0))

print("\nClassification Report for Mental Health Status Prediction:")
print(classification_report(y_test['mental_health_status'], y_pred[:, 1], zero_division=0))

print("\nClassification Report for Age Prediction:")
print(classification_report(y_test['age_category'], y_pred[:, 2], zero_division=0))

# Sample data to check actual vs predicted values

sample_data = pd.DataFrame({
    'weapon': ['Gun', 'Knife', 'Bat', 'Fire', 'Knife'],
    'location': ['Park', 'Office', 'Street', 'Building', 'Bar'],
    'crime_time': ['22:30', '14:00', '10:15', '02:30', '19:45'],
    'crime_date': ['2024-01-10', '2024-01-15', '2024-02-01', '2024-02-20', '2024-03-01'],
    'victim_gender': ['Male', 'Female', 'Male', 'Male', 'Female'],
    'victim_age': [40, 25, 50, 45, 30],
    'victim_report': [
        'The suspect was seen breaking in through the window.',
        'The victim was stabbed multiple times in the park.',
        'The suspect broke into the car and stole a laptop.',
        'The victim was attacked in a bar by the suspect.',
        'The suspect attacked the victim with a knife.'
    ],
    'description': [
        'Breaking into a house and stealing valuables.',
        'Stabbing in a park during the afternoon.',
        'Breaking into a car to steal valuables.',
        'Assaulting a person during a bar fight.',
        'Attacking a person with a knife in an alley.'
    ],
    'nationality': ['American', 'British', 'Canadian', 'Australian', 'Indian'],
    'criminal_record': ['Yes', 'No', 'Yes', 'No', 'Yes'],
    'residence': ['New York', 'London', 'Toronto', 'Sydney', 'Mumbai'],
    'personality_traits': ['Impulsive', 'Organized', 'Greedy', 'Reckless', 'Aggressive'],
    'mental_health_status': ['Stable', 'Depressed', 'Stable', 'Anxious', 'Stable'],
    'cognitive_function': ['Normal', 'Normal', 'Above Average', 'Normal', 'Below Average'],
    'substance_abuse_history': ['None', 'Alcohol', 'None', 'Drugs', 'None'],
    'anger_management_issues': ['Yes', 'No', 'Yes', 'No', 'Yes'],
    'past_trauma': ['No', 'Yes', 'Yes', 'Yes', 'No'],
    'risk_of_recidivism': ['High', 'Medium', 'High', 'Low', 'Medium'],
    'stress_level': ['High', 'Medium', 'Medium', 'High', 'Low'],
    'risk_factors': ['Anger, Stress', 'Trauma', 'Greed', 'Drugs, Anger', 'None'],
    'longitude': [-74.0060, -0.1278, -79.3832, 151.2093, 72.8777],  # Example longitude values for the locations
    'latitude': [40.7128, 51.5074, 43.6532, -33.8688, 19.0760],  # Example latitude values for the locations
    'gender': ['Male', 'Female', 'Male', 'Male', 'Female'],  # Gender of the suspect
    'crime_type': ['Burglary', 'Assault', 'Theft', 'Assault', 'Assault'],  # The type of crime committed
    'age': [35, 27, 40, 33, 29],  # Age of the suspect
    'evidence_collected': ['Fingerprint', 'DNA Sample', 'Weapon', 'Clothing', 'Footprint'],
})

# The training frame holds integer label codes for the columns listed in
# `categorical_columns`, while `sample_data` holds raw strings. Re-derive each
# column's label -> code mapping from the raw `data` dict (LabelEncoder assigns
# codes to the sorted unique labels, so refitting reproduces the same codes) so
# that this section does not rely on encoder state left behind by other cells.
category_encoders = {col: LabelEncoder().fit(data[col]) for col in categorical_columns}

sample_encoded = sample_data.copy()
for col, encoder in category_encoders.items():
    if col not in sample_encoded.columns:
        continue
    codes = {str(label): code for code, label in enumerate(encoder.classes_)}
    # Labels never seen in training become -1; OneHotEncoder(handle_unknown='ignore')
    # then encodes them as an all-zero row for that feature.
    sample_encoded[col] = sample_encoded[col].map(codes).fillna(-1).astype(int)

# Preprocess sample data using the pipeline (same columns, same order as in training)
sample_data_processed = pipeline.named_steps['preprocessor'].transform(sample_encoded[X.columns])

# Make predictions on the processed sample data
sample_predictions = pipeline.named_steps['model'].predict(sample_data_processed)


# Show the actual vs predicted results
sample_data['actual_crime_type'] = ['Burglary', 'Homicide', 'Burglary', 'Assault', 'Assault']
# Decode with the encoder that matches THIS model's crime_type codes.
sample_data['predicted_crime_type'] = category_encoders['crime_type'].inverse_transform(
    sample_predictions[:, 0].astype(int)
)


# Display the results
print(sample_data[['weapon', 'location', 'crime_time', 'victim_gender', 'victim_age', 'crime_type', 'actual_crime_type', 'predicted_crime_type']])



Crime Type Prediction Accuracy: 1.0
Mental Health Status Prediction Accuracy: 1.0
Age Range Prediction Accuracy: 0.75
Average Accuracy of the Model: 0.9166666666666666

Classification Report for Crime Type Prediction:
              precision    recall  f1-score   support

           2       1.00      1.00      1.00         2
           3       1.00      1.00      1.00         1
           4       1.00      1.00      1.00         1

    accuracy                           1.00         4
   macro avg       1.00      1.00      1.00         4
weighted avg       1.00      1.00      1.00         4


Classification Report for Mental Health Status Prediction:
              precision    recall  f1-score   support

           1       1.00      1.00      1.00         1
           2       1.00      1.00      1.00         3

    accuracy                           1.00         4
   macro avg       1.00      1.00      1.00         4
weighted avg       1.00      1.00      1.00         4


Classificatio

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
