In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report


df = pd.read_csv('crime_dataset_india.csv')

df = df.dropna(subset=[
    'Victim Age', 'Victim Gender', 'Weapon Used',
    'Crime Domain', 'Case Closed', 'Time of Occurrence'
])


df['Hour'] = pd.to_datetime(df['Time of Occurrence'], errors='coerce').dt.hour
df = df.dropna(subset=['Hour'])

df['CaseClosedBinary'] = df['Case Closed'].str.strip().map({'Yes': 1, 'No': 0})

numeric_feats     = ['Victim Age', 'Hour']
categorical_feats = ['Victim Gender', 'Weapon Used', 'Crime Domain']

X = df[numeric_feats + categorical_feats]
y = df['CaseClosedBinary']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

preprocessor = ColumnTransformer([
    ('num', StandardScaler(), numeric_feats),
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_feats)
])

pipeline = Pipeline([
    ('prep', preprocessor),
    ('clf', RandomForestClassifier(
        n_estimators=100, random_state=42, n_jobs=-1
    ))
])

pipeline.fit(X_train, y_train)

y_pred = pipeline.predict(X_test)
print("Accuracy: ", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['CaseClosedBinary'] = df['Case Closed'].str.strip().map({'Yes': 1, 'No': 0})


Accuracy:  0.5075673680324843

Classification Report:
               precision    recall  f1-score   support

           0       0.51      0.51      0.51      1367
           1       0.50      0.51      0.51      1342

    accuracy                           0.51      2709
   macro avg       0.51      0.51      0.51      2709
weighted avg       0.51      0.51      0.51      2709

