In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.impute import SimpleImputer

# Load the dataset. The file is decoded as 'latin-1' (also known as
# 'ISO-8859-1') rather than pandas' default UTF-8.
data = pd.read_csv('crime.csv', encoding='latin-1')

# Drop the columns that are not used as model features.
data = data.drop(columns=[
    'INCIDENT_NUMBER',
    'DISTRICT',
    'REPORTING_AREA',
    'OCCURRED_ON_DATE',
    'STREET',
    'Lat',
    'Long',
    'Location',
])

# Convert 'Y' and 'N' in 'SHOOTING' to 1 and 0. Series.map() turns every value
# that is not a key of the mapping (including blanks) into NaN; left as NaN,
# those rows would later be filled with the column mean by the imputer, which
# collapses the column to a constant when only 'Y' is ever recorded.
# NOTE(review): a blank is assumed to mean "no shooting" -- confirm against
# the dataset's documentation.
data['SHOOTING'] = data['SHOOTING'].map({'Y': 1, 'N': 0}).fillna(0).astype(int)

# Encode the categorical features as integer codes. Each column gets its own
# fitted encoder, kept in `label_encoders`, so the codes can be mapped back to
# the original labels with inverse_transform(). `label_encoder` is left bound
# to the last encoder fitted ('UCR_PART').
label_encoders = {}
for column in ('OFFENSE_CODE_GROUP', 'DAY_OF_WEEK', 'UCR_PART'):
    label_encoder = LabelEncoder()
    data[column] = label_encoder.fit_transform(data[column])
    label_encoders[column] = label_encoder

# Separate features and target variable: every remaining column except
# 'OFFENSE_DESCRIPTION' is used as a feature.
X = data.drop(columns=['OFFENSE_DESCRIPTION'])
y = data['OFFENSE_DESCRIPTION']

# Split BEFORE fitting any preprocessing step, so the imputation means and the
# PCA projection are learned from the training rows only. Fitting them on the
# full dataset leaks test-set statistics into the model and makes the reported
# accuracy optimistic.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Handle missing values: the column means are computed on the training split
# and reused unchanged for the test split.
imputer = SimpleImputer(strategy='mean')  # Choose the strategy to suit the data
X_train_imputed = imputer.fit_transform(X_train_raw)
X_test_imputed = imputer.transform(X_test_raw)

# Apply PCA. At most 8 components are requested; the value is clamped to the
# number of available feature columns because PCA raises a ValueError when
# asked for more components than there are features.
# NOTE(review): the features are not standardized before PCA/SVC, and both are
# sensitive to feature scale -- consider adding a scaling step.
n_components = min(8, X_train_imputed.shape[1])
pca = PCA(n_components=n_components)
X_train = pca.fit_transform(X_train_imputed)
X_test = pca.transform(X_test_imputed)

# Train the Support Vector Classifier with its default hyper-parameters.
svc = SVC()
svc.fit(X_train, y_train)

# Make predictions on the held-out test set.
y_pred = svc.predict(X_test)

# Calculate and report the accuracy score.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


  from pandas.core.computation.check import NUMEXPR_INSTALLED
  data = pd.read_csv('crime.csv', encoding='latin-1')
