In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
import pickle

# Load the dataset
data = pd.read_csv('crime__5.csv', usecols=['date_time', 'latitude', 'longitude', 'city', 'state', 'crime_name3'])

# Reduce dataset size by sampling
data = data.sample(n=5000, random_state=42)

# Preprocess date and location inputs
data['date_time'] = pd.to_datetime(data['date_time'])
data['year'] = data['date_time'].dt.year
data['month'] = data['date_time'].dt.month
data['day'] = data['date_time'].dt.day
data['hour'] = data['date_time'].dt.hour
data['minute'] = data['date_time'].dt.minute
data['second'] = data['date_time'].dt.second

# Perform one-hot encoding for categorical variables
cat_cols = ['city', 'state']
enc = OneHotEncoder(drop='first', sparse=False)
encoded_cols = pd.DataFrame(enc.fit_transform(data[cat_cols]))
encoded_cols.columns = enc.get_feature_names(cat_cols)
data.drop(cat_cols, axis=1, inplace=True)
data = pd.concat([data, encoded_cols], axis=1)

# Remove rows with NaN values
data.dropna(subset=['crime_name3'], inplace=True)

# Split the data into training and testing sets
X = data.drop('crime_name3', axis=1)
y = data['crime_name3']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

# Define preprocessing steps
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())])

# Apply preprocessing to the numeric features
numeric_features = ['year', 'month', 'day', 'hour', 'minute', 'second', 'latitude', 'longitude']
preprocessor = ColumnTransformer(transformers=[
    ('num', numeric_transformer, numeric_features)])

# Append classifier to preprocessing pipeline for Random Forest
rf_model = Pipeline(steps=[('preprocessor', preprocessor),
                           ('classifier', RandomForestClassifier())])

# Fit the Random Forest model
rf_model.fit(X_train, y_train)

# Evaluate the Random Forest model on the test set
rf_pred = rf_model.predict(X_test)
rf_accuracy = accuracy_score(y_test, rf_pred)
print('Random Forest Classification Report:')
print(classification_report(y_test, rf_pred))
print('Random Forest Accuracy:', rf_accuracy)

# Export the Random Forest model
pickle.dump(rf_model, open('random_forest_model_crime_name3.pkl', 'wb'))

# Append classifier to preprocessing pipeline for SVM
svm_model = Pipeline(steps=[('preprocessor', preprocessor),
                            ('classifier', SVC())])

# Fit the SVM model
svm_model.fit(X_train, y_train)

# Evaluate the SVM model on the test set
svm_pred = svm_model.predict(X_test)
svm_accuracy = accuracy_score(y_test, svm_pred)
print('SVM Classification Report:')
print(classification_report(y_test, svm_pred))
print('SVM Accuracy:', svm_accuracy)

# Export the SVM model
pickle.dump(svm_model, open('svm_model_crime_name3.pkl', 'wb'))




Random Forest Classification Report:
                                                    precision    recall  f1-score   support

                                ALL OTHER OFFENSES       0.00      0.00      0.00         1
                               ARSON - RESIDENTIAL       0.00      0.00      0.00         1
                              ASSAULT - 2ND DEGREE       0.11      0.33      0.17        18
          ASSAULT - AGGRAVATED - FAMILY-STRONG-ARM       0.00      0.00      0.00         1
    ASSAULT - AGGRAVATED - NON-FAMILY-OTHER WEAPON       0.00      0.00      0.00         3
                      ASSAULT - AGGRAVATED - OTHER       0.00      0.00      0.00         0
        ASSAULT - INTIMIDATION (INCLUDES STALKING)       0.00      0.00      0.00         0
                                  ASSAULT - SIMPLE       0.00      0.00      0.00         6
                        AUTO THEFT - VEHICLE THEFT       0.14      0.08      0.11        12
                       BURGLARY (DESCRIBE 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


SVM Classification Report:
                                                    precision    recall  f1-score   support

                                ALL OTHER OFFENSES       0.00      0.00      0.00         1
                               ARSON - RESIDENTIAL       0.00      0.00      0.00         1
                              ASSAULT - 2ND DEGREE       0.06      0.17      0.09        18
          ASSAULT - AGGRAVATED - FAMILY-STRONG-ARM       0.00      0.00      0.00         1
    ASSAULT - AGGRAVATED - NON-FAMILY-OTHER WEAPON       0.00      0.00      0.00         3
                                  ASSAULT - SIMPLE       0.00      0.00      0.00         6
                        AUTO THEFT - VEHICLE THEFT       0.00      0.00      0.00        12
                       BURGLARY (DESCRIBE OFFENSE)       0.00      0.00      0.00         2
            BURGLARY - FORCED ENTRY-NONRESIDENTIAL       0.00      0.00      0.00         5
               BURGLARY - FORCED ENTRY-RESIDENTIAL  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
