In [None]:
import sqlite3
import pandas as pd

def fetch_preprocessed_data_from_db(db_path='../db.incidents.db'):
    """
    Fetches every row of the ``incidents`` table from a SQLite database.

    Args:
    - db_path (str): Path to the SQLite database file.

    Returns:
    - DataFrame: All rows and columns of the ``incidents`` table.

    Raises:
    - Any error raised by ``pd.read_sql_query`` (for example when the
      ``incidents`` table does not exist) propagates to the caller; the
      connection is closed first.
    """
    # NOTE(review): the default path looks like a typo of '../db/incidents.db'
    # (the notebook passes a db/incidents.db path explicitly) — confirm with
    # the owner before changing it, since callers may rely on the default.
    conn = sqlite3.connect(db_path)
    try:
        # Query all data from the incidents table
        query = "SELECT * FROM incidents"
        return pd.read_sql_query(query, conn)
    finally:
        # Always release the connection, even when the query fails; otherwise
        # the handle stays open (and the file stays locked on Windows).
        conn.close()

# Fetch preprocessed data
# Forward slashes resolve on Windows, macOS and Linux alike; a backslash-separated
# literal only works on Windows and relies on invalid string escape sequences.
db_path = '../db/incidents.db'  # Update this path if needed
preprocessed_data = fetch_preprocessed_data_from_db(db_path)
print("Data fetched from the database successfully.")



Data fetched from the database successfully.


In [None]:
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import joblib
import pandas as pd
import os

# Prepare the data for training
def prepare_data_for_training(df, encoder_dir=None):
    """
    Builds the feature matrix and target for model training.

    Incidents are counted per (neighborhood, day_of_week, hour) group and that
    count becomes the ``threat_score`` target. ``neighborhood`` and
    ``day_of_week`` are one-hot encoded; ``hour`` is passed through unchanged.

    Side effect: the fitted ``OneHotEncoder`` is written with joblib to
    ``<encoder_dir>/encoder.pkl`` so it can be reused at deployment time. The
    directory is created if it does not exist.

    Args:
    - df (DataFrame): Preprocessed data containing the columns
      'neighborhood', 'day_of_week' and 'hour'.
    - encoder_dir (str, optional): Directory in which to save the encoder.
      Defaults to ``../data/rithvik`` relative to the current working directory.

    Returns:
    - X (DataFrame): One-hot encoded columns followed by the 'hour' column,
      one row per (neighborhood, day_of_week, hour) group.
    - y (Series): Number of incidents in each group ('threat_score').
    """
    # The threat score is always derived here as the incident count per group.
    threat_df = (
        df.groupby(['neighborhood', 'day_of_week', 'hour'])
        .size()
        .reset_index(name='threat_score')
    )

    # Encode categorical variables
    encoder = OneHotEncoder(sparse_output=False)
    encoded_data = encoder.fit_transform(threat_df[['neighborhood', 'day_of_week']])
    encoded_df = pd.DataFrame(encoded_data, columns=encoder.get_feature_names_out())

    # Combine encoded features with numerical data
    X = pd.concat([encoded_df, threat_df[['hour']].reset_index(drop=True)], axis=1)
    y = threat_df['threat_score']

    # Resolve the output directory; the default keeps the original location.
    if encoder_dir is None:
        encoder_dir = os.path.join(os.getcwd(), '..', 'data', 'rithvik')
    # exist_ok=True avoids the check-then-create race and makes re-runs safe.
    os.makedirs(encoder_dir, exist_ok=True)

    # Save the fitted encoder for deployment (the model is saved by the caller).
    encoder_pkl = os.path.join(encoder_dir, 'encoder.pkl')
    joblib.dump(encoder, encoder_pkl)
    print("Encoder saved successfully.")

    return X, y

# Prepare data for training
X, y = prepare_data_for_training(preprocessed_data)

# Hold out 20% of the aggregated rows for evaluation (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the XGBoost model
model = XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=6, random_state=42)
model.fit(X_train, y_train)

# Save the model
current_dir = os.getcwd()

# The model goes to ../data/rithvik relative to the working directory, the same
# folder the encoder is written to. exist_ok=True makes re-running the cell
# safe and avoids the check-then-create race.
risk_directory = os.path.join(current_dir, '..', 'data', 'rithvik')
os.makedirs(risk_directory, exist_ok=True)

# Persist the trained regressor with joblib
threat_model = os.path.join(risk_directory, 'xg_boost_model.pkl')
joblib.dump(model, threat_model)
print("Model and encoders saved successfully.")

# Evaluate the model on the held-out split
y_pred = model.predict(X_test)
print(f"Mean Squared Error: {mean_squared_error(y_test, y_pred):.2f}")
print(f"Mean Absolute Error: {mean_absolute_error(y_test, y_pred):.2f}")
print(f"R² Score: {r2_score(y_test, y_pred):.2f}")


XGBoostError: 
XGBoost Library (libxgboost.dylib) could not be loaded.
Likely causes:
  * OpenMP runtime is not installed
    - vcomp140.dll or libgomp-1.dll for Windows
    - libomp.dylib for Mac OSX
    - libgomp.so for Linux and other UNIX-like OSes
    Mac OSX users: Run `brew install libomp` to install OpenMP runtime.

  * You are running 32-bit Python on a 64-bit OS

Error message(s): ["dlopen(/Users/mohith/.pyenv/versions/3.11.4/lib/python3.11/site-packages/xgboost/lib/libxgboost.dylib, 0x0006): Library not loaded: @rpath/libomp.dylib\n  Referenced from: <BBC4A126-D15A-3802-AD26-108872BA781A> /Users/mohith/.pyenv/versions/3.11.4/lib/python3.11/site-packages/xgboost/lib/libxgboost.dylib\n  Reason: tried: '/opt/homebrew/opt/libomp/lib/libomp.dylib' (no such file), '/System/Volumes/Preboot/Cryptexes/OS/opt/homebrew/opt/libomp/lib/libomp.dylib' (no such file), '/opt/homebrew/opt/libomp/lib/libomp.dylib' (no such file), '/System/Volumes/Preboot/Cryptexes/OS/opt/homebrew/opt/libomp/lib/libomp.dylib' (no such file), '/Users/mohith/.pyenv/versions/3.11.4/lib/libomp.dylib' (no such file), '/System/Volumes/Preboot/Cryptexes/OS/Users/mohith/.pyenv/versions/3.11.4/lib/libomp.dylib' (no such file), '/opt/homebrew/lib/libomp.dylib' (no such file), '/System/Volumes/Preboot/Cryptexes/OS/opt/homebrew/lib/libomp.dylib' (no such file), '/Users/mohith/.pyenv/versions/3.11.4/lib/libomp.dylib' (no such file), '/System/Volumes/Preboot/Cryptexes/OS/Users/mohith/.pyenv/versions/3.11.4/lib/libomp.dylib' (no such file), '/opt/homebrew/lib/libomp.dylib' (no such file), '/System/Volumes/Preboot/Cryptexes/OS/opt/homebrew/lib/libomp.dylib' (no such file)"]
