In [2]:
# Import necessary libraries
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, mean_squared_error, f1_score
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import mlflow
import mlflow.sklearn

# Point MLflow at the "mlruns" location for everything logged below.
# NOTE(review): this is a relative path, so it presumably resolves against the
# notebook's working directory — confirm the notebook is launched from the
# intended folder.
mlflow.set_tracking_uri("mlruns")


In [3]:
# Read the encoded dataset from disk
df = pd.read_csv("partitions/encoded_data.csv")

# Columns fed to the classifier, and the column it learns to predict
feature_columns = [
    "age",
    "sex",
    "race",
    "total_juvenile_offenses",
    "detention_period",
    "prior_offense_count",
    "current_charge_degree",
]
target_column = "is_recidivist"
seed = 28  # shared by the split and the forest so re-runs are repeatable

x = df[feature_columns]
y = df[target_column]

# Hold out 20% of the rows for evaluation
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=seed,
)

# Build the forest and fit it in one expression (fit returns the estimator itself)
model = RandomForestClassifier(
    n_estimators=100,
    max_depth=10,
    random_state=seed,
).fit(x_train, y_train)

# Predicted labels for the held-out rows
y_pred = model.predict(x_test)


In [4]:
# Score the held-out predictions with three metrics.
accuracy = accuracy_score(y_test, y_pred)
# NOTE(review): RMSE treats the class labels as numbers; the printed value does
# not equal sqrt(1 - accuracy), which suggests the labels are not strictly 0/1 —
# confirm the label set before relying on this figure.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
f1 = f1_score(y_test, y_pred, average="weighted")

# Report each metric on its own line, label first
for label, value in (
    ("Model Accuracy: ", accuracy),
    ("RMSE: ", rmse),
    ("F1 Score: ", f1),
):
    print(label, value)


Model Accuracy:  0.7283053157655611
RMSE:  0.526447292211292
F1 Score:  0.7045926358688335


In [5]:
def predict_recidivism(input_data, estimator=None):
    """
    Predicts the recidivism label for a single individual.

    Parameters:
    - input_data (dict): Maps each feature name to its value. The values are
      passed to the estimator as-is, so they are expected to use the same
      encoding as the training data. Keys other than the seven model features
      are ignored.
    - estimator (optional): Object exposing ``predict``. When omitted, the
      notebook-level ``model`` is used, which keeps existing one-argument
      calls working unchanged.

    Returns:
    - prediction: The first (and only) element returned by
      ``estimator.predict`` for the one-row frame, i.e. the predicted class
      label (the notebook reads 1 as recidivist and 0 as not recidivist).

    Raises:
    - KeyError: If any required feature is missing from ``input_data``.
    """
    # Same columns, in the same order, as the frame the model was trained on.
    feature_columns = [
        'age',
        'sex',
        'race',
        'total_juvenile_offenses',
        'detention_period',
        'prior_offense_count',
        'current_charge_degree',
    ]

    # Fail early with a readable message instead of an opaque pandas indexing
    # error; KeyError is kept so existing handlers still match.
    missing = [name for name in feature_columns if name not in input_data]
    if missing:
        raise KeyError(f"input_data is missing required feature(s): {missing}")

    # Fall back to the notebook-global model only when no estimator is supplied.
    clf = model if estimator is None else estimator

    # One-row frame, restricted and ordered to match the training columns.
    input_df = pd.DataFrame([input_data])[feature_columns]

    return clf.predict(input_df)[0]

# Demo: score one hand-written individual
input_data = dict(
    age=35,
    sex=1,
    race=0,
    total_juvenile_offenses=5,
    detention_period=120,
    prior_offense_count=1,
    current_charge_degree=2,
)

prediction = predict_recidivism(input_data)
print(
    "Prediction (1 means recidivist, 0 means not recidivist):",
    prediction,
)


Prediction (1 means recidivist, 0 means not recidivist): 1


In [6]:
def mlFlowVersioning(model, model_name, input_example):
    """
    Log a model to a new MLflow run and register it under ``model_name``.

    Parameters:
    - model: Model object handed to ``mlflow.sklearn.log_model``.
    - model_name (str): Name passed to ``mlflow.register_model``.
    - input_example: Sample input forwarded to ``log_model`` as ``input_example``.

    Returns:
    - None
    """
    with mlflow.start_run() as active_run:
        run_id = active_run.info.run_id
        # Store the model under the "model" artifact path of this run.
        mlflow.sklearn.log_model(
            model,
            "model",
            input_example=input_example,
        )
        # Register the artifact that was just logged, addressed by run id.
        mlflow.register_model(f"runs:/{run_id}/model", model_name)

# Sample row stored alongside the logged model as its input example
input_example = dict(
    age=30,
    sex=0,
    race=1,
    total_juvenile_offenses=2,
    detention_period=5,
    prior_offense_count=0,
    current_charge_degree=1,
)

# Log the trained model and register it under the name "Reci"
mlFlowVersioning(model, "Reci", input_example)


  example = _Example(input_example)


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Registered model 'Reci' already exists. Creating a new version of this model...
Created version '2' of model 'Reci'.
