# Project 3 Combined Models Notebook
We developed the models used here across multiple notebooks. Those notebooks contain the iterative steps used to develop the models and select the features used for prediction. There's a lot of work there, and those notebooks run rather slowly. This is the streamlined version: we load the saved models and the saved feature lists, apply them to the cleaned, merged dataset, and add the resulting predictions to that dataset as additional columns.

# Setup

## Import Libraries

In [1]:
import pandas as pd
import os

import joblib

from feature_list_vault import site_model_features, score_model_features 

## Set File Locations

In [2]:
# --- Input -----------------------------------------------------------------
# Cleaned, merged dataset that both models are applied to.
import_data_csv = "../00_Data/cleaned_data/cleaned_merged_data.csv"

# --- Saved models ----------------------------------------------------------
# Both names are bare file names, so they resolve against the current
# working directory rather than the data directory above.
site_probability_model_file = "superfund_site_model.sav"  # site probability
site_score_model_file = "superfund_score_model.sav"  # site score

# --- Output ----------------------------------------------------------------
# Destination for the dataset with the prediction columns appended.
export_data_csv = "../00_Data/cleaned_data/data_complete.csv"

# Import Data

In [3]:
# Read the cleaned, merged dataset from disk; this frame is the source of the
# model inputs and the base of the exported results.
data_df = pd.read_csv(filepath_or_buffer=import_data_csv)

# Generate Predictions

## Load Models

In [4]:
# Deserialize the two saved estimators in one pass: the score model first,
# then the site-probability model (same order as the tuple of paths).
site_score_model, site_probability_model = [
    joblib.load(saved_model_path)
    for saved_model_path in (site_score_model_file, site_probability_model_file)
]

## Prepare Input Data

In [5]:
# Build one input frame per model, holding only the columns named in that
# model's feature list. Selecting the columns first and copying afterwards
# yields the same independent frames as before, without duplicating every
# column of data_df just to throw most of them away.
site_probability_input = data_df.loc[:, site_model_features].copy()
site_score_input = data_df.loc[:, score_model_features].copy()

## Run Models

In [6]:
# Ask the site model for class probabilities. The result is indexed below as
# [row][class].
site_presence_prediction = site_probability_model.predict_proba(site_probability_input)

# Keep the entry at index 1 of every row, as a plain list.
# NOTE(review): presumably index 1 is the "site present" class — confirm
# against the class ordering of the saved model.
predicted_probabilities = [
    class_probabilities[1] for class_probabilities in site_presence_prediction
]

In [7]:
# Run the loaded score model on the score-feature frame.
# NOTE(review): the estimator comes from a saved file, so its output shape is
# not visible here — presumably one predicted score per input row; confirm.
site_score_prediction = site_score_model.predict(site_score_input)

## Build Results DF

In [8]:
# Build the results frame: a copy of the input data with the two prediction
# columns appended, in this order. data_df itself is left unmodified.
export_df = data_df.assign(
    site_probability=predicted_probabilities,
    score_prediction=site_score_prediction,
)

# Export Results

In [9]:
# Write the results frame to the output CSV; the row index is omitted so the
# file contains only the data and prediction columns.
export_df.to_csv(path_or_buf=export_data_csv, index=False)