# Model Inference

In [12]:
import pickle

from sklearn.base import BaseEstimator, TransformerMixin

import pandas as pd
import numpy as np

In [9]:
class MissingIndicatorImputer(BaseEstimator, TransformerMixin):
    """
    Imputer similar to ``SimpleImputer`` that also adds one indicator column
    per fitted column, flagging which rows were missing before imputation.

    Parameters
    ----------
    strategy : str, default="mean"
        How the per-column fill value is chosen in ``fit``. One of
        ``"constant"``, ``"mode"``, ``"mean"`` or ``"median"``.
    fill_value : any, default=None
        Value used for every column when ``strategy == "constant"``.

    Attributes
    ----------
    columns : column labels seen by the last call to ``fit``.
    fill_values_ : dict mapping each fitted column label to its fill value.
    new_col_str : prefix prepended to a column label to name its indicator.
    """

    # Strategies understood by ``fit``.
    _STRATEGIES = ("constant", "mode", "mean", "median")

    def __init__(self, strategy: str="mean", fill_value=None) -> None:
        self.strategy = strategy
        self.fill_value = fill_value
        # NOTE(review): scikit-learn convention is to create learned
        # (trailing-underscore) attributes only in ``fit``. They are still
        # initialised here so code that reads them before fitting keeps
        # working; ``fit`` replaces them rather than mutating them.
        self.fill_values_ = {}
        self.new_col_str = "missing_"
        self.columns = []

    def _fill_value_for(self, series):
        """Return the fill value for one column under ``self.strategy``."""
        if self.strategy == "constant":
            return self.fill_value
        if self.strategy == "mode":
            modes = series.mode()
            # ``mode()`` drops NaN, so an all-missing column yields an empty
            # result; fall back to NaN, matching what mean/median return.
            return modes.iloc[0] if not modes.empty else np.nan
        if self.strategy == "mean":
            return series.mean()
        if self.strategy == "median":
            return series.median()
        raise ValueError("Unknown fill strategy")

    def fit(self, X, y = None):
        """
        Learn one fill value per column of ``X``.

        Raises
        ------
        ValueError
            If ``self.strategy`` is not a supported strategy.
        """
        # Validate before touching any state so a bad strategy is reported
        # even for zero-column input and never leaves a half-fitted object.
        if self.strategy not in self._STRATEGIES:
            raise ValueError("Unknown fill strategy")

        df = pd.DataFrame(X)

        self.columns = df.columns
        # Build a fresh dict on every fit: refitting on different columns must
        # not keep stale entries, and copies of this estimator must not share
        # (and mutate) the same mapping.
        self.fill_values_ = {col: self._fill_value_for(df[col]) for col in df.columns}

        return self

    def transform(self, X):
        """
        Return a copy of ``X`` (as a DataFrame) where, for every fitted
        column, a 0/1 indicator column ``<new_col_str><col>`` is appended and
        the column's missing values are replaced by its learned fill value.
        """
        df = pd.DataFrame(X).copy()

        for col in self.columns:
            # Record missingness first; after fillna the information is gone.
            df[f"{self.new_col_str}{col}"] = df[col].isnull().astype(int)
            df[col] = df[col].fillna(self.fill_values_[col])

        return df

    def get_feature_names_out(self, input_features=None):
        """
        Return the fitted column labels followed by their indicator column
        names. ``input_features`` is accepted but not used.
        """
        return list(self.columns) + [f"{self.new_col_str}{col}" for col in self.columns]

In [10]:
# Load the serialized model from disk; the path is relative to the notebook's
# working directory.
# SECURITY: pickle.load can execute arbitrary code embedded in the file, so
# only load model artifacts from a trusted source.
# NOTE(review): presumably the pickled model references MissingIndicatorImputer,
# which is why the class is defined before this cell — confirm.
with open("deployment/model.pkl", "rb") as file_1:
    model = pickle.load(file_1)

# Bare expression so the notebook displays the loaded model.
model

In [36]:
# One hand-written observation to score; np.nan marks readings that are
# unavailable.
# NOTE: the name `input` shadows the Python builtin; it is kept because the
# prediction cell below reads it.
input = pd.DataFrame(
    {
        # The only list-valued entry: it fixes the frame at one row and the
        # scalar values below are broadcast to that length.
        "Location": ["Albury"],
        "MinTemp": 7.4,
        "MaxTemp": 15.7,
        "Rainfall": 5,
        "Evaporation": np.nan,
        "Sunshine": 2,
        "WindGustDir": np.nan,
        "WindGustSpeed": 80,
        "WindDir9am": "S",
        "WindDir3pm": "ESE",
        "WindSpeed9am": 24,
        "WindSpeed3pm": 30,
        "Humidity9am": 64,
        "Humidity3pm": 5,
        "Pressure9am": 994.3,
        "Pressure3pm": 1010.3,
        "Cloud9am": 8,
        "Cloud3pm": 7,
        "Temp9am": 20,
        "Temp3pm": np.nan,
        "RainToday": "Yes",
    }
)

# Bare expression so the notebook renders the one-row frame.
input

Unnamed: 0,Location,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustDir,WindGustSpeed,WindDir9am,WindDir3pm,...,WindSpeed3pm,Humidity9am,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,RainToday
0,Albury,7.4,15.7,5,,2,,80,S,ESE,...,30,64,5,994.3,1010.3,8,7,20,,Yes


In [33]:
# Score the single-row frame and report the label predicted for that row.
rain_tomorrow = model.predict(input)[0]
print(f"Prediction whether it will rain tomorrow: {rain_tomorrow}")

Prediction whether it will rain tomorrow: Yes
