In [None]:
import pandas as pd

# Load the processed phishing dataset.
# Forward slashes are used on purpose: the previous backslash-separated literal
# relied on invalid escape sequences ("\d", "\p") in a non-raw string and only
# resolved on Windows. Forward slashes work on Windows and POSIX alike.
df = pd.read_csv('../data/processed/phishing.csv')
df.head()

In [None]:
# Import the PyCaret classification API (setup() itself is called in the next cell)
from pycaret.classification import *

In [None]:
# Initialise the PyCaret experiment on `df`, predicting the 'Result' column.
# A fixed session_id seeds the experiment so that re-running the notebook
# produces the same split and cross-validation results.
originaldata = setup(data = df, target = 'Result', session_id = 42)

In [None]:
# Compare the candidate classifiers with PyCaret's compare_models();
# `best` holds whatever that call returns (presumably the top-ranked model — verify).
best = compare_models()

In [None]:
import os 

# Identifiers of the estimators that get trained and pickled further down.
models = ['et', 'rf', 'lightgbm', 'dt', 'gbc']

# Sibling "models" directory of the notebook folder; created up front so the
# pickled estimators have somewhere to go.
model_folder = os.path.join(os.pardir, 'models')
os.makedirs(model_folder, exist_ok=True)

def train_model(models, fold=25, model_dir=None):
    """Train, plot, and pickle each requested classifier.

    For every identifier in ``models`` this calls ``create_model`` with
    ``fold``-fold cross-validation, renders the AUC, precision-recall,
    feature and confusion-matrix plots via ``plot_model``, and pickles the
    resulting estimator to ``<model_dir>/<identifier>_model.pkl``.

    Parameters
    ----------
    models : iterable of str
        Estimator identifiers passed straight to ``create_model``.
    fold : int, default 25
        Number of cross-validation folds passed to ``create_model``.
    model_dir : str or None, default None
        Directory receiving the pickle files. ``None`` means ``../models``.
        The directory is created if it does not exist.

    Returns
    -------
    None
    """
    # Imported locally: the notebook only imports pickle in a later cell, so a
    # fresh "Restart & Run All" would otherwise fail here with a NameError.
    import pickle

    if model_dir is None:
        model_dir = os.path.join('..', 'models')
    os.makedirs(model_dir, exist_ok=True)

    for model in models:
        print(f"Model : {model}")
        # Train the current estimator with k-fold cross-validation.
        classifier_model = create_model(model, fold = fold)

        # Same diagnostic plots, in the same order, for every estimator.
        for plot_kind in ('auc', 'pr', 'feature', 'confusion_matrix'):
            plot_model(classifier_model, plot = plot_kind)

        # Persist the bare estimator object so it can be reloaded with pickle.
        with open(os.path.join(model_dir, model+'_model.pkl'), 'wb') as files:
            pickle.dump(classifier_model, files)

        print('_'*50)

# Train, plot, and pickle every estimator listed in `models`.
train_model(models)

In [None]:
import glob
import joblib
import pickle
import numpy as np
from collections import Counter

def load_model(pkl_file_path):
    """Return the object unpickled from ``pkl_file_path``.

    The file is opened in binary mode and closed again before returning.
    Unpickling can execute arbitrary code, so only point this at files that
    were produced by a trusted source.
    """
    with open(pkl_file_path, 'rb') as handle:
        return pickle.load(handle)

def predict_with_model(model, input_data, class_names):
    """Pick the most probable class for a single sample.

    ``input_data`` is wrapped in a one-element list and handed to
    ``model.predict_proba``; the first row of the result is used.

    Parameters
    ----------
    model : object exposing ``predict_proba``
    input_data : one sample (a single row of feature values)
    class_names : sequence indexed by the position of the winning probability
        (assumes its order matches the model's probability columns — verify
        against the model's class order)

    Returns
    -------
    dict with keys ``"class_name"`` and ``"probability"``.
    """
    class_probabilities = model.predict_proba([input_data])[0]
    best_index = np.argmax(class_probabilities)

    return {
        "class_name": class_names[best_index],
        "probability": class_probabilities[best_index],
    }

def predict_with_voting(class_names, probabilities):
    """Combine per-model predictions by majority vote.

    Parameters
    ----------
    class_names : sequence
        The label each model predicted, one entry per model.
    probabilities : sequence of float
        The probability each model assigned to its own predicted label,
        aligned position-by-position with ``class_names``.

    Returns
    -------
    tuple
        ``(winning_label, confidence)`` where ``winning_label`` is the most
        frequent entry of ``class_names`` (the first one encountered wins a
        tie) and ``confidence`` is the mean probability of the models that
        voted for it.

    Raises
    ------
    ValueError
        If no predictions are supplied or the two sequences differ in length.
    """
    if len(class_names) == 0:
        raise ValueError("predict_with_voting needs at least one prediction")
    if len(class_names) != len(probabilities):
        raise ValueError("class_names and probabilities must have the same length")

    # Majority label; Counter keeps first-seen order among equally common labels.
    most_common_element = Counter(class_names).most_common(1)[0][0]

    # Only average the supporters of the winning label: a dissenting model's
    # probability describes the *other* class and would distort the confidence.
    supporting = [
        probability
        for name, probability in zip(class_names, probabilities)
        if name == most_common_element
    ]
    avg_probability = np.mean(supporting)

    return most_common_element, avg_probability

def multimodel_voting(input_data, op_val=None):
    """Predict one sample with every pickled model and majority-vote the result.

    Each ``*.pkl`` file in ``../models`` is loaded with ``load_model`` and
    queried through ``predict_with_model``; the per-model results are printed
    and then combined by ``predict_with_voting``.

    Parameters
    ----------
    input_data : one sample (a single row of feature values)
    op_val : optional
        Only echoed next to each per-model result in the printed output.

    Returns
    -------
    tuple
        ``(winning_label, confidence_percent)`` with the confidence expressed
        as a percentage rounded to two decimals.

    Raises
    ------
    FileNotFoundError
        If ``../models`` contains no ``*.pkl`` files.
    """
    # Labels by probability-column position; assumed to match the models'
    # class order — TODO confirm. The human-readable pair would be
    # ['unsafe', 'safe'].
    class_names = [-1, 1]

    # Restrict to pickle files so stray files in the folder are not unpickled,
    # and sort because glob returns matches in no guaranteed order.
    model_paths = sorted(glob.glob(os.path.join('..', 'models', '*.pkl')))
    if not model_paths:
        raise FileNotFoundError(
            "No '*.pkl' model files found in " + os.path.join('..', 'models')
        )

    model_class_names, model_probabilities = [], []

    for pkl_file_path in model_paths:
        model = load_model(pkl_file_path)
        result = predict_with_model(model, input_data, class_names)
        model_class_names.append(result['class_name'])
        model_probabilities.append(result['probability'])

        print(result, ' || ',  op_val)
        print('__'*30)

    result_class, result_probability = predict_with_voting(model_class_names, model_probabilities)
    result_percent = round(result_probability*100, 2)
    print(result_class, result_percent)

    return result_class, result_percent
    
if __name__ == '__main__':

    # NOTE(review): this reads 'phishing.csv' from the working directory,
    # whereas the first cell loads the dataset from the processed-data folder
    # under the parent directory — confirm both files exist and match.
    df = pd.read_csv('phishing.csv')

    # Features only: drop the target column, then draw ten random rows and
    # renumber them 0..9.
    input_df_copy = df.drop(columns='Result')
    input_df = input_df_copy.sample(n=10).reset_index(drop=True)

    # Run the voting ensemble row by row, collecting label and confidence.
    result_op, prob_op = [], []
    for index in range(input_df.shape[0]):
        res, prob = multimodel_voting(input_df.iloc[index], op_val=None)
        result_op.append(res)
        prob_op.append(prob)
        print('\n')

    # Attach the ensemble output to the sampled rows and show the table.
    input_df['prediction'] = result_op
    input_df['probability'] = prob_op
    display(input_df)