In [None]:
import pip
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
import json


import pickle
import warnings

warnings.filterwarnings("ignore")


def load_model(model_file):
    """
    Deserialize a pickled model object from disk.

    Args:
      model_file: Path to the .pkl file to read.

    Returns:
      Whatever object was stored in the pickle file.

    Note:
      Unpickling executes code embedded in the file, so only load
      model files that come from a trusted source.
    """
    with open(model_file, mode='rb') as handle:
        loaded = pickle.loads(handle.read())

    print(f"Model loaded successfully from {model_file}")
    return loaded


def preprocessing(new_data):
    """
    Clean a raw input DataFrame and return the cleaned copy.

    The input frame is left untouched, so the same raw data can be
    preprocessed repeatedly (e.g. once per model) with identical results.

    Steps applied, in order:
      1. Spaces in column names are replaced with underscores.
      2. Missing values are filled: median for numeric columns, most
         frequent value for all other columns.
      3. 'Area_Income' is rounded to 2 decimal places.
      4. 'Gender' is label-encoded ('Female' -> 1, 'Male' -> 0); any other
         value becomes NaN.

    Args:
      new_data: A pandas DataFrame containing the raw input rows.

    Returns:
      A new pandas DataFrame with the cleaning steps applied.

    Raises:
      KeyError: If 'Area_Income' or 'Gender' is absent after renaming.
    """
    # Work on a copy: mutating the caller's frame makes a second call map
    # the already-encoded Gender column to NaN.
    cleaned = new_data.copy()

    # Replace spaces in column names with underscores
    cleaned.columns = cleaned.columns.str.replace(' ', '_')

    # Handle missing values
    for col in cleaned.columns:
        column = cleaned[col]
        if not column.isnull().any():
            continue

        if pd.api.types.is_numeric_dtype(column):
            # For numeric columns, fill missing values with median
            fill_value = column.median()
        else:
            # For categorical columns, fill with the most frequent value
            modes = column.mode()
            if modes.empty:
                # Every value is missing, so there is nothing to fill with
                continue
            fill_value = modes.iloc[0]

        # Assign back instead of fillna(inplace=True) on a column selection:
        # the chained in-place form is deprecated and is a no-op under
        # pandas copy-on-write.
        cleaned[col] = column.fillna(fill_value)

    # Round 'Area_Income' to 2 decimal places
    cleaned['Area_Income'] = cleaned['Area_Income'].round(2)

    # Label-encode gender
    label_encoding_mapping = {'Female': 1, 'Male': 0}
    cleaned['Gender'] = cleaned['Gender'].map(label_encoding_mapping)

    return cleaned


def make_prediction(model, clean_data, feature_columns=None):
    """
    Run a fitted model on preprocessed data and print the results.

    Args:
      model: A fitted estimator exposing `predict` and `predict_proba`.
      clean_data: A pandas DataFrame of preprocessed rows. Columns other
        than the feature columns are ignored.
      feature_columns: Optional list of column names to feed to the model,
        in the order used at training time. When omitted, the module-level
        `feature_columns` variable is used, as before.

    Returns:
      A tuple `(prediction, probabilities)` with the outputs of
      `model.predict` and `model.predict_proba` respectively.

    Raises:
      ValueError: If no feature columns are passed and no module-level
        `feature_columns` is defined.
      KeyError: If a requested feature column is missing from `clean_data`.
    """
    if feature_columns is None:
        # The parameter shadows the module-level name, so read the global
        # explicitly. It only exists when the file is run as a script.
        feature_columns = globals().get('feature_columns')
        if feature_columns is None:
            raise ValueError(
                "feature_columns must be passed to make_prediction() or "
                "defined at module level"
            )

    # Ensure only the required columns are used
    features = clean_data[list(feature_columns)]

    # Make predictions
    prediction = model.predict(features)
    probabilities = model.predict_proba(features)
    print('Predictions', prediction)
    print('Probabilities:', probabilities)
    return prediction, probabilities



def _json_safe(value):
    """Convert one cell value into something `json.dump` writes as valid JSON."""
    if isinstance(value, np.generic):
        # numpy scalars (e.g. int64, float32) are not all JSON-serializable
        value = value.item()
    # Missing values would otherwise be written as the non-standard token NaN
    return None if pd.isna(value) else value


def save_inference_results(output_file, clean_data, predictions, probabilities, rawdata):
    """
    Save the inference results to a file in JSON format.

    One JSON object is written per input row, holding the row itself under
    "new_data", the predicted label under "prediction" and, when available,
    the class probabilities under "probabilities".

    Args:
      output_file: Path of the JSON file to write.
      clean_data: Preprocessed DataFrame; used for the stored rows only
        when `rawdata` is None.
      predictions: Sequence of predicted labels, one per row.
      probabilities: Sequence of per-class probabilities, one per row,
        or None to omit them.
      rawdata: DataFrame with the input in its original form; its rows
        are what gets stored under "new_data".
    """
    # Use the arguments rather than a module-level `new_data` global, so the
    # function also works when imported and stores what its caller passed.
    source = rawdata if rawdata is not None else clean_data
    records = source.to_dict(orient='records')

    results = []
    for i, row in enumerate(records):
        result = {
            "new_data": {key: _json_safe(value) for key, value in row.items()},
            "prediction": int(predictions[i]),
        }
        if probabilities is not None:
            result["probabilities"] = [float(p) for p in probabilities[i]]
        results.append(result)

    # Save to a JSON file
    with open(output_file, 'w') as file:
        json.dump(results, file, indent=4)

    print(f"Inference results saved to {output_file}")


if __name__ == "__main__":
    # Pickled models to run; each one gets its own results file.
    models = [
        "LR_Click_Ads.pkl",
        "LR_Gender.pkl",
    ]
    # Define feature columns (must match the training data)
    feature_columns = ['Daily_Time_Spent_on_Site', 'Age', 'Area_Income', 'Daily_Internet_Usage']

    # Adding new file path in here
    new_data = pd.read_csv('Test_Data.csv')

    # Keep an independent snapshot of the input in its original form;
    # pd.DataFrame(new_data) is not a copy of the underlying data.
    rawdata = new_data.copy()

    # Preprocess once, on a copy. The same cleaned frame serves every model,
    # and repeated preprocessing of one frame would re-map the already
    # encoded Gender column to NaN.
    clean_data = preprocessing(new_data.copy())

    for model_file in models:
        model = load_model(model_file)

        predictions, probabilities = make_prediction(model, clean_data)

        # "LR_Gender.pkl" -> "LR_Gender_inference_results.json"
        base_name = model_file.rsplit('.', 1)[0]
        output_file = f"{base_name}_inference_results.json"

        save_inference_results(output_file, clean_data, predictions, probabilities, rawdata)


Model loaded successfully from LR_Click_Ads.pkl
Predictions [1 1 1 0 1 0 0 0 1 1 1 0 1 0 1 0 1 1 0 0 0 1 0 1 1 0 0 1 0 1 1 1 1 0 1 1 0
 1 1 0 0 0 1 1 0 1 1 1 1 1 0 1 1 0 1 0 1 1 0 0 0 1 1 0 0 1 1 0 1 1 1 1 0 1
 1 0 1 1 1 0 1 0 1 1 1 1 0 1 0 0 0 1 1 1 0 0 0 0 1 0 1 0 0 1 0 1 1 0 0 1 0
 1 0 1 0 0 0 1 0 0 1 0 0 1 1 1 1 0 1 1 1 1 1 0 0 0 0 0 1 0 1 0 1 0 0 1 0 0
 0 0 0 1 0 1 0 1 0 1 0 0 1 1 0 0 1 0 1 0 0 1 0 1 1 1 1 1 0 1 1 0 0 1 1 0 0
 0 0 1 0 1 0 0 0 0 1 0 0 1 1 1 0 1]
Probabilities: [[4.22472373e-01 5.77527627e-01]
 [6.39629357e-04 9.99360371e-01]
 [1.02889821e-03 9.98971102e-01]
 [9.86416406e-01 1.35835936e-02]
 [2.61268705e-01 7.38731295e-01]
 [8.77667655e-01 1.22332345e-01]
 [9.88632487e-01 1.13675134e-02]
 [8.73011420e-01 1.26988580e-01]
 [5.83653664e-04 9.99416346e-01]
 [1.21229484e-02 9.87877052e-01]
 [1.36671927e-03 9.98633281e-01]
 [9.67074226e-01 3.29257741e-02]
 [6.22955231e-03 9.93770448e-01]
 [9.40681651e-01 5.93183486e-02]
 [7.73903844e-04 9.99226096e-01]
 [9.49998246e-01 5.