<a href="https://colab.research.google.com/github/IISC-GROUP-5/Jupyter_Notebooks/blob/Pratima/Inference_file_LR.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

from sklearn.preprocessing import StandardScaler
import json


import pickle
import warnings

# Install a global "ignore" filter: every warning raised after this point is
# suppressed, including library deprecation notices.
warnings.filterwarnings("ignore")

In [2]:
def load_model(model_file):
    """
    Read a pickled model object from disk and hand it back to the caller.

    Args:
        model_file: Path to the .pkl file that holds the serialized model.

    Returns:
        The object reconstructed by pickle.

    Note:
        Unpickling can execute arbitrary code, so only load files that come
        from a trusted source.
    """
    # Pull the raw bytes first so the file handle is released immediately.
    with open(model_file, 'rb') as handle:
        payload = handle.read()

    restored = pickle.loads(payload)
    print(f"Model loaded successfully from {model_file}")
    return restored

In [3]:
def preprocessing(new_data):
  """
  Clean a raw feature table so it can be passed to a trained model.

  Steps, in order:
    1. Replace spaces in column names with underscores.
    2. Fill missing values: the median for numeric columns, the most
       frequent value for every other column. A non-numeric column that is
       entirely missing has no most-frequent value and is left untouched.
    3. Round 'Area_Income' to 2 decimal places (skipped if the column is
       absent).
    4. Encode 'Gender' as Female -> 0, Male -> 1 (skipped if the column is
       absent). Any other value becomes NaN, as `Series.map` does for keys
       that are not in the mapping.

  Args:
    new_data: A pandas DataFrame containing new data. It is modified in
      place; pass a copy if the original values must be preserved.

  Returns:
    The same DataFrame object, after cleaning.
  """
  # Replace spaces in column names with underscores
  new_data.columns = new_data.columns.str.replace(' ', '_')

  # Handle missing values
  for col in new_data.columns:
    column = new_data[col]
    if not column.isnull().any():
      continue

    if pd.api.types.is_numeric_dtype(column):
      # For numeric columns, fill missing values with median
      fill_value = column.median()
    else:
      # For categorical columns, fill missing values with the most frequent value
      modes = column.mode()
      if modes.empty:
        # Every entry is missing, so there is nothing to impute from.
        continue
      fill_value = modes.iloc[0]

    # Assign the filled column back to the frame. Calling
    # fillna(inplace=True) on `new_data[col]` is chained assignment, which
    # pandas deprecates and which does not update the frame under
    # Copy-on-Write.
    new_data[col] = column.fillna(fill_value)

  # Round 'Area Income' to 2 decimal places
  if 'Area_Income' in new_data.columns:
    new_data['Area_Income'] = new_data['Area_Income'].round(2)

  # Encode gender labels as integers
  if 'Gender' in new_data.columns:
    label_encoding_mapping = {'Female': 0, 'Male': 1}
    new_data['Gender'] = new_data['Gender'].map(label_encoding_mapping)

  return new_data


In [4]:
def make_prediction(model, clean_data, target, columns=None):
  """
  Run a fitted classifier on preprocessed data and report the results.

  Args:
    model: A fitted estimator exposing `predict` and `predict_proba`.
    clean_data: A pandas DataFrame of preprocessed features. Extra columns
      are ignored.
    target: Label printed in front of the predictions (the name of the
      column being predicted).
    columns: Optional list of feature column names to feed to the model, in
      order. When omitted, the module-level `feature_columns` list is used,
      which keeps existing three-argument calls working.

  Returns:
    A tuple `(prediction, probabilities)` holding the outputs of
    `model.predict` and `model.predict_proba` on the selected columns.

  Raises:
    KeyError: If any required feature column is missing from `clean_data`.
  """
  if columns is None:
    # Backward-compatible fallback to the notebook-level definition.
    columns = feature_columns
  columns = list(columns)

  absent = [name for name in columns if name not in clean_data.columns]
  if absent:
    raise KeyError(f"clean_data is missing required feature columns: {absent}")

  # Ensure only the required columns are used
  features = clean_data[columns]

  # Make predictions
  prediction = model.predict(features)
  probabilities = model.predict_proba(features)
  print(target , prediction)
  print('Probabilities:', probabilities)
  return prediction, probabilities

In [5]:
def save_inference_results(output_file, clean_data, predictions, probabilities, rawdata=None):
    """
    Save the inference results to a file in JSON format.

    One JSON object is written per input row, with the keys:
      - "new_data": the input row as a dict,
      - "prediction": the predicted label as an int,
      - "probabilities": the class probabilities as a list of floats
        (omitted when `probabilities` is None).

    Args:
        output_file: Path of the JSON file to write.
        clean_data: Preprocessed DataFrame. Its rows are stored when
            `rawdata` is not supplied.
        predictions: Sequence of predicted labels, one per row.
        probabilities: Sequence of per-class probabilities, one entry per
            row, or None to leave probabilities out.
        rawdata: DataFrame holding the input in its original form. When
            given, its rows are the ones stored under "new_data".

    Raises:
        ValueError: If the number of predictions or probabilities does not
            match the number of rows.
    """
    # Use the frames passed in rather than a notebook-level global, so the
    # function works no matter what the caller named its variables.
    source = rawdata if rawdata is not None else clean_data
    rows = source.to_dict(orient='records')

    if len(predictions) != len(rows):
        raise ValueError(
            f"Got {len(predictions)} predictions for {len(rows)} input rows"
        )
    if probabilities is not None and len(probabilities) != len(rows):
        raise ValueError(
            f"Got {len(probabilities)} probability rows for {len(rows)} input rows"
        )

    results = []
    for i, row in enumerate(rows):
        result = {
            "new_data": row,
            "prediction": int(predictions[i])
            }
        if probabilities is not None:
            # Cast to built-in floats so any numpy float width is JSON-serializable.
            result["probabilities"] = [float(p) for p in probabilities[i]]
        results.append(result)

    # Save to a JSON file
    with open(output_file, 'w') as file:
        json.dump(results, file, indent=4)

    print(f"Inference results saved to {output_file}")


In [6]:
# --- Inference run 1: predict whether the ad was clicked ---
model = load_model('CP_LR_Click_Ads.pkl')

# Define feature columns (must match the training data).
# make_prediction reads this module-level name, so it must be set before the call.
feature_columns = ['Daily_Time_Spent_on_Site', 'Age', 'Area_Income', 'Daily_Internet_Usage', 'Gender']


# Adding new file path in here
new_data = pd.read_csv('Test_Data.csv')

# Take an independent snapshot of the input before cleaning. preprocessing()
# modifies `new_data` in place, and pd.DataFrame(new_data) would not
# guarantee a separate copy of the underlying data.
rawdata = new_data.copy()

clean_data = preprocessing(new_data)
target1 = 'Clicked_on_Ad'
predictions, probabilities = make_prediction(model, clean_data, target1)

save_inference_results('Ads_inference_results.json', clean_data, predictions, probabilities, rawdata)

Model loaded successfully from CP_LR_Click_Ads.pkl
Clicked_on_Ad [1 1 1 0 1 0 0 0 1 1 1 0 1 0 1 0 1 1 0 0 0 1 0 1 1 0 0 1 0 1 1 1 1 0 1 1 0
 1 1 0 0 0 1 1 0 1 1 1 1 1 0 1 1 0 1 0 1 1 0 0 0 1 1 0 0 1 1 0 1 1 1 1 0 1
 1 0 1 1 1 0 1 0 1 1 1 1 0 1 0 0 0 1 1 1 0 0 0 0 1 0 1 0 0 1 0 1 1 0 0 1 0
 1 0 1 0 0 0 1 0 0 1 0 0 1 1 1 1 0 1 1 1 1 1 0 0 0 0 0 1 0 1 0 1 0 0 1 0 0
 0 0 0 1 0 1 0 1 0 1 0 0 1 1 0 0 1 0 1 0 0 1 0 1 1 1 1 1 0 1 1 0 0 1 1 0 0
 0 0 1 0 1 0 0 0 0 1 0 0 1 1 1 0 1]
Probabilities: [[3.50034619e-01 6.49965381e-01]
 [1.22307201e-04 9.99877693e-01]
 [2.97789484e-04 9.99702211e-01]
 [9.92459505e-01 7.54049481e-03]
 [2.60624931e-01 7.39375069e-01]
 [9.02520681e-01 9.74793186e-02]
 [9.93968214e-01 6.03178648e-03]
 [8.74657073e-01 1.25342927e-01]
 [1.25583050e-04 9.99874417e-01]
 [3.46024745e-03 9.96539753e-01]
 [3.38364558e-04 9.99661635e-01]
 [9.76995020e-01 2.30049798e-02]
 [2.18227229e-03 9.97817728e-01]
 [9.55623025e-01 4.43769746e-02]
 [2.44178470e-04 9.99755822e-01]
 [9.61176764e-

In [7]:
# --- Inference run 2: predict gender ---
model = load_model('CP_LR_Gender.pkl')

# Define feature columns (must match the training data).
# make_prediction reads this module-level name, so it must be set before the call.
feature_columns = ['Daily_Time_Spent_on_Site', 'Age', 'Area_Income', 'Daily_Internet_Usage', 'Clicked_on_Ad']


# Adding new file path in here
new_data = pd.read_csv('Test_Data.csv')

# Take an independent snapshot of the input before cleaning. preprocessing()
# modifies `new_data` in place, and pd.DataFrame(new_data) would not
# guarantee a separate copy of the underlying data.
rawdata = new_data.copy()

clean_data = preprocessing(new_data)
target2 = 'Gender'
predictions, probabilities = make_prediction(model, clean_data, target2)

save_inference_results('Gender_inference_results.json', clean_data, predictions, probabilities, rawdata)

Model loaded successfully from CP_LR_Gender.pkl
Gender [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Probabilities: [[0.534056   0.465944  ]
 [0.52930781 0.47069219]
 [0.52572615 0.47427385]
 [0.5384058  0.4615942 ]
 [0.54219976 0.45780024]
 [0.52880452 0.47119548]
 [0.53641199 0.46358801]
 [0.5375329  0.4624671 ]
 [0.52736672 0.47263328]
 [0.53729324 0.46270676]
 [0.53007407 0.46992593]
 [0.5289417  0.4710583 ]
 [0.53214076 0.46785924]
 [0.53087483 0.46912517]
 [0.53689967 0.46310033]
 [0.54065602 0.45934398]
 [0.52546383 0.47453617]
 [0.52739357 0.47260643]
 [0.54189402 0.45810598]
 [0.53211898 0.46788102]
 [0.54096167 0.4590