In [None]:
# Mount Google Drive at /content/drive; the CSV and model paths used in the
# later cells all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import torch
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [None]:
# Location of the raw claims CSV on the mounted Drive
file_path = '/content/drive/MyDrive/Car_Insurance_Claim.csv/Car_Insurance_Claim.csv'

# Columns that are one-hot encoded before being fed to the network
categorical_features = [
    'AGE', 'GENDER', 'RACE', 'DRIVING_EXPERIENCE', 'EDUCATION', 'INCOME',
    'VEHICLE_OWNERSHIP', 'VEHICLE_YEAR', 'MARRIED', 'CHILDREN',
    'POSTAL_CODE', 'VEHICLE_TYPE',
]

# Read -> drop incomplete rows -> drop the identifier -> one-hot encode -> float32.
# `data` keeps the OUTCOME column; its column order defines the feature layout
# that get_class() aligns new datapoints to.
data = (
    pd.read_csv(file_path)
    .dropna()
    .drop(columns='ID')
    .pipe(pd.get_dummies, columns=categorical_features)
    .astype('float32')
)

# Separate the target from the feature matrix
y = data['OUTCOME'].values
X = data.drop(columns='OUTCOME').values

# 80/20 train/test split with a fixed seed so the split is reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardise the features: statistics are learned on the training split only
# and then applied unchanged to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [None]:
# Change path to that of model.
# map_location='cpu' forces the TorchScript weights onto the CPU regardless of
# the device they were saved from: get_class() builds its input tensor on the
# CPU, so the model has to live there too (and a GPU-saved model would
# otherwise fail to load on a CPU-only runtime).
model = torch.jit.load('/content/drive/MyDrive/model.pt', map_location='cpu')

In [None]:
# Function predicts the class of the datapoint
def get_class(datapoint, threshold=0.5):
    """Predict the binary class of a single raw datapoint.

    The datapoint is one-hot encoded, aligned to the feature columns of the
    module-level ``data`` frame (every column except ``OUTCOME``), standardised
    with the module-level ``scaler`` and passed through the module-level
    ``model``.

    Args:
        datapoint: Mapping from raw column name to value. It must contain every
            name listed in ``categorical_features``.
        threshold: Cut-off applied to the model output; an output strictly
            greater than it yields class 1. Defaults to 0.5.

    Returns:
        int: 1 if the model output exceeds ``threshold``, otherwise 0.

    Raises:
        KeyError: If ``datapoint`` lacks one of the categorical features.

    Notes:
        * A category value that produces no matching dummy column in ``data``
          is dropped during alignment, so that feature is encoded as all zeros.
        * Any non-categorical feature column absent from ``datapoint`` is
          filled with 0 *before* scaling.
        * Assumes the model accepts a 1-D feature vector and returns a single
          value comparable to ``threshold`` (e.g. a probability) - TODO confirm
          against the training notebook.
    """
    # Fail early with a readable message instead of pandas' generic KeyError.
    missing = [name for name in categorical_features if name not in datapoint]
    if missing:
        raise KeyError(f"datapoint is missing categorical features: {missing}")

    # Feature layout the scaler was fitted on (everything except the target).
    feature_columns = data.columns.drop('OUTCOME')

    # One-hot encode the single row the same way the training frame was encoded.
    encoded = pd.get_dummies(pd.DataFrame([datapoint]), columns=categorical_features)

    # reindex does three things at once: adds absent dummy columns as 0,
    # discards columns the training frame does not have, and fixes the order.
    aligned = encoded.reindex(columns=feature_columns, fill_value=0).astype(np.float32)

    # Standardise with the fitted scaler; input is a (1, n_features) array.
    scaled = scaler.transform(aligned.to_numpy())
    tensor_data_point = torch.tensor(scaled, dtype=torch.float32).squeeze(0)

    # Set the model to evaluation mode and predict without tracking gradients
    model.eval()
    with torch.no_grad():
        prediction = model(tensor_data_point)

    # Threshold the model output into a binary label and return it as a Python int
    predicted_class = (prediction > threshold).int()
    return predicted_class.item()


# Example input for get_class(); edit the values to classify a different customer.
# Categorical entries must be given as the raw category values, since
# get_class() one-hot encodes them itself.
data_point1 = {
    # --- driver profile ---
    'AGE': '16-25',
    'GENDER': 'male',
    'RACE': 'minority',
    'DRIVING_EXPERIENCE': '0-9y',
    'EDUCATION': 'none',
    'INCOME': 'poverty',
    'CREDIT_SCORE': 0.17952575773693658,
    # --- household / vehicle ---
    'VEHICLE_OWNERSHIP': 1.0,
    'VEHICLE_YEAR': 'after 2015',
    'MARRIED': 1.0,
    'CHILDREN': 1.0,
    'POSTAL_CODE': 10238,
    'ANNUAL_MILEAGE': 9000.0,
    'VEHICLE_TYPE': 'sedan',
    # --- driving record ---
    'SPEEDING_VIOLATIONS': 0,
    'DUIS': 0,
    'PAST_ACCIDENTS': 0,
}

In [None]:
# Classify the sample datapoint and print the class label returned by get_class()
print(get_class(data_point1))

0
