<a href="https://colab.research.google.com/github/ShabnaIlmi/Data-Science-Group-Project/blob/Importer_Risk_Prediction_02/Exploratary_Data_Analysis_05.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Installing Necessary Libraries**

In [14]:
# Installing Required Libraries
# !pip install --upgrade tensorflow
# !pip install fancyimpute scikit-learn pandas

# **Importing the Relevant Libraries**

In [15]:
# Importing necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.impute import KNNImputer
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.cluster import KMeans
import warnings
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score, StratifiedKFold
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder
import shap
import pickle
import joblib

AttributeError: partially initialized module 'pandas' has no attribute '_pandas_parser_CAPI' (most likely due to a circular import)

In [None]:
# Mounting the Google Drive
from google.colab import drive
drive.mount('/content/drive')

# **Loading the Dataset**

In [None]:
# Loading the dataset from the Excel workbook stored on the mounted Google Drive
data = pd.read_excel('/content/drive/MyDrive/Importer_Risk_Prediction_2/Dataset/IMPORT STATISTICS - 2023.xlsx')

In [None]:
# Displaying the first few rows of the data
data.head()

# **Exploratory Data Analysis**

In [None]:
# Displaying information
print("Displaying data information")
data.info()

### **Identifying Categorical and Numerical Columns**

In [None]:
# Identifying categorical (object dtype) and numerical (int64/float64 dtype) columns.
# NOTE(review): this is a snapshot of the dtypes as loaded. Columns whose dtype is
# changed further down (e.g. 'HSCODE' being cast to string) are not re-classified
# in these two variables, so later cells that reuse them see the original grouping.
categorical_cols = data.select_dtypes(include=['object']).columns
numerical_cols = data.select_dtypes(include=['int64', 'float64']).columns

**Since the 'HSCODE' column has been misinterpreted as an int64 data type column due to the unavailability of data, it is reassigned as an object type column.**

In [None]:
# Converting the 'HSCODE' column to string type so it is treated as a categorical code
data['HSCODE'] = data['HSCODE'].astype(str)

# Displaying the HSCODE column data type
print("Data Type of HSCODE Columns:")
print(data[['HSCODE']].dtypes)

## **Categorical Features**

In [None]:
# List of categorical features
categorical_features = data.select_dtypes(include=['object']).columns

# Displaying the categorical features
print("Categorical Features:")
for feature in categorical_features:
    print(f"- {feature}")

# Display data type of the columns
print("\nData Type of Categorical Features:")
print(data[categorical_features].dtypes)

**Unique Values and Their Counts Relevant to Each Categorical Column**

In [None]:
# Displaying the unique values and their counts relevant to each categorical column.
# value_counts() already lists every distinct value with its frequency, so the
# separate (and previously unused) unique() lookup was removed.
print("Unique values and their count relevant to each categorical column:\n")
for col in categorical_features:
    print(data[col].value_counts())
    print(" ")

In [None]:
# Report only the categorical columns that actually hold null values, with their counts
print("Categorical columns with null values and their counts:")
null_totals = data[categorical_features].isnull().sum()
for col, null_count in null_totals[null_totals > 0].items():
    print(f"{col}: {null_count}")

In [None]:
# Report the categorical columns that contain the literal value 'Unknown', with their counts
print("Categorical columns with 'Unknown' values and their counts:")
unknown_totals = (data[categorical_features] == 'Unknown').sum()
for col, unknown_count in unknown_totals[unknown_totals > 0].items():
    print(f"{col}: {unknown_count}")

## **Numerical Features**

**Since the 'YEAR' column has been misinterpreted as a float64 data type column, it is reassigned as an int64 type column.**

In [None]:
# Converting the 'Year' column to int64 data type
# data['YEAR'] = data['YEAR'].astype(int)

# Displaying the data type of the 'Year' column
# print("Data Type of 'Year' Column:")
# print(data['YEAR'].dtype)

**The conversion cannot be done since the column contains missing values (both N/A and null values)**

In [None]:
# Numerical Features
numerical_features = data.select_dtypes(include=['int64', 'float64']).columns

# Displaying the Numerical Columns
print("Numerical Features:")
print(numerical_features)

**Unique Values and Their Count Relevant to Each Numerical Column**

In [None]:
# Displaying the unique values and their count in the numerical columns.
# value_counts() already lists every distinct value with its frequency, so the
# separate (and previously unused) unique() lookup was removed.
print("Unique values and their count in the numerical columns:\n")
for col in numerical_features:
    print(data[col].value_counts())
    print(" ")

**Numerical columns with null values and their relevant counts**

In [None]:
# Displaying the numerical columns with null values and their relevant counts.
# Iterates `numerical_features` (computed after 'HSCODE' was cast to string) rather
# than the earlier `numerical_cols` snapshot, so this report covers the same columns
# as the other numerical-feature cells in this section.
print("Numerical columns with null values and their relevant counts:")
for col in numerical_features:
    null_count = data[col].isnull().sum()
    if null_count > 0:
        print(f"{col}: {null_count}")

# **Data Preprocessing**

## **Data Cleaning**

In [None]:
# Step 1: Trim leading and trailing whitespace in every object-dtype column,
# one column at a time
object_columns = data.select_dtypes(include=['object']).columns
for text_col in object_columns:
    data[text_col] = data[text_col].str.strip()

In [None]:
# Step 2: Drop exact duplicate rows, then renumber the index from zero
data = data.drop_duplicates().reset_index(drop=True)

In [None]:
# Display dataset information
print("\nDataset information after removing duplicates:")
data.info()

In [None]:
# Step 3: Removing unnecessary full stops (".") from the categorical columns.
# regex=False pins literal matching: read as a regular expression, '.' matches
# every character, and the default of `regex` differs between pandas versions
# (True before pandas 2.0, False afterwards).
data[categorical_cols] = data[categorical_cols].apply(lambda x: x.str.replace('.', '', regex=False))

In [None]:
# Step 4: Clean the 'DESCRIPTION_03' column — strip the leading hyphen(s) first,
# then trim whatever whitespace is left at either end
description_03 = data['DESCRIPTION_03'].str.lstrip('-')
data['DESCRIPTION_03'] = description_03.str.strip()

In [None]:
# Step 5: Removing trailing spaces for the entire 'DESCRIPTION_02' column
data['DESCRIPTION_02'] = data['DESCRIPTION_02'].str.strip()

In [None]:
# Displaying the unique values and their counts relevant to each categorical column.
# value_counts() already lists every distinct value with its frequency, so the
# separate (and previously unused) unique() lookup was removed.
print("Unique values and their count relevant to each categorical column:\n")
for col in categorical_features:
    print(data[col].value_counts())
    print(" ")

In [None]:
# Step 6: Upper-case every value in the 'COUNTRY' and 'UNIT' columns
for text_col in ('COUNTRY', 'UNIT'):
    data[text_col] = data[text_col].str.upper()

# Displaying the modified dataset
print(data)

In [None]:
# Step 7: Handling the HSCODES

# Length of the longest HS code; every code is brought up to this width.
# Computed directly from the string lengths, so no helper column is needed.
max_length = data['HSCODE'].str.len().max()

# Right-padding the shorter HSCODE values with '0' up to the maximum length
data['HSCODE'] = data['HSCODE'].str.ljust(max_length, '0')

# Displaying the modified dataset
print(data)

In [None]:
# Step 8: Handling the 'COUNTRY' Column

# Harmonising alternative spellings of the same country.
# 'COUNTRY' is upper-cased in Step 6, so the lookup keys have to be upper-case as
# well — mixed-case keys such as 'United States' can never match an upper-cased
# value. The replacement names are upper-case too, to stay consistent with the
# rest of the column.
country_aliases = {
    'UNITED STATES': 'USA',
    'UNITED STATES OF AMERICA': 'USA',
    'UNITED KINGDOM': 'UK',
    'KOREA, REPUBLIC OF': 'SOUTH KOREA'
}

# .str.upper() is repeated here so the lookup also works if this cell is run on its own
data['COUNTRY'] = data['COUNTRY'].str.upper().replace(country_aliases)

print(data)

In [None]:
# Displaying the unique values and their counts relevant to each categorical column.
# value_counts() already lists every distinct value with its frequency, so the
# separate (and previously unused) unique() lookup was removed.
print("Unique values and their count relevant to each categorical column:\n")
for col in categorical_features:
    print(data[col].value_counts())
    print(" ")

## **Handling Missing Values**

**Handling missing values in the categorical columns**

**Replacing all the missing values with 'Unknown'**

In [None]:
# Step 12: Handling missing values in the categorical columns

# Replacing all the missing values in the categorical columns with 'Unknown' for imputation
data[categorical_features] = data[categorical_features].fillna('Unknown')

**Checking for any missing values left behind after replacing them with 'Unknown'**

In [None]:
# Verifying the changes after replacing the missing values with 'Unknown'
print("Checking for any missing values left behind after replacing with 'Unknown':")
for col in categorical_features:
    null_count = data[col].isnull().sum()
    if null_count > 0:
        print(f"{col}: {null_count} missing values")
    else:
        print(f"{col}: No missing values")
    print(" ")

In [None]:
# Handling the 'Unknown' values in the 'COUNTRY' column using the mode.
# The mode is taken over the known countries only: if 'Unknown' were itself the
# most frequent value, replacing 'Unknown' with the overall mode would change nothing.
known_countries = data.loc[data['COUNTRY'] != 'Unknown', 'COUNTRY']
if not known_countries.empty:
    mode_country = known_countries.mode()[0]
    data['COUNTRY'] = data['COUNTRY'].replace('Unknown', mode_country)

In [None]:
# Dropping the 'MONTH' and 'IMPORTER' columns due to the significant amount of null values
data.drop(columns=['MONTH', 'IMPORTER'], inplace=True)

In [None]:
# Displaying dataset information
print("\nDataset information after categorical imputation:")
data.info()

In [None]:
# Display unique values in the 'COUNTRY' column
unique_countries = data['COUNTRY'].unique()
print("Unique Countries:")
print(unique_countries)

In [None]:
# Display unique values in the 'HSCODE' column
unique_hscodes = data['HSCODE'].unique()
print("Unique HSCodes:")
print(unique_hscodes)

In [None]:
# Display the unique values in the 'UNIT' column
unique_units = data['UNIT'].unique()
print("Unique Units:")
print(unique_units)

In [None]:
# Displaying the unique values in the 'DESCRIPTION_01' column
unique_descriptions_01 = data['DESCRIPTION_01'].unique()
print("Unique Descriptions_01:")
print(unique_descriptions_01)

In [None]:
# Displaying the unique values in the 'DESCRIPTION_02' column
unique_descriptions_02 = data['DESCRIPTION_02'].unique()
print("Unique Descriptions_02:")
print(unique_descriptions_02)

In [None]:
# Displaying the unique values in the 'DESCRIPTION_03' column
unique_descriptions_03 = data['DESCRIPTION_03'].unique()
print("Unique Descriptions_03:")
print(unique_descriptions_03)

# **Data Encoding**

In [None]:
# Step 13: Encoding the Categorical variables

import joblib
from sklearn.preprocessing import OneHotEncoder
import os

# Fitted encoders, keyed by the column they were fitted on
encoders = {}

# Directory in which the fitted encoders are saved
encoder_directory = '/content/drive/MyDrive/Importer_Risk_Prediction_2/encoders/'

# Ensure the directory exists
os.makedirs(encoder_directory, exist_ok=True)

# Encoding categorical columns
encoding_columns = ['HSCODE', 'COUNTRY', 'UNIT', 'DESCRIPTION_01', 'DESCRIPTION_02', 'DESCRIPTION_03']
encoded_frames = []
for col in encoding_columns:
    one_hot_encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
    encoded = one_hot_encoder.fit_transform(data[[col]])

    # index=data.index keeps the encoded rows aligned with `data` even when its
    # index is not a clean 0..n-1 range; pd.concat(axis=1) aligns on index labels
    # and would otherwise introduce NaN rows.
    encoded_frames.append(pd.DataFrame(
        encoded,
        columns=[f'{col}_{category}' for category in one_hot_encoder.categories_[0]],
        index=data.index,
    ))

    # Save the encoder for the column with the column name as the file name in the specified directory
    encoder_filename = os.path.join(encoder_directory, f'{col}_encoder.pkl')
    joblib.dump(one_hot_encoder, encoder_filename)
    encoders[col] = one_hot_encoder
    print(f"One-Hot Encoding applied to column: {col} and encoder saved as {encoder_filename}")

# One concat at the end instead of one per column: same resulting column order
# (remaining original columns first, then the encoded blocks in `encoding_columns`
# order) without repeatedly rebuilding a very wide frame.
data = pd.concat([data.drop(columns=encoding_columns)] + encoded_frames, axis=1)

print("All categorical columns encoded and encoders saved successfully.")


## **Handling missing values in the numerical columns**

**Using K-NN imputation to fill the small number of missing values in the 'QUANTITY' column while capturing local patterns**

In [None]:
# Step 14: Handling missing values in the numerical columns

# KNN Imputation for 'QUANTITY'
# NOTE(review): only the 'QUANTITY' column is passed to the imputer, so a row with a
# missing quantity has no other feature from which a neighbour distance could be
# computed. Confirm whether this degenerates to filling with the column mean, and
# whether additional features should be supplied to the imputer.
knn_imputer = KNNImputer(n_neighbors=5)
data[['QUANTITY']] = knn_imputer.fit_transform(data[['QUANTITY']])

In [None]:
# Step 15: Dropping the 'YEAR' and the 'VALUE_RS' columns
data.drop(columns=['YEAR', 'VALUE_RS'], inplace=True)

## **Handling the Outliers**

In [None]:
# Step 16: Detecting outliers in the numerical columns with the 1.5 * IQR rule

# Numerical columns included in the outlier analysis
outlier_columns = ["QUANTITY"]

# Per-column summary: number of outliers plus the two fences
outlier_info = {}

for column in outlier_columns:
    values = data[column]
    Q1 = values.quantile(0.25)
    Q3 = values.quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR

    # Rows lying strictly outside the fences
    outside_fences = (values < lower_bound) | (values > upper_bound)
    outliers = data[outside_fences]

    summary = dict(zip(
        ('Outliers', 'Lower Bound', 'Upper Bound'),
        (len(outliers), lower_bound, upper_bound),
    ))
    outlier_info[column] = summary

    print(f"Column: {column}")
    print(f"  Outliers: {summary['Outliers']}, Lower: {lower_bound:.2f}, Upper: {upper_bound:.2f}")
    print(" ")

## **Visualization of the Outliers**

In [None]:
# Visualizing the outliers for Quantity: a boxplot with the IQR fences drawn on top
plt.figure(figsize=(10, 5))
quantity = data['QUANTITY']
Q1, Q3 = quantity.quantile([0.25, 0.75])
IQR = Q3 - Q1
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR

sns.boxplot(x=quantity, color='skyblue', flierprops=dict(marker='o', color='red', markersize=5))

# One dashed vertical line per fence
for fence, fence_colour, fence_label in (
    (lower_bound, 'red', 'Lower Bound'),
    (upper_bound, 'green', 'Upper Bound'),
):
    plt.axvline(fence, color=fence_colour, linestyle='--', label=fence_label)

plt.title('Boxplot of Quantity')
plt.xlabel('Quantity')
plt.legend()
plt.grid(axis='x', alpha=0.75)
plt.tight_layout()
plt.show()

In [None]:
# Function to handle outliers by capping
def handle_outliers(data, method='cap', columns=('QUANTITY',), iqr_multiplier=1.5):
    """Cap outliers in the selected columns at their IQR fences.

    The fences are computed from the data itself (Q1 - k*IQR and Q3 + k*IQR).
    The previous hard-coded bounds (-0.31, 0.03) were applied to the unscaled
    'QUANTITY' column, which squeezed every quantity into that tiny interval.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to process. It is modified in place and also returned, so pass a
        copy if the original must be preserved.
    method : str, default 'cap'
        'cap' clips values to the fences; any other value leaves the data unchanged.
    columns : iterable of str, default ('QUANTITY',)
        Columns to process. Names that are not present in `data` are skipped.
    iqr_multiplier : float, default 1.5
        Width of the fences in multiples of the inter-quartile range.

    Returns
    -------
    pandas.DataFrame
        The same frame object, with the selected columns capped.
    """
    if method != 'cap':
        return data

    for column in columns:
        if column not in data.columns:
            continue

        values = data[column]
        q1 = values.quantile(0.25)
        q3 = values.quantile(0.75)
        iqr = q3 - q1

        # clip() is the vectorised equivalent of min(max(x, lower), upper); NaN stays NaN
        data[column] = values.clip(lower=q1 - iqr_multiplier * iqr,
                                   upper=q3 + iqr_multiplier * iqr)

    return data


# Applying the function to handle outliers
data = handle_outliers(data.copy(), method='cap')

# Displaying the cleaned dataset
print(data.head())

In [None]:
# Step 17: Normalizing Quantity
# NOTE(review): the standardised values are only stored in `quantity_scaled`; no other
# cell visible in this notebook reads that variable, and data['QUANTITY'] itself is left
# unscaled. `scaler` is also re-created later, before the model features are
# standardised. Confirm whether this step should write back to `data` or be removed.
scaler = StandardScaler()
quantity_scaled = scaler.fit_transform(data[['QUANTITY']])

In [None]:
# Step 18: Assigning synthetic importer names via K-means clustering

# Performing K-means clustering on every column currently in `data`
kmeans = KMeans(n_clusters=3, random_state=42)
clusters = kmeans.fit_predict(data)

# Mapping each cluster index to the importer name at the same position in this list.
# NOTE(review): with n_clusters=3 the cluster indices are 0-2, so only the first three
# names can ever be assigned — confirm whether all ten were meant to be used.
importers = np.array(['GlobalChem Corp', 'EcoImports Ltd', 'SafeChem Traders', 'Prime Chemicals Inc', 'ChemTrade Solutions',
             'GreenEarth Supplies', 'BioChem Imports', 'Reliable Chemicals Co', 'TransGlobal Imports', 'PureChem Ltd'])

data['IMPORTER'] = importers[clusters]

print("Clustering completed successfully. Importers assigned.")

In [None]:
# Step 19: Encoding the 'IMPORTER' Column

# A fitted OneHotEncoder is used instead of pd.get_dummies so that the object saved
# to IMPORTER_encoder.pkl exposes .transform(), exactly like the encoders saved for
# the other categorical columns. Previously only the dummy column names (a pandas
# Index) were pickled, which cannot encode new input at prediction time.
importer_encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
importer_encoded = importer_encoder.fit_transform(data[['IMPORTER']])

# Same column naming as before ('IMPORTER_<name>'); index=data.index keeps rows aligned
one_hot_encoded = pd.DataFrame(
    importer_encoded,
    columns=[f'IMPORTER_{category}' for category in importer_encoder.categories_[0]],
    index=data.index,
)

# Replacing the original 'IMPORTER' column with its one-hot encoded columns
data = pd.concat([data.drop(columns=['IMPORTER']), one_hot_encoded], axis=1)

# Saving the fitted encoder
joblib.dump(importer_encoder, '/content/drive/MyDrive/Importer_Risk_Prediction_2/encoders/IMPORTER_encoder.pkl')

# Displaying the result
print(data)

In [None]:
# Displaying Dataset Information
print(data.info())

In [None]:
# Displaying Dataset Head
print(data.head())

## **Assigning the Target Variable**

In [None]:
# Step 20: Assigning the Target Variable

# Performing K-means clustering on every column currently in `data`
kmeans = KMeans(n_clusters=4, random_state=42)
clusters = kmeans.fit_predict(data)

# Mapping each cluster index to the risk label at the same position in this list.
# NOTE(review): the mapping is purely positional (cluster 0 -> "Low Risk", ...);
# nothing in this cell ties a cluster's contents to the severity its label implies —
# confirm how the labels are meant to be interpreted.
risk_levels = np.array(["Low Risk", "Moderate Risk", "High Risk", "No Risk"])
data['RISK'] = risk_levels[clusters]

print("Clustering completed successfully. Risk levels assigned.")

In [None]:
# Display Dataset Information
print(data.info())

In [None]:
# Print Unique Values for All Columns
for col in data.columns:
    print(f"Unique values in '{col}':")
    print(data[col].unique())
    print('-' * 40)

In [None]:
# Saving the Preprocessed Code
data.to_csv('/content/drive/MyDrive/Importer_Risk_Prediction_2/Dataset/Preprocessed_before_data.csv', index=False)

In [None]:
# The target column has to exist before it can be encoded
if 'RISK' not in data.columns:
    raise ValueError("Column 'RISK' not found in the dataset")

# Normalise the target to strings so the encoder sees a single dtype
risk_as_text = data['RISK'].astype(str)
data['RISK'] = risk_as_text

# Debug: distinct labels before encoding
print("Unique Values in RISK:", data['RISK'].unique())

# Learn the label -> integer mapping, then apply it to the column
label_encoder = LabelEncoder()
label_encoder.fit(data['RISK'])
data['RISK'] = label_encoder.transform(data['RISK'])

# Debug: the classes, in the order of their integer codes
print("Encoded Labels:", label_encoder.classes_)

# Persist the fitted encoder so predictions can be mapped back to label names
joblib.dump(label_encoder, '/content/drive/MyDrive/Importer_Risk_Prediction_2/encoders/label_encoder.pkl')
print("Label Encoder model saved as 'label_encoder.pkl'")

# Preview Final Data
print(data.head())

In [None]:
# Display the Dataset head
print(data.head())

In [None]:
# Set Style
plt.figure(figsize=(8, 6))
sns.countplot(x='RISK', data=data, palette='viridis')
plt.title('Target Variable Distribution')
plt.xlabel('Risk Category')
plt.ylabel('Count')
plt.show()

In [None]:
# Features (X) and Target (y)
X = data.drop(['RISK'], axis=1)
y = data['RISK']

In [None]:
# Displaying the Columns in X
print("Columns in X:")
print(X.columns)

In [None]:
# Splitting dataset into training and testing before SMOTE
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

print("Training Set Size:", X_train.shape[0])
print("Testing Set Size:", X_test.shape[0])

# **Balancing the Target Variable**

In [None]:
# Handling the imbalanced dataset

# Suppressing the FutureWarnings
warnings.filterwarnings("ignore", category=FutureWarning)

# Oversampling the minority classes with SMOTE.
# NOTE(review): SMOTE is fitted on the full X, y rather than on the training split
# created in the previous cell — confirm this is intended, since synthetic samples
# generated here are derived from every row, including rows meant to be held out.
smote = SMOTE(sampling_strategy='auto', random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

print("Original dataset size:", X.shape)
print("Resampled dataset size:", X_resampled.shape)

In [None]:
# Plotting class distribution after SMOTE
sns.countplot(x=y_resampled, palette="viridis")
plt.title("Class Distribution After SMOTE")
plt.xlabel("Class")
plt.ylabel("Count")
plt.show()

In [None]:
# Displaying the dataset shape after applying the SMOTE
print(f"Shape of X_resampled: {X_resampled.shape}")
print(f"Shape of y_resampled: {y_resampled.shape}")

In [None]:
# Displaying the Columns in X
print("Columns in X:")
print(X_resampled.columns)

In [None]:
# Standardizing the features
scaler = StandardScaler()
X_resampled_scaled = scaler.fit_transform(X_resampled)

In [None]:
# Splitting data
# The hold-out set is split off the original rows first; SMOTE and the scaler are then
# fitted on the training portion only. Splitting the SMOTE output instead (as this cell
# used to do) puts synthetic samples in the test set and fits the scaler on rows that
# are later used for evaluation, which inflates every reported metric.
# The cell starts again from X and y, so it is self-contained and safe to re-run.
X_train_raw, X_test_raw, y_train_raw, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Balance the training portion only
X_train_balanced, y_train = SMOTE(sampling_strategy='auto', random_state=42).fit_resample(
    X_train_raw, y_train_raw
)

# Fit the scaler on the training data, then apply the same transformation to the test data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_balanced)
X_test = scaler.transform(X_test_raw)

In [None]:
# Displaying the Dataset Information
print("Dataset Information:")
print(data.info())

In [None]:
# Saving the Preprocessed Code
data.to_csv('/content/drive/MyDrive/Importer_Risk_Prediction_2/Dataset/Preprocessed_data.csv', index=False)

# **Model Training**

In [None]:
# Initialize the RandomForestClassifier (without hyperparameter tuning)
rf = RandomForestClassifier(random_state=42)

In [None]:
# Train the RandomForest model on the training data
rf.fit(X_train, y_train)

In [None]:
# Predict on the test set
y_pred = rf.predict(X_test)

In [None]:
# Confusion Matrix
cm = confusion_matrix(y_test, y_pred)
print(f"Confusion Matrix:\n{cm}")

In [None]:
# Plotting Confusion Matrix
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=np.unique(y_resampled), yticklabels=np.unique(y_resampled))
plt.title('Confusion Matrix')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.show()

In [None]:
# Classification Report
report = classification_report(y_test, y_pred)
print(f"Classification Report:\n{report}")

In [None]:
# Accuracy score on the test set
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy on test set: {accuracy * 100:.2f}%")

In [None]:
import os
import joblib

# Save the Random Forest Model.
# The target folder is created first: joblib.dump writes to the given path and
# fails if the parent directory does not exist.
model_path = '/content/drive/MyDrive/Importer_Risk_Prediction_2/models/random_forest_model.joblib'
os.makedirs(os.path.dirname(model_path), exist_ok=True)
joblib.dump(rf, model_path)
print("Random Forest Model Saved Successfully!")

# **The Predictions**

In [None]:
import joblib
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, LabelEncoder

# NOTE: joblib.load unpickles arbitrary objects — only load artifacts written by this notebook.
# Load failures print a short message and then re-raise: calling exit() inside a notebook
# shuts the kernel down, whereas re-raising stops the cell and keeps the traceback.

# Load trained model
try:
    model = joblib.load('/content/drive/MyDrive/Importer_Risk_Prediction_2/models/random_forest_model.joblib')
    print("Model loaded successfully.")
except Exception as e:
    print(f"Error loading model: {e}")
    raise

# Load LabelEncoder for target variable.
# The file name matches the one written by the label-encoding cell ('label_encoder.pkl').
try:
    label_encoder = joblib.load('/content/drive/MyDrive/Importer_Risk_Prediction_2/encoders/label_encoder.pkl')
    print("Target LabelEncoder loaded successfully.")
except Exception as e:
    print(f"Error loading target LabelEncoder: {e}")
    raise

# Define categorical columns
categorical_columns = ['IMPORTER', 'HSCODE', 'COUNTRY', 'UNIT', 'DESCRIPTION_01', 'DESCRIPTION_02', 'DESCRIPTION_03']

# Load OneHotEncoders for each categorical column
encoders = {}
for col in categorical_columns:
    try:
        encoder_file = f'/content/drive/MyDrive/Importer_Risk_Prediction_2/encoders/{col}_encoder.pkl'
        loaded_encoder = joblib.load(encoder_file)

        # Reject pickles that are not fitted encoders (e.g. a saved list of column
        # names): they would only fail later, at prediction time.
        if not hasattr(loaded_encoder, "transform"):
            raise TypeError(
                f"{encoder_file} does not contain an encoder with a .transform() method "
                f"(found {type(loaded_encoder).__name__})"
            )

        encoders[col] = loaded_encoder
        print(f"{col} encoder loaded successfully.")
    except Exception as e:
        print(f"Error loading encoder for {col}: {e}")
        raise

def get_user_input():
    """Prompt for one import record and return it as a single-row DataFrame.

    The text answers are normalised the same way the training data was cleaned:
    surrounding whitespace is trimmed, and 'COUNTRY' and 'UNIT' are upper-cased.
    Other cleaning steps applied during training (e.g. HS code padding) are not
    repeated here.

    The quantity prompt is repeated until a numeric value is entered, instead of
    terminating the session on a typo.

    Returns
    -------
    pandas.DataFrame
        One row with the columns IMPORTER, HSCODE, COUNTRY, UNIT, DESCRIPTION_01,
        DESCRIPTION_02, DESCRIPTION_03 (strings) and QUANTITY (float).
    """
    print("\nPlease enter the following details:")
    importer = input("Enter Importer: ").strip()
    hscode = input("Enter HS Code: ").strip()
    country = input("Enter Country: ").strip().upper()
    unit = input("Enter Unit: ").strip().upper()
    description_01 = input("Enter Description 1: ").strip()
    description_02 = input("Enter Description 2: ").strip()
    description_03 = input("Enter Description 3: ").strip()

    # Keep asking until the answer parses as a number
    while True:
        try:
            quantity = float(input("Enter Quantity: "))
            break
        except ValueError:
            print("Invalid quantity. Please enter a numeric value.")

    return pd.DataFrame({
        'IMPORTER': [importer],
        'HSCODE': [hscode],
        'COUNTRY': [country],
        'UNIT': [unit],
        'DESCRIPTION_01': [description_01],
        'DESCRIPTION_02': [description_02],
        'DESCRIPTION_03': [description_03],
        'QUANTITY': [quantity]
    })

def preprocess_input(data):
    """Turn one row of user input into the feature vector expected by the model.

    The blocks are stacked in the column order the training frame was built in:
    'QUANTITY' first, then the one-hot blocks in the order they were encoded
    (HSCODE ... DESCRIPTION_03), and the IMPORTER block last, because it was
    appended to the training data after all the others.
    NOTE(review): this assumes 'QUANTITY' is the only non-encoded feature left in
    the training frame — confirm against the columns of X.
    NOTE(review): during training the features were additionally standardised with
    a StandardScaler that is not saved anywhere visible in this notebook, so the
    values produced here are unscaled — confirm how that should be reproduced.

    Parameters
    ----------
    data : pandas.DataFrame
        Single-row frame holding every column in `categorical_columns` plus 'QUANTITY'.

    Returns
    -------
    numpy.ndarray
        Array of shape (n_rows, n_features).

    Raises
    ------
    KeyError
        If no encoder was loaded for one of the categorical columns.
    Exception
        Any error raised while encoding is reported and then re-raised. The
        function no longer calls exit(), which would shut down the notebook kernel.
    """
    try:
        # IMPORTER was encoded last during training, so its block goes at the end
        ordered_columns = [col for col in categorical_columns if col != 'IMPORTER']
        if 'IMPORTER' in categorical_columns:
            ordered_columns.append('IMPORTER')

        # Numerical feature(s) come first, as in the training frame
        feature_blocks = [data[['QUANTITY']].values]

        for col in ordered_columns:
            encoder = encoders.get(col)
            # Explicit None check: truth-testing an arbitrary loaded object is unreliable
            if encoder is None:
                raise KeyError(f"Encoder not found for column: {col}")

            # Transform the input and convert to a dense array if needed
            encoded = encoder.transform(data[[col]])
            if hasattr(encoded, "toarray"):
                encoded = encoded.toarray()
            feature_blocks.append(encoded)

        # Combine all blocks into a single 2-D feature array
        return np.hstack(feature_blocks)
    except Exception as e:
        print(f"Error during preprocessing: {e}")
        raise

def predict_risk():
    """Collect one record from the user, run it through the model and print the risk label.

    Errors raised while predicting or decoding the label are reported with a
    printed message rather than propagated.
    """
    features = preprocess_input(get_user_input())

    try:
        # The model expects a 2-D array: one row, n_features columns
        features_2d = np.array(features).reshape(1, -1)

        # Numeric class predicted for the single row
        predicted_code = model.predict(features_2d)[0]

        # Map the numeric class back to its original label
        predicted_labels = label_encoder.inverse_transform([predicted_code])

        print(f"Predicted Risk Level: {predicted_labels[0]}")

    except Exception as e:
        print(f"Error during prediction: {e}")

if __name__ == "__main__":
    predict_risk()
