In [616]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

In [618]:
import pandas as pd
X_test = pd.DataFrame({'User_ID', 'Gender', 'Age', 'EstimatedSalary'})
df = pd.read_csv('User_Data.csv')
Gender_mapping = {"male": 1 , "female": 0}
df["Gender"] = df["Gender"].map(Gender_mapping)

In [620]:
X = df[['User ID', 'Gender', 'Age', 'EstimatedSalary']] 
Y = df['Purchased']  

In [622]:
X_train, X_test, Y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=42)

In [624]:
rf = RandomForestClassifier(n_estimators=100, random_state=42)

In [626]:
print(df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   User ID          400 non-null    int64  
 1   Gender           0 non-null      float64
 2   Age              400 non-null    int64  
 3   EstimatedSalary  400 non-null    int64  
 4   Purchased        400 non-null    int64  
dtypes: float64(1), int64(4)
memory usage: 15.8 KB
None


In [628]:
rf.fit(X_train, Y_train)

In [630]:
Y_pred = rf.predict(X_test)

In [632]:
Y_prob = rf.predict_proba(X_test)

In [634]:
accuracy = accuracy_score(y_test, Y_pred)
report = classification_report(y_test, Y_pred)

print(f"Accuracy: {accuracy}")
print(f"Classification Report:\n{report}")


Accuracy: 0.8916666666666667
Classification Report:
              precision    recall  f1-score   support

           0       0.92      0.90      0.91        73
           1       0.85      0.87      0.86        47

    accuracy                           0.89       120
   macro avg       0.89      0.89      0.89       120
weighted avg       0.89      0.89      0.89       120



In [636]:
print("Probability estimates for the first 5 instances:")
for i in range(5):
    print(f"Instance {i+1}: {Y_prob[i]}")

Probability estimates for the first 5 instances:
Instance 1: [0.07 0.93]
Instance 2: [0.43 0.57]
Instance 3: [0.99 0.01]
Instance 4: [0.23 0.77]
Instance 5: [0.99 0.01]


In [638]:
X_test = [[160000, 1, 45, 340000]] 
(Y_pred) = 1

In [640]:
Y_prob = rf.predict_proba(X_test)



In [642]:
X_test = [[15901537, 0, 29, 35000]]
Y_pred = 1
print(Y_prob)

[[0.38 0.62]]


In [644]:
X_test = [[160000, 1, 45, 340000]] 
print(Y_pred)

1


In [646]:
X_test = [[158000, 0, 29, 34000]] 
print(Y_pred)

1


In [648]:
X_test = [[158000, 0, 29, 34000]] 
Y_pred = 0
print(Y_prob)

[[0.38 0.62]]


In [650]:
X_test = [[15901537, 0, 29, 35000]]
print(Y_pred)          

0


In [652]:
X_test = [[15901537, 0, 45, 0]]
print(Y_pred)

0


In [654]:
X_test = [[15901537, 0, 45, 10000000]]
print(Y_pred)

0


In [656]:
X_test = [[15901537, 1, 45, 100000]]
print(Y_pred)

0


In [658]:
X_test = [[15901537, 1, 20, 20000]]
print(Y_pred)

0


In [660]:
import gradio as gr
def predict(input_features):
    input_array = np.array(input_features).reshape(1, -1)
    prediction = rf.predict(input_array)
    probability = rf.predict_proba(input_array)
    return {"Prediction": int(prediction[0]), "Probability": probability.tolist()}


In [662]:
inputs = [
    gr.components.Number(label="User_ID"),
    gr.components.Number(label="Gender"),
    gr.components.Number(label="Age"),
    gr.components.Number(label="EstimatedSalary"),
]

In [664]:
outputs = gr.components.JSON()

In [666]:
gr_interface = gr.Interface(fn=predict, inputs=inputs, outputs=outputs, title="Random Forest Classifier",
                            description="User_ID, Gender, Age, EstimatedSalary")




In [668]:
gr_interface.launch()

Running on local URL:  http://127.0.0.1:7873

To create a public link, set `share=True` in `launch()`.


