In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

In [2]:
# Toy dataset, one list entry per row.
# Features: Age, Income and Gender. Target variable: Purchased.
data = {
    "Age": [22, 35, 26, 29, 40, 50, 34, 28, 23, 60],
    "Income": [
        20_000, 50_000, 25_000, 30_000, 80_000,
        100_000, 70_000, 32_000, 21_000, 120_000,
    ],
    "Gender": [
        "Male", "Female", "Female", "Male", "Male",
        "Female", "Female", "Male", "Male", "Female",
    ],
    # Purchase outcome: 0 = No, 1 = Yes.
    "Purchased": [0, 1, 0, 0, 1, 1, 1, 0, 0, 1],
}

In [3]:
# Build the working frame: each key of `data` becomes a column.
df = pd.DataFrame.from_dict(data)

In [4]:
# Display the freshly built frame to inspect the raw values.
df

Unnamed: 0,Age,Income,Gender,Purchased
0,22,20000,Male,0
1,35,50000,Female,1
2,26,25000,Female,0
3,29,30000,Male,0
4,40,80000,Male,1
5,50,100000,Female,1
6,34,70000,Female,1
7,28,32000,Male,0
8,23,21000,Male,0
9,60,120000,Female,1


In [7]:
# Print the column dtypes and non-null counts; `verbose` is named explicitly
# so the meaning of the flag is clear at the call site.
df.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   Age        10 non-null     int64 
 1   Income     10 non-null     int64 
 2   Gender     10 non-null     object
 3   Purchased  10 non-null     int64 
dtypes: int64(3), object(1)
memory usage: 448.0+ bytes


In [8]:
# Encode Gender as integers for the classifier: Male -> 1, Female -> 0.
gender_codes = {"Male": 1, "Female": 0}

# Series.map turns every value that is not a key of the mapping into NaN, so
# running a plain `df["Gender"] = df["Gender"].map(...)` a second time (when
# the column already holds 1/0) would wipe the whole column. Only encode while
# raw labels are still present, which makes this cell safe to re-run.
if df["Gender"].isin(list(gender_codes)).any():
    df["Gender"] = df["Gender"].map(gender_codes)

In [9]:
# Display the frame again to check the Gender column after the mapping step.
df

Unnamed: 0,Age,Income,Gender,Purchased
0,22,20000,1,0
1,35,50000,0,1
2,26,25000,0,0
3,29,30000,1,0
4,40,80000,1,1
5,50,100000,0,1
6,34,70000,0,1
7,28,32000,1,0
8,23,21000,1,0
9,60,120000,0,1


In [10]:
# Re-check the column dtypes and non-null counts; `verbose` is named
# explicitly so the meaning of the flag is clear at the call site.
df.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype
---  ------     --------------  -----
 0   Age        10 non-null     int64
 1   Income     10 non-null     int64
 2   Gender     10 non-null     int64
 3   Purchased  10 non-null     int64
dtypes: int64(4)
memory usage: 448.0 bytes


In [11]:
# Split the frame into the feature matrix (X) and the target vector (y).
FEATURE_COLUMNS = ["Gender", "Age", "Income"]
TARGET_COLUMN = "Purchased"

X = df[FEATURE_COLUMNS]
y = df[TARGET_COLUMN]

In [12]:
# Display the feature matrix (Gender, Age, Income).
X

Unnamed: 0,Gender,Age,Income
0,1,22,20000
1,0,35,50000
2,0,26,25000
3,1,29,30000
4,1,40,80000
5,0,50,100000
6,0,34,70000
7,1,28,32000
8,1,23,21000
9,0,60,120000


In [13]:
# Display the target column (Purchased).
y

Unnamed: 0,Purchased
0,0
1,1
2,0
3,0
4,1
5,1
6,1
7,0
8,0
9,1


In [14]:
# Hold out a 0.33 fraction of the rows for evaluation; the fixed seed keeps
# the split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [15]:
# Fit a random forest on the training split; the seeded estimator makes the
# fit repeatable. The fit call stays as the last expression so the cell
# still displays the fitted estimator.
model = RandomForestClassifier(random_state=42)
model.fit(X=X_train, y=y_train)

In [17]:
# Predict the class label for every row of the held-out split.
y_pred = model.predict(X=X_test)
print(y_pred)

[0 1 1 0]


In [18]:
# Fraction of held-out rows predicted correctly. Arguments are passed by
# keyword so the true/predicted order cannot be swapped by accident.
# NOTE(review): the test split holds only 4 rows, so this score is a very
# coarse estimate of model quality.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(accuracy)

1.0


In [19]:
# Per-class precision / recall / F1 on the held-out split, as a text table.
classify = classification_report(y_true=y_test, y_pred=y_pred)
print(classify)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         2
           1       1.00      1.00      1.00         2

    accuracy                           1.00         4
   macro avg       1.00      1.00      1.00         4
weighted avg       1.00      1.00      1.00         4

