## Import Libraries

In [107]:
import numpy as np
from sklearn import datasets
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

## Load the dataset

In [15]:
# Load the wine dataset bundled with scikit-learn. Later cells read its
# .data, .target, .feature_names and .target_names attributes.
wine = datasets.load_wine()

In [10]:
# Show the whole dataset object (feature matrix, target labels, ...).
wine

{'data': array([[1.423e+01, 1.710e+00, 2.430e+00, ..., 1.040e+00, 3.920e+00,
         1.065e+03],
        [1.320e+01, 1.780e+00, 2.140e+00, ..., 1.050e+00, 3.400e+00,
         1.050e+03],
        [1.316e+01, 2.360e+00, 2.670e+00, ..., 1.030e+00, 3.170e+00,
         1.185e+03],
        ...,
        [1.327e+01, 4.280e+00, 2.260e+00, ..., 5.900e-01, 1.560e+00,
         8.350e+02],
        [1.317e+01, 2.590e+00, 2.370e+00, ..., 6.000e-01, 1.620e+00,
         8.400e+02],
        [1.413e+01, 4.100e+00, 2.740e+00, ..., 6.100e-01, 1.600e+00,
         5.600e+02]]),
 'target': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1

In [17]:
# Switch NumPy array printing to fixed-point notation. This is a global
# setting, so array output in every later cell is affected as well.
np.set_printoptions(suppress=True)  # Suppresses exponential notation
print(wine.data)

[[  14.23    1.71    2.43 ...    1.04    3.92 1065.  ]
 [  13.2     1.78    2.14 ...    1.05    3.4  1050.  ]
 [  13.16    2.36    2.67 ...    1.03    3.17 1185.  ]
 ...
 [  13.27    4.28    2.26 ...    0.59    1.56  835.  ]
 [  13.17    2.59    2.37 ...    0.6     1.62  840.  ]
 [  14.13    4.1     2.74 ...    0.61    1.6   560.  ]]


In [18]:
# Show the dataset object again; the arrays now render without exponents
# because of the print options set in the previous cell.
wine

{'data': array([[  14.23,    1.71,    2.43, ...,    1.04,    3.92, 1065.  ],
        [  13.2 ,    1.78,    2.14, ...,    1.05,    3.4 , 1050.  ],
        [  13.16,    2.36,    2.67, ...,    1.03,    3.17, 1185.  ],
        ...,
        [  13.27,    4.28,    2.26, ...,    0.59,    1.56,  835.  ],
        [  13.17,    2.59,    2.37, ...,    0.6 ,    1.62,  840.  ],
        [  14.13,    4.1 ,    2.74, ...,    0.61,    1.6 ,  560.  ]]),
 'target': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 

## Input Features & Output Features

In [11]:
# Names of the 13 input features.
wine.feature_names

['alcohol',
 'malic_acid',
 'ash',
 'alcalinity_of_ash',
 'magnesium',
 'total_phenols',
 'flavanoids',
 'nonflavanoid_phenols',
 'proanthocyanins',
 'color_intensity',
 'hue',
 'od280/od315_of_diluted_wines',
 'proline']

In [12]:
# Names of the three target classes.
wine.target_names

array(['class_0', 'class_1', 'class_2'], dtype='<U7')

In [20]:
# Integer class label (0, 1 or 2) of every sample.
wine.target

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2])

## Glimpse of data 

### Input features

In [19]:
# Feature matrix: one row per sample, one column per feature.
wine.data 

array([[  14.23,    1.71,    2.43, ...,    1.04,    3.92, 1065.  ],
       [  13.2 ,    1.78,    2.14, ...,    1.05,    3.4 , 1050.  ],
       [  13.16,    2.36,    2.67, ...,    1.03,    3.17, 1185.  ],
       ...,
       [  13.27,    4.28,    2.26, ...,    0.59,    1.56,  835.  ],
       [  13.17,    2.59,    2.37, ...,    0.6 ,    1.62,  840.  ],
       [  14.13,    4.1 ,    2.74, ...,    0.61,    1.6 ,  560.  ]])

### Output variable

In [21]:
# Target vector: the integer class label the model will learn to predict.
wine.target

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2])

## Assigning input and output variables 

Let's assign the 13 input features to X and the output variable (the class label) to Y.

In [22]:
# X is the feature matrix, Y the matching vector of integer class labels.
X, Y = wine.data, wine.target

## Let's examine the data dimension 

In [23]:
# (number of samples, number of features)
X.shape

(178, 13)

In [24]:
# One label per sample, so the length must equal the row count of X.
Y.shape

(178,)

# Build Classification Model using Random Forest

In [25]:
# Random forest with default hyper-parameters. random_state is pinned so the
# randomised tree construction, and therefore the feature importances and
# predictions shown below, is identical on every run of the notebook.
clf = RandomForestClassifier(random_state=42)

In [27]:
# Fit on the complete dataset (no hold-out yet); the train/test split is
# introduced in a later section.
clf.fit(X,Y)

# Feature Importance 

In [28]:
# Importance score of each feature in the fitted forest, one value per
# column of X.
print(clf.feature_importances_)

[0.12620697 0.03538829 0.01299562 0.02609781 0.03297301 0.07003578
 0.12680218 0.00593813 0.01739023 0.16147384 0.07672876 0.12862014
 0.17934925]


## Make Prediction 

In [29]:
# Feature values of the first sample; it is the example input for the
# prediction cells that follow.
X[0]

array([  14.23,    1.71,    2.43,   15.6 ,  127.  ,    2.8 ,    3.06,
          0.28,    2.29,    5.64,    1.04,    3.92, 1065.  ])

In [34]:
# Refit on class names instead of integer codes: indexing target_names with
# the label array maps every 0/1/2 to its 'class_N' string, so predict()
# returns names from here until the model is fitted again.
clf.fit(wine.data , wine.target_names[wine.target])

In [35]:
# The single sample is wrapped in a list so it is passed as a one-row batch.
print(clf.predict([X[0]]))

['class_0']


In [36]:
# Predictions for every sample. These are the same samples the model was
# fitted on, so this is not a measure of generalisation.
print(clf.predict(X))

['class_0' 'class_0' 'class_0' 'class_0' 'class_0' 'class_0' 'class_0'
 'class_0' 'class_0' 'class_0' 'class_0' 'class_0' 'class_0' 'class_0'
 'class_0' 'class_0' 'class_0' 'class_0' 'class_0' 'class_0' 'class_0'
 'class_0' 'class_0' 'class_0' 'class_0' 'class_0' 'class_0' 'class_0'
 'class_0' 'class_0' 'class_0' 'class_0' 'class_0' 'class_0' 'class_0'
 'class_0' 'class_0' 'class_0' 'class_0' 'class_0' 'class_0' 'class_0'
 'class_0' 'class_0' 'class_0' 'class_0' 'class_0' 'class_0' 'class_0'
 'class_0' 'class_0' 'class_0' 'class_0' 'class_0' 'class_0' 'class_0'
 'class_0' 'class_0' 'class_0' 'class_1' 'class_1' 'class_1' 'class_1'
 'class_1' 'class_1' 'class_1' 'class_1' 'class_1' 'class_1' 'class_1'
 'class_1' 'class_1' 'class_1' 'class_1' 'class_1' 'class_1' 'class_1'
 'class_1' 'class_1' 'class_1' 'class_1' 'class_1' 'class_1' 'class_1'
 'class_1' 'class_1' 'class_1' 'class_1' 'class_1' 'class_1' 'class_1'
 'class_1' 'class_1' 'class_1' 'class_1' 'class_1' 'class_1' 'class_1'
 'clas

In [37]:
# Predicted probability of each class for the first sample (one column per class).
print(clf.predict_proba([X[0]]))

[[1. 0. 0.]]


## Data split (80/20 ratio)

In [38]:
# Hold out 20% of the samples for testing; random_state fixes the shuffle so
# the same split is produced on every run.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=100,
)

In [39]:
# Shapes of the training features and training labels.
(X_train.shape, Y_train.shape)

((142, 13), (142,))

In [40]:
# Shapes of the test features and test labels.
(X_test.shape, Y_test.shape)

((36, 13), (36,))

# Rebuild the Random Forest Model 

In [41]:
# Refit the existing classifier on the training split only, so the test
# split stays unseen. The labels are integer codes again.
clf.fit(X_train,Y_train)

## Perform prediction on a single sample

In [103]:
# Predicted class of the first sample of the full dataset, passed as a
# one-row batch.
print(clf.predict([X[0]]))

[0]


In [43]:
# Class probabilities for the same first sample.
print(clf.predict_proba([X[0]]))

[[1. 0. 0.]]


## Perform Prediction on test set 

#### Predicted class label

In [110]:
# Store the predictions first, then print them: print() itself returns None,
# so its result must not be what gets assigned to y_pred.
y_pred = clf.predict(X_test)
print(y_pred)

[1 2 0 1 1 1 0 1 1 1 2 1 2 2 2 0 2 0 1 0 1 0 2 1 0 0 1 1 1 2 2 2 0 0 2 1]


#### Actual class label

In [46]:
# True labels of the test split, for comparison with the predicted labels.
print(Y_test)

[1 2 0 1 2 2 1 1 1 1 2 1 2 2 2 0 2 0 1 0 2 0 1 1 0 0 1 1 1 2 2 1 0 1 2 2]


# Model Performance 

In [109]:
# Train and test splits must have the same number of feature columns
# (their row counts differ by design).
assert X_test.shape[1] == X_train.shape[1], (
    f"feature count mismatch: train={X_train.shape[1]}, test={X_test.shape[1]}"
)
print("Final X_test shape:", X_test.shape)

# Standardize the features and train the forest as a single pipeline. The
# scaler is fitted on the training split only and is re-applied automatically
# whenever the model predicts, so X_train / X_test are left in their raw form:
# the cell can be re-run safely, and every cell that calls `clf` keeps passing
# unscaled inputs.
clf = make_pipeline(
    StandardScaler(),
    RandomForestClassifier(n_estimators=100, random_state=42),
)
clf.fit(X_train, Y_train)

# Make predictions
y_pred = clf.predict(X_test)

# Print predictions
print("Predictions:", y_pred)


Final X_test shape: (36, 1)
Predictions: [1 2 0 1 1 1 0 1 1 1 2 1 2 2 2 0 2 0 1 0 1 0 2 1 0 0 1 1 1 2 2 2 0 0 2 1]


In [99]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_auc_score
import seaborn as sns

In [115]:
# Make predictions
y_pred = clf.predict(X_test)
y_pred_proba = clf.predict_proba(X_test)

# Evaluate model performance
print(f"Accuracy: {accuracy_score(Y_test, y_pred):.2f}")

Accuracy: 0.78


In [116]:
# Classification Report
print("Classification Report:")
print(classification_report(Y_test, y_pred))

Classification Report:
              precision    recall  f1-score   support

           0       0.80      1.00      0.89         8
           1       0.73      0.73      0.73        15
           2       0.82      0.69      0.75        13

    accuracy                           0.78        36
   macro avg       0.78      0.81      0.79        36
weighted avg       0.78      0.78      0.77        36



In [117]:
# ROC-AUC Score (for binary or multi-class classification)
if len(clf.classes_) == 2:
    roc_auc = roc_auc_score(Y_test, y_pred_proba[:, 1])
else:
    roc_auc = roc_auc_score(Y_test, y_pred_proba, multi_class='ovr')
print(f"ROC-AUC Score: {roc_auc:.2f}")

ROC-AUC Score: 0.87


In [119]:
# The model's own score on the test split, printed at full precision.
print(clf.score(X_test , Y_test))

0.7777777777777778
