In [80]:
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import accuracy_score, classification_report, f1_score, mean_squared_error

In [101]:
# Load the scikit-learn wine dataset in DataFrame form; the bundled `frame`
# carries the feature columns together with the `target` column.
dataOfwine = load_wine(as_frame=True)
wine_dataframe = dataOfwine["frame"]

# Quick sanity preview of the first five rows.
print(wine_dataframe.head(n=5))

   alcohol  malic_acid   ash  alcalinity_of_ash  magnesium  total_phenols  \
0    14.23        1.71  2.43               15.6      127.0           2.80   
1    13.20        1.78  2.14               11.2      100.0           2.65   
2    13.16        2.36  2.67               18.6      101.0           2.80   
3    14.37        1.95  2.50               16.8      113.0           3.85   
4    13.24        2.59  2.87               21.0      118.0           2.80   

   flavanoids  nonflavanoid_phenols  proanthocyanins  color_intensity   hue  \
0        3.06                  0.28             2.29             5.64  1.04   
1        2.76                  0.26             1.28             4.38  1.05   
2        3.24                  0.30             2.81             5.68  1.03   
3        3.49                  0.24             2.18             7.80  0.86   
4        2.69                  0.39             1.82             4.32  1.04   

   od280/od315_of_diluted_wines  proline  target  
0          

In [82]:
# Per-column count of exact zeros, skipping the last column (`target`).
zero_count = wine_dataframe.iloc[:, :-1].eq(0).sum()
print(zero_count)

alcohol                         0
malic_acid                      0
ash                             0
alcalinity_of_ash               0
magnesium                       0
total_phenols                   0
flavanoids                      0
nonflavanoid_phenols            0
proanthocyanins                 0
color_intensity                 0
hue                             0
od280/od315_of_diluted_wines    0
proline                         0
dtype: int64


In [102]:
# Per-column count of missing values (target column included).
print(wine_dataframe.isna().sum())

alcohol                         0
malic_acid                      0
ash                             0
alcalinity_of_ash               0
magnesium                       0
total_phenols                   0
flavanoids                      0
nonflavanoid_phenols            0
proanthocyanins                 0
color_intensity                 0
hue                             0
od280/od315_of_diluted_wines    0
proline                         0
target                          0
dtype: int64


In [84]:
# Classification setup: every column except `target` is a feature,
# and `target` is the label to predict.
X = wine_dataframe.drop(columns=['target'])
y = wine_dataframe.loc[:, 'target']

In [85]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [86]:
# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both the training and the test features.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [87]:
# Train a seeded decision tree on the scaled training data and
# predict labels for the held-out test rows.
decisionTree = DecisionTreeClassifier(random_state=42).fit(X_train_scaled, y_train)
y_pred = decisionTree.predict(X_test_scaled)

In [88]:
# Score the decision tree predictions first, then print the summary.
accuracy = accuracy_score(y_test, y_pred)
dt_report = classification_report(y_test, y_pred)

print("Results of Decision Tree Classifier:")
print(f"Accuracy: {accuracy}")
print("Classification Report:")
print(dt_report)

Results of Decision Tree Classifier:
Accuracy: 0.9629629629629629
Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.95      0.95        19
           1       0.95      1.00      0.98        21
           2       1.00      0.93      0.96        14

    accuracy                           0.96        54
   macro avg       0.97      0.96      0.96        54
weighted avg       0.96      0.96      0.96        54



In [89]:
# Train a seeded random forest on the scaled training data and
# predict labels for the held-out test rows.
rf = RandomForestClassifier(random_state=42).fit(X_train_scaled, y_train)
y_pred_rf = rf.predict(X_test_scaled)

In [90]:
# Score the random forest predictions first, then print the summary.
accuracy = accuracy_score(y_test, y_pred_rf)
rf_report = classification_report(y_test, y_pred_rf)

print("Random Forest Tree Classifier Results:")
print(f"Accuracy: {accuracy}")
print("Classification Report:")
print(rf_report)

Random Forest Tree Classifier Results:
Accuracy: 1.0
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      1.00      1.00        21
           2       1.00      1.00      1.00        14

    accuracy                           1.00        54
   macro avg       1.00      1.00      1.00        54
weighted avg       1.00      1.00      1.00        54



In [91]:
# Support-weighted F1 for both classifiers, computed against the same test labels.
f1_dt, f1_rf = (
    f1_score(y_test, predictions, average="weighted")
    for predictions in (y_pred, y_pred_rf)
)
print("Decision Tree F1 score: ", f1_dt)
print("Random Forest Tree F1 score: ", f1_rf)


Decision Tree F1 score:  0.9628353590455226
Random Forest Tree F1 score:  1.0


In [92]:
# Exhaustive search space: 3 * 4 * 3 = 36 combinations.
param_grid = dict(
    n_estimators=[50, 100, 200],
    max_depth=[None, 10, 20, 30],
    min_samples_split=[2, 5, 10],
)

# 5-fold cross-validated accuracy for every combination, using all CPU cores.
grid_search = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    verbose=1,
    n_jobs=-1,
)
grid_search.fit(X_train_scaled, y_train)

print("Best Hyperparameters:", grid_search.best_params_)

Fitting 5 folds for each of 36 candidates, totalling 180 fits
Best Hyperparameters: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 50}


In [93]:
# Regression setup: predict `alcohol` from every other column.
# Only `alcohol` is removed, so the class label `target` stays in as a feature.
X_dr = wine_dataframe.drop(columns=['alcohol'])
y_dr = wine_dataframe.loc[:, 'alcohol']

In [94]:
# 70/30 split for the regression task, seeded for repeatability.
X_train_r, X_test_r, y_train_r, y_test_r = train_test_split(
    X_dr,
    y_dr,
    test_size=0.3,
    random_state=42,
)

In [95]:
# Fit a fresh scaler on the regression training features and apply it to both
# splits. NOTE: this rebinds `scaler`, so from here on it no longer refers to
# the scaler fitted on the classification features.
scaler = StandardScaler().fit(X_train_r)
X_train_scaled_r = scaler.transform(X_train_r)
X_test_scaled_r = scaler.transform(X_test_r)

In [96]:
# Train a seeded decision tree regressor and predict `alcohol` for the test rows.
dt_reg = DecisionTreeRegressor(random_state=42).fit(X_train_scaled_r, y_train_r)
y_pred_dt = dt_reg.predict(X_test_scaled_r)

In [97]:
# Test-set mean squared error of the decision tree regressor.
mse_r = mean_squared_error(y_test_r, y_pred_dt)

print("\nDecision Tree Regressor Results:")
print(f"Mean Squared Error: {mse_r}")


Decision Tree Regressor Results:
Mean Squared Error: 0.38300370370370396


In [98]:
rf_reg = RandomForestRegressor(random_state=42)
rf_reg.fit(X_train_scaled_r, y_train_r)
y_pred_rf = rf_reg.predict(X_test_scaled_r)

In [99]:
print("\nRandom Forest Regressor Results:")
mse_rf = mean_squared_error(y_test_r, y_pred_rf)
print(f"Mean Squared Error: {mse_rf}")


Random Forest Regressor Results:
Mean Squared Error: 0.17294733555555533


In [100]:
# Candidate values for the randomized search (3 * 4 * 3 = 36 combinations).
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
}

# RandomizedSearchCV evaluates only a random subset of the combinations
# (10 by default). Without a seed that subset, and therefore the reported best
# parameters, changes on every run; `random_state` makes the search repeatable,
# consistent with the seed used for the splits and models above.
random_search = RandomizedSearchCV(
    estimator=rf_reg,
    param_distributions=param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
    verbose=1,
    n_jobs=-1,
    random_state=42,
)
random_search.fit(X_train_scaled_r, y_train_r)


print("Best Hyperparameters:", random_search.best_params_)

Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best Hyperparameters: {'n_estimators': 50, 'min_samples_split': 10, 'max_depth': 20}
