In [6]:
import warnings

In [8]:
warnings.filter('Ignore')

AttributeError: module 'warnings' has no attribute 'filter'

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

# Load dataset from URL
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"
column_names = [
    'class', 'cap-shape', 'cap-surface', 'cap-color', 'bruises', 'odor', 'gill-attachment',
    'gill-spacing', 'gill-size', 'gill-color', 'stalk-shape', 'stalk-root', 'stalk-surface-above-ring',
    'stalk-surface-below-ring', 'stalk-color-above-ring', 'stalk-color-below-ring', 'veil-type',
    'veil-color', 'ring-number', 'ring-type', 'spore-print-color', 'population', 'habitat'
]
df = pd.read_csv(url, header=None, names=column_names)

# Handle missing values ('?') in 'stalk-root'
df.replace('?', np.nan, inplace=True)
df['stalk-root'].fillna(df['stalk-root'].mode()[0], inplace=True)

# Separate the target column ('class') before encoding
y = df['class'].apply(lambda x: 1 if x == 'e' else 0)  # Encode 'e' (edible) as 1, 'p' (poisonous) as 0
X = df.drop(columns=['class'])

# Encode categorical variables in features using one-hot encoding
X_encoded = pd.get_dummies(X, drop_first=True)

# Train-test split (80/20)
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.2, random_state=42)

# Q1: Decision Tree Classifier using ID3
dt_model = DecisionTreeClassifier(criterion='entropy', random_state=42)
dt_model.fit(X_train, y_train)

# Evaluate Decision Tree
y_pred_dt = dt_model.predict(X_test)
dt_accuracy = accuracy_score(y_test, y_pred_dt)
print("Decision Tree Classifier:")
print(f"Accuracy: {dt_accuracy:.4f}")
print(classification_report(y_test, y_pred_dt))

# Q2: Random Forest Classifier
n_features = int(np.ceil(np.sqrt(X_train.shape[1])))  # Square root of total features
rf_model = RandomForestClassifier(max_features=n_features, random_state=42)
rf_model.fit(X_train, y_train)

# Evaluate Random Forest
y_pred_rf = rf_model.predict(X_test)
rf_accuracy = accuracy_score(y_test, y_pred_rf)
print("Random Forest Classifier:")
print(f"Accuracy: {rf_accuracy:.4f}")
print(classification_report(y_test, y_pred_rf))


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['stalk-root'].fillna(df['stalk-root'].mode()[0], inplace=True)


Decision Tree Classifier:
Accuracy: 1.0000
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       782
           1       1.00      1.00      1.00       843

    accuracy                           1.00      1625
   macro avg       1.00      1.00      1.00      1625
weighted avg       1.00      1.00      1.00      1625

Random Forest Classifier:
Accuracy: 1.0000
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       782
           1       1.00      1.00      1.00       843

    accuracy                           1.00      1625
   macro avg       1.00      1.00      1.00      1625
weighted avg       1.00      1.00      1.00      1625

