In [3]:
!pip install scikit-learn --break-system-packages



Defaulting to user installation because normal site-packages is not writeable


In [6]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
import numpy as np

# Load Iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Split dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define hyperparameter grid for GridSearchCV
param_grid = {
    'n_estimators': [10, 50, 100, 200],
    'max_depth': [None, 5, 10],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 5, 10]
}

# Define hyperparameter distribution for RandomizedSearchCV
param_dist = {
    'n_estimators': np.arange(10, 200, 10),
    'max_depth': [None] + list(np.arange(5, 20, 5)),
    'min_samples_split': np.arange(2, 11),
    'min_samples_leaf': np.arange(1, 11)
}

# GridSearchCV
grid_search = GridSearchCV(RandomForestClassifier(random_state=42), param_grid, cv=5)
grid_search.fit(X_train, y_train)

print("Best Parameters (GridSearchCV):", grid_search.best_params_)
print("Best Score (GridSearchCV):", grid_search.best_score_)

# RandomizedSearchCV
random_search = RandomizedSearchCV(RandomForestClassifier(random_state=42), param_dist, cv=5, n_iter=10)
random_search.fit(X_train, y_train)

print("Best Parameters (RandomizedSearchCV):", random_search.best_params_)
print("Best Score (RandomizedSearchCV):", random_search.best_score_)

# Evaluate the best models
best_grid_model = grid_search.best_estimator_
best_random_model = random_search.best_estimator_

y_pred_grid = best_grid_model.predict(X_test)
y_pred_random = best_random_model.predict(X_test)

print("Accuracy (GridSearchCV):", accuracy_score(y_test, y_pred_grid))
print("Accuracy (RandomizedSearchCV):", accuracy_score(y_test, y_pred_random))

Best Parameters (GridSearchCV): {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 10}
Best Score (GridSearchCV): 0.95
Best Parameters (RandomizedSearchCV): {'n_estimators': np.int64(110), 'min_samples_split': np.int64(6), 'min_samples_leaf': np.int64(1), 'max_depth': None}
Best Score (RandomizedSearchCV): 0.95
Accuracy (GridSearchCV): 1.0
Accuracy (RandomizedSearchCV): 1.0


In [1]:
with open('model.pkl', 'rb') as f:
    content = f.read(200)
    print(content[:200])  # See what's in the file


b'\x80\x04\x95\xa5\x05\x00\x00\x00\x00\x00\x00}\x94(\x8c\x05model\x94\x8c\x18sklearn.ensemble._forest\x94\x8c\x15RandomForestRegressor\x94\x93\x94)\x81\x94}\x94(\x8c\testimator\x94\x8c\x15sklearn.tree._classes\x94\x8c\x15DecisionTreeRegressor\x94\x93\x94)\x81\x94}\x94(\x8c\tcriterion\x94\x8c\rsquared_error\x94\x8c\x08splitter\x94\x8c\x04best\x94\x8c\tmax'
