In [9]:
import numpy as np
from sklearn.model_selection import cross_val_score, train_test_split, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score, recall_score
from io import StringIO

# Upload the dataset file in Colab
from google.colab import files

# Use file browser to upload dataset
print("Please upload your dataset file:")
uploaded = files.upload()

# Process the uploaded file
for filename in uploaded.keys():
    print('Uploaded file "{name}" with length {length} bytes'.format(
        name=filename, length=len(uploaded[filename])))

    # Load the uploaded file into a pandas DataFrame or NumPy array
    data = np.genfromtxt(StringIO(uploaded[filename].decode('utf-8')), delimiter=',', skip_header=1)
    X, y = data[:, :-1], data[:, -1]  # Assuming the last column is the target variable

    # Split the dataset into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define the model
    rf_model = RandomForestClassifier()

    # Define hyperparameters to search over
    param_dist = {
        'n_estimators': [50, 100, 200],
        'max_depth': [10, 20, 30, None],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4]
    }

    # Perform Randomized Search with Cross-Validation
    random_search = RandomizedSearchCV(estimator=rf_model, param_distributions=param_dist, n_iter=10, cv=5, scoring='accuracy', n_jobs=-1)
    random_search.fit(X_train, y_train)

    # Get the best model
    best_rf_model = random_search.best_estimator_

    # Perform cross-validation with the best model
    cv_accuracy = cross_val_score(best_rf_model, X_train, y_train, cv=5, scoring='accuracy')
    cv_precision = cross_val_score(best_rf_model, X_train, y_train, cv=5, scoring='precision_weighted')
    cv_recall = cross_val_score(best_rf_model, X_train, y_train, cv=5, scoring='recall_weighted')

    # Print cross-validation results
    print("Cross-Validation Accuracy:", np.mean(cv_accuracy))
    print("Cross-Validation Precision:", np.mean(cv_precision))
    print("Cross-Validation Recall:", np.mean(cv_recall))


Please upload your dataset file:


Saving seeds.csv to seeds (2).csv
Uploaded file "seeds (2).csv" with length 9125 bytes
Cross-Validation Accuracy: 0.9370967741935484
Cross-Validation Precision: 0.9387668749261492
Cross-Validation Recall: 0.9625
