In [None]:
# Clone the rapidsai-csp-utils helper repository and run its Colab pip-install script.
# NOTE(review): presumably this installs the RAPIDS packages (cudf, cuml) imported
# later in this notebook — confirm against the script before relying on it.
!git clone https://github.com/rapidsai/rapidsai-csp-utils.git
!python rapidsai-csp-utils/colab/pip-install.py

# Import the necessary modules

In [None]:
from itertools import product

import cudf
import cupy as cp
from cuml.ensemble import RandomForestClassifier as cuRF
from cuml.metrics import accuracy_score
from cuml.model_selection import train_test_split, GridSearchCV
from sklearn.model_selection import StratifiedKFold

In [None]:
# Read the dataset CSV into a cuDF DataFrame.
dataset_path = "/content/i5_final_dataset.csv"
df = cudf.read_csv(dataset_path)

In [None]:
# Replace every missing value with 0, then display the per-column count of
# values that are still null.
df = df.fillna(value=0)
df.isna().sum()

# RandomForest Model

In [None]:
# Name of the label column; every other column is used as a feature.
target_column = 'final_result'

# Split into features and target
X = df.drop(target_column, axis=1)
y = df[target_column]

# Hold out 20% of the rows for testing, stratified on the target.
# A fixed random_state makes the split identical on every run; the forest
# itself is still unseeded, so small run-to-run differences may remain.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Baseline: random forest with the library's default hyperparameters
model = cuRF()

# Train the model
model.fit(X_train, y_train)

# Predict on the held-out rows and convert both sides to NumPy arrays
# for the accuracy calculation
predictions = model.predict(X_test).to_numpy()
y_test = y_test.to_numpy()

# Evaluate the model
accuracy = accuracy_score(y_test, predictions)
print(f"Accuracy for predicting {target_column} is {accuracy}")

In [None]:
def train_and_evaluate(X_train, X_test, y_train, y_test, params):
    """Fit a random forest with the given hyperparameters and score it.

    Args:
        X_train: Training features passed to ``fit``.
        X_test: Features to predict on.
        y_train: Training labels passed to ``fit``.
        y_test: True labels for ``X_test``.
        params: Keyword arguments forwarded to the ``cuRF`` constructor.

    Returns:
        The value returned by ``accuracy_score`` for the true labels versus
        the model's predictions on ``X_test``.
    """
    forest = cuRF(**params)
    forest.fit(X_train, y_train)

    # Both sides are turned into CuPy arrays before scoring.
    predicted = cp.asarray(forest.predict(X_test))
    actual = cp.asarray(y_test)

    return accuracy_score(actual, predicted)

In [None]:
# Split into features and target
X = df.drop('final_result', axis=1)
y = df['final_result']

# Hold out 20% of the rows for testing, stratified on the target.
# A fixed random_state keeps the split identical across re-runs, so accuracy
# differences between runs are not caused by a different train/test partition.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Define hyperparameter grid
param_grid = {
    'n_estimators': [200, 250, 300],
    'max_depth': [40, 50, 60],
    'min_samples_split': [2, 4],
    'min_samples_leaf': [1, 2],
    'max_features': ['log2']
}

# NOTE(review): every combination is scored on the test split and the best one
# is chosen from those scores, so "Best Accuracy" is an optimistic estimate.
# Consider selecting on a separate validation split or with cross-validation.

# Collect (params, accuracy) for every combination in the grid
results = []
param_names = list(param_grid)

# product() walks the grid in the same order as nested loops over the keys
# (the last key varies fastest).
for combo in product(*param_grid.values()):
    params = dict(zip(param_names, combo))
    accuracy = train_and_evaluate(X_train, X_test, y_train, y_test, params)
    results.append((params, accuracy))
    print(f"Params: {params}, Accuracy: {accuracy}")
print("\n\n\n\n")
# Find the best parameters (the first one wins on ties)
best_params, best_accuracy = max(results, key=lambda x: x[1])
print(f"Best Params: {best_params}, Best Accuracy: {best_accuracy}")
print("\n")

Based on my results:

```
# Best Params: {'n_estimators': 250, 'max_depth': 50, 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_features': 'log2'}, Best Accuracy: 0.80356764793396

```



---
> Creating a file to save the results

In [None]:
# Append the best configuration to the results log, preceded by a newline and
# followed by a dotted separator line.
summary_line = f"Best Params: {best_params}, Best Accuracy: {best_accuracy}"
separator = "\n...................................................................."
with open("/content/results_RF.txt", "a") as file1:
  file1.write("\n" + summary_line + separator)

> Fetching the saved information from the file


In [None]:
# Read the whole results log back in, then print it.
with open("/content/results_RF.txt", "r") as file2:
  f2 = file2.read()
print(f2)