In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

np.random.seed(42)
num_students = 100

data = {
    'student_id': range(1, num_students + 1),
    'subject1': np.random.randint(50, 100, size=num_students),
    'subject2': np.random.randint(50, 100, size=num_students),
    'subject3': np.random.randint(50, 100, size=num_students),
    'study_hours': np.random.randint(1, 6, size=num_students),
    'extracurricular_activities': np.random.choice([0, 1], size=num_students),
    'target_variable': np.random.choice([0, 1], size=num_students)
}

df = pd.DataFrame(data)
df.head()
if len(data) != 0:
    if {'subject1', 'subject2', 'subject3'}.issubset(data.keys()):
        df['total_score'] = df['subject1'] + df['subject2'] + df['subject3']
    else:
        print("Warning: Subject columns ('subject1', 'subject2', 'subject3') not found for feature engineering.")

    X = df.drop('target_variable', axis=1)
    y = df['target_variable']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = RandomForestClassifier(random_state=42)

    param_grid = {
        'n_estimators': [50, 100, 200],
        'max_depth': [None, 10, 20],
        'min_samples_split': [2, 5, 10]
    }

    grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1)

    grid_search.fit(X_train, y_train)

    best_model = grid_search.best_estimator_
    print("Best Hyperparameters:", grid_search.best_params_)

    y_pred = best_model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy of the best model: {accuracy}")
else:
  print("Error: Dataframe is empty.  Cannot perform operations.")



Best Hyperparameters: {'max_depth': None, 'min_samples_split': 10, 'n_estimators': 200}
Accuracy of the best model: 0.6
