In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, confusion_matrix


# Load the dataset. The last column is treated as the target and every
# other column as a feature.
df = pd.read_csv('train.csv')

X_raw = df.iloc[:, :-1]
y_raw = df.iloc[:, -1]

# One-hot encode the non-numeric feature columns so the forest receives
# numeric input only.
X_encoded = pd.get_dummies(X_raw)

# String labels are mapped to integer codes; any other label dtype is
# passed through unchanged.
if y_raw.dtype == 'object':
    y_encoded = LabelEncoder().fit_transform(y_raw)
else:
    y_encoded = y_raw

# Fill missing feature values with each column's most frequent value and
# restore the column names that fit_transform drops.
# NOTE(review): the imputer is fitted on all rows before the train/test
# split, so held-out rows influence the fill values. Consider fitting it on
# X_train only.
imputer = SimpleImputer(strategy='most_frequent')
X_imputed = pd.DataFrame(imputer.fit_transform(X_encoded), columns=X_encoded.columns)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(X_imputed, y_encoded, test_size=0.2, random_state=42)

# Fit a random forest with library-default hyperparameters. (A stray bare
# `r` expression used to sit here and raised NameError on a fresh kernel.)
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Score the held-out rows.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print("Accuracy Score:", accuracy)
print("Confusion Matrix:\n", conf_matrix)


Accuracy Score: 0.6927374301675978
Confusion Matrix:
 [[  3   0  40]
 [  0   2  15]
 [  0   0 119]]


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score


# Load the iris data: every column except the last is a feature, and the
# last column is the label.
iris_df = pd.read_csv('iris.csv')

X, y = iris_df.iloc[:, :-1], iris_df.iloc[:, -1]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Baseline: a 10-tree forest scored on the held-out rows.
# NOTE(review): "default" here means this notebook's baseline of 10 trees;
# recent scikit-learn releases default n_estimators to 100 (confirm against
# the installed version).
model_default = RandomForestClassifier(n_estimators=10, random_state=42)
model_default.fit(X_train, y_train)
y_pred_default = model_default.predict(X_test)
default_score = accuracy_score(y_test, y_pred_default)

print(f"Default score with n_estimators=10: {default_score:.4f}")

# Sweep n_estimators over 5, 10, ..., 100 and remember the best accuracy.
# The comparison is strict, so on ties the smallest n seen first is kept.
# NOTE(review): the winner is chosen using the same held-out split that is
# reported as the score, so "Best score" is an optimistic estimate. Consider
# selecting n_estimators by cross-validation on the training rows.
best_score, best_n = 0, 10
for n in range(5, 105, 5):
    model = RandomForestClassifier(n_estimators=n, random_state=42).fit(X_train, y_train)
    y_pred = model.predict(X_test)
    score = accuracy_score(y_test, y_pred)
    print(f"n_estimators={n}, Accuracy={score:.4f}")
    if score > best_score:
        best_score, best_n = score, n

print(f"\nBest score: {best_score:.4f} with n_estimators={best_n}")


Default score with n_estimators=10: 1.0000
n_estimators=5, Accuracy=0.9667
n_estimators=10, Accuracy=1.0000
n_estimators=15, Accuracy=1.0000
n_estimators=20, Accuracy=1.0000
n_estimators=25, Accuracy=1.0000
n_estimators=30, Accuracy=1.0000
n_estimators=35, Accuracy=1.0000
n_estimators=40, Accuracy=1.0000
n_estimators=45, Accuracy=1.0000
n_estimators=50, Accuracy=1.0000
n_estimators=55, Accuracy=1.0000
n_estimators=60, Accuracy=1.0000
n_estimators=65, Accuracy=1.0000
n_estimators=70, Accuracy=1.0000
n_estimators=75, Accuracy=1.0000
n_estimators=80, Accuracy=1.0000
n_estimators=85, Accuracy=1.0000
n_estimators=90, Accuracy=1.0000
n_estimators=95, Accuracy=1.0000
n_estimators=100, Accuracy=1.0000

Best score: 1.0000 with n_estimators=10
