In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectFromModel

 df = pd.read_csv('merged.csv')

 
X = df.drop(columns=["unique_id", "Type", "Type_x", "Type_y"])
y = df["Type"]

X = X.fillna(0)

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


feature_selector = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)
feature_selector.fit(X_scaled, y)

selector = SelectFromModel(feature_selector, threshold="0.75*mean", prefit=True)
X_selected = selector.transform(X_scaled)

print(f"Original features: {X.shape[1]}, Selected features: {X_selected.shape[1]}")


X_train, X_test, y_train, y_test = train_test_split(X_selected, y, test_size=0.2, random_state=42, stratify=y)

# ---------------------------------------------------------------------------
# Hyper-parameter search and held-out evaluation of the random forest.
# ---------------------------------------------------------------------------

# 3 x 3 x 3 x 3 = 81 candidate settings; with cv=3 that is 243 fits plus the
# final refit performed by GridSearchCV.
param_grid = {
    'n_estimators': [400, 500, 600],
    'max_depth': [22, 26, 32],
    'min_samples_split': [3, 5, 8],
    'min_samples_leaf': [1, 2, 3],
    'max_features': ['sqrt'],
    'bootstrap': [True],
    'max_samples': [0.95],
}

# Class label 5 is given twice the weight of every other class.
# NOTE(review): the dict assumes the labels are exactly the integers 0-6 --
# confirm against the values in y_train.
rf = RandomForestClassifier(class_weight={0:1, 1:1, 2:1, 3:1, 4:1, 5:2, 6:1}, random_state=42, n_jobs=-1)

rf_grid_search = GridSearchCV(
    rf, param_grid, cv=3, verbose=1, n_jobs=-1, scoring='accuracy'
)
rf_grid_search.fit(X_train, y_train)

# GridSearchCV uses refit=True by default, so best_estimator_ has already been
# retrained on the whole of X_train with the winning parameters. Fitting it
# again would only repeat that work and produce the same model.
best_rf = rf_grid_search.best_estimator_

# Score once on the held-out test partition.
y_pred = best_rf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"Final Optimized Accuracy: {accuracy * 100:.2f}%")
print("Optimized Classification Report:\n", classification_report(y_test, y_pred))
