In [1]:
from pathlib import Path

import joblib
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Load the preprocessed dataset. "Cover_Type" is the target column;
# every remaining column is used as a feature.
df = pd.read_csv("final_preprocessed_data.csv")

X = df.drop("Cover_Type", axis=1)
y = df["Cover_Type"]

# Human-readable names for the integer class labels; saved next to the model
# so consumers can translate predictions.
# NOTE(review): assumes Cover_Type is encoded 0-6 in the CSV — confirm against
# the preprocessing step that produced it.
class_map = {
    0: "Spruce/Fir",
    1: "Lodgepole Pine",
    2: "Ponderosa Pine",
    3: "Cottonwood/Willow",
    4: "Aspen",
    5: "Douglas-fir",
    6: "Krummholz"
}

# Stratified 80/20 split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Scaler and classifier are bundled in one Pipeline so the saved artifact
# applies the same scaling at prediction time.
pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("model", RandomForestClassifier(
        n_estimators=300,
        random_state=42,
        class_weight="balanced",
        n_jobs=-1
    ))
])

pipeline.fit(X_train, y_train)

# joblib.dump does not create missing parent directories; without this a fresh
# checkout fails with FileNotFoundError only after the model has been fitted.
Path("models").mkdir(parents=True, exist_ok=True)

# Save EXACTLY these three files
joblib.dump(pipeline, "models/final_pipeline.pkl")
joblib.dump(X.columns.tolist(), "models/model_features.pkl")
joblib.dump(class_map, "models/class_map.pkl")

print("✅ Model saved cleanly")


✅ Model saved cleanly


In [2]:
# Spot-check the fitted pipeline on the first four rows of the test split,
# one row per predict call. The double brackets in .iloc[[i]] keep each row
# as a one-row DataFrame rather than a Series.
for row_pos in range(4):
    single_row = X_test.iloc[[row_pos]]
    print(pipeline.predict(single_row))


[1]
[5]
[0]
[0]
