In [1]:
import numpy as np
import pandas as pd
from sklearn.impute import KNNImputer
from sklearn.preprocessing import StandardScaler, LabelEncoder, PolynomialFeatures
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report


# Columns removed from both tables before modelling.
# NOTE(review): "Unnamed: 0" is presumably a saved row index from an earlier to_csv — confirm.
non_feature_cols = ["Unnamed: 0", "ID"]

train_df = pd.read_csv("hacktrain.csv").drop(columns=non_feature_cols)

test_df = pd.read_csv("hacktest.csv")
# Keep the test identifiers aside before they are dropped; the submission needs them.
test_ids = test_df["ID"]
test_df = test_df.drop(columns=non_feature_cols)

In [2]:
# Encode the target labels as integer codes. `le` is kept so that predicted
# codes can be mapped back to the original labels with inverse_transform.
le = LabelEncoder()

# Build the encoded target as its own Series instead of overwriting
# train_df["class"]. Overwriting made this cell non-idempotent: on a second run
# the encoder would be fit on the integer codes, and inverse_transform would
# then return integers rather than the original labels.
y = pd.Series(
    le.fit_transform(train_df["class"]),
    index=train_df.index,
    name="class",
)

# Feature tables: everything except the target for training, and an
# independent copy of the test table so later steps do not touch test_df.
X = train_df.drop(columns=["class"])
X_test = test_df.copy()

In [3]:
def add_ndvi_features(df):
    """Return a copy of ``df`` with row-wise summary statistics appended.

    Five columns are added (or refreshed if they already exist):
    ``ndvi_mean``, ``ndvi_std``, ``ndvi_min``, ``ndvi_max`` and
    ``ndvi_range`` (max minus min). The statistics are computed across the
    columns of ``df`` using the pandas defaults for ``mean``/``std``/``min``/
    ``max`` along ``axis=1``.

    Any of the five derived columns already present in ``df`` are excluded
    from the statistics, so calling the function again on its own output
    (e.g. when a notebook cell is re-run) yields the same values instead of
    folding the previous statistics into the new ones.

    Parameters
    ----------
    df : pandas.DataFrame
        Input table. NOTE(review): assumes every column is numeric — confirm
        against the loaded CSVs.

    Returns
    -------
    pandas.DataFrame
        A new frame; ``df`` itself is not modified.
    """
    derived_cols = ["ndvi_mean", "ndvi_std", "ndvi_min", "ndvi_max", "ndvi_range"]
    # Source columns only: ignore statistics left over from a previous call.
    source = df.drop(columns=derived_cols, errors="ignore")

    df_new = df.copy()
    df_new["ndvi_mean"] = source.mean(axis=1)
    df_new["ndvi_std"] = source.std(axis=1)
    df_new["ndvi_min"] = source.min(axis=1)
    df_new["ndvi_max"] = source.max(axis=1)
    df_new["ndvi_range"] = df_new["ndvi_max"] - df_new["ndvi_min"]
    return df_new

# Append the row-wise summary columns to the training and test feature tables
# with the same helper so both end up with an identical column layout.
X, X_test = (add_ndvi_features(frame) for frame in (X, X_test))

In [4]:
# NOTE(review): the imputer and scaler are fit on the whole training table
# before the cross-validation split below, so validation folds influence the
# preprocessing statistics — confirm this is acceptable for the reported scores.

# Learn the imputation from the training features only, then apply it to both
# tables so the test rows are filled using training neighbours.
imputer = KNNImputer(n_neighbors=5).fit(X)
X_imputed = imputer.transform(X)
X_test_imputed = imputer.transform(X_test)

# Standardise with statistics estimated on the imputed training matrix and
# reuse those statistics for the test matrix.
scaler = StandardScaler().fit(X_imputed)
X_scaled = scaler.transform(X_imputed)
X_test_scaled = scaler.transform(X_test_imputed)

In [None]:
# One seed for both the fold splitter and the solver so that a fresh
# Restart & Run All reproduces the same predictions.
SEED = 42
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)

# Sorted distinct target codes; each one owns a probability column below.
class_codes = np.unique(y)
n_classes = len(class_codes)

# oof_preds: probabilities for each training row from the model that did not
# see it. test_preds: test probabilities averaged over the fold models.
oof_preds = np.zeros((len(X_scaled), n_classes))
test_preds = np.zeros((len(X_test_scaled), n_classes))

for fold, (train_idx, val_idx) in enumerate(skf.split(X_scaled, y)):
    X_train, X_val = X_scaled[train_idx], X_scaled[val_idx]
    # split() yields row positions, so index the target positionally; plain
    # y[...] is label-based and only works while y has a default RangeIndex.
    y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]

    # NOTE(review): `multi_class` is deprecated in recent scikit-learn
    # releases — confirm the installed version before removing it.
    model = LogisticRegression(
        penalty="l1",
        solver="saga",
        max_iter=1000,
        multi_class="multinomial",
        random_state=SEED,  # saga shuffles the data; unseeded runs differ
    )
    model.fit(X_train, y_train)

    # predict_proba returns one column per class present in this fold's
    # training rows; map them onto the full class layout so a fold that
    # lacks a rare class does not cause a shape mismatch.
    cols = np.searchsorted(class_codes, model.classes_)
    oof_preds[np.ix_(val_idx, cols)] = model.predict_proba(X_val)
    test_preds[:, cols] += model.predict_proba(X_test_scaled) / skf.n_splits

# Report out-of-fold quality so the cross-validation actually yields a
# validation estimate before predictions are written out.
oof_codes = class_codes[np.argmax(oof_preds, axis=1)]
print(
    classification_report(
        y,
        oof_codes,
        labels=class_codes,
        target_names=[str(name) for name in le.inverse_transform(class_codes)],
        zero_division=0,
    )
)

# Most probable class per test row, mapped back to the original labels.
y_pred_final = np.argmax(test_preds, axis=1)
y_pred_labels = le.inverse_transform(class_codes[y_pred_final])



In [None]:
# Pair every test identifier with its predicted label, then write the
# two-column table without the DataFrame index.
submission = pd.DataFrame({"ID": test_ids})
submission["class"] = y_pred_labels
submission.to_csv("submission.csv", index=False)
print("✅ submission.csv saved!")