In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.impute import KNNImputer
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier

In [2]:
train = pd.read_csv('/kaggle/input/hackathon/hacktrain.csv')
test = pd.read_csv('/kaggle/input/hackathon/hacktest.csv')
test_ids = test['ID'].copy()

In [3]:
ndvi_cols = [col for col in train.columns if '_N' in col]
X_train_raw = train[ndvi_cols]
y_train = train['class']
X_test_raw = test[ndvi_cols]

In [4]:
imputer = KNNImputer(n_neighbors=3)
X_train_imputed = pd.DataFrame(imputer.fit_transform(X_train_raw), columns=ndvi_cols)
X_test_imputed = pd.DataFrame(imputer.transform(X_test_raw), columns=ndvi_cols)

In [5]:
def generate_features(df):
    df_feat = df.copy()
    df_diff = df.diff(axis=1)
    df_feat['mean'] = df.mean(axis=1)
    df_feat['std'] = df.std(axis=1)
    df_feat['max'] = df.max(axis=1)
    df_feat['min'] = df.min(axis=1)
    df_feat['range'] = df_feat['max'] - df_feat['min']
    df_feat['median'] = df.median(axis=1)
    df_feat['q1'] = df.quantile(0.25, axis=1)
    df_feat['q3'] = df.quantile(0.75, axis=1)
    df_feat['zero_crossings'] = df_diff.apply(
    lambda x: np.sum(np.diff(np.sign(np.nan_to_num(x))) != 0), axis=1)
    df_feat['slope'] = df.apply(lambda x: np.polyfit(range(len(x)), x, 1)[0], axis=1)
    df_feat['diff_mean'] = df_diff.mean(axis=1)
    df_feat['diff_std'] = df_diff.std(axis=1)
    return df_feat

X_train_feat = generate_features(X_train_imputed)
X_test_feat = generate_features(X_test_imputed)

In [6]:
le = LabelEncoder()
y_train_encoded = le.fit_transform(y_train)

In [7]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_feat)
X_test_scaled = scaler.transform(X_test_feat)

In [8]:
xgb = XGBClassifier(
    n_estimators=300,
    max_depth=6,
    learning_rate=0.05,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42
)
xgb.fit(X_train_scaled, y_train_encoded)

In [9]:
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
cv_scores = cross_val_score(xgb, X_train_scaled, y_train_encoded, cv=cv)
print("XGBoost CV Accuracy:", cv_scores.mean())


XGBoost CV Accuracy: 0.95975


In [10]:
preds_encoded = xgb.predict(X_test_scaled)
preds = le.inverse_transform(preds_encoded)


submission = pd.DataFrame({'ID': test_ids, 'class': preds})
submission.to_csv('submission.csv', index=False)
print("Submission file created successfully.")

Submission file created successfully.
