In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.impute import KNNImputer
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

train = pd.read_csv("/kaggle/input/csvs-file/hacktrain.csv")
test = pd.read_csv("/kaggle/input/csvs-file/hacktest.csv")


In [2]:
ndvi_cols = [col for col in train.columns if col.endswith('_N')]

for df in [train, test]:
    for col in ndvi_cols:
        df[col] = df[col].rolling(window=3, min_periods=1, center=True).median()


In [3]:
combined = pd.concat([train[ndvi_cols], test[ndvi_cols]], axis=0)
imputer = KNNImputer(n_neighbors=5)
combined_imputed = imputer.fit_transform(combined)

train[ndvi_cols] = combined_imputed[:len(train)]
test[ndvi_cols] = combined_imputed[len(train):]


In [4]:
def add_features(df):
    df['ndvi_mean'] = df[ndvi_cols].mean(axis=1)
    df['ndvi_std'] = df[ndvi_cols].std(axis=1)
    df['ndvi_min'] = df[ndvi_cols].min(axis=1)
    df['ndvi_max'] = df[ndvi_cols].max(axis=1)
    df['ndvi_range'] = df['ndvi_max'] - df['ndvi_min']
    df['ndvi_trend'] = df[ndvi_cols].apply(
        lambda row: np.polyfit(range(len(ndvi_cols)), row.values, 1)[0], axis=1
    )
    return df

train = add_features(train)
test = add_features(test)


In [5]:
features = ndvi_cols + ['ndvi_mean', 'ndvi_std', 'ndvi_min', 'ndvi_max', 'ndvi_range', 'ndvi_trend']
X = train[features]
y = train['class']
X_test = test[features]


In [6]:
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('lr', LogisticRegression(max_iter=500, multi_class='multinomial', solver='lbfgs'))
])

cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
scores = cross_val_score(pipeline, X, y, cv=cv, scoring='accuracy')
print("Cross-validation scores:", scores)
print("Mean accuracy:", scores.mean())


Cross-validation scores: [0.94     0.94625  0.945625 0.95     0.95375 ]
Mean accuracy: 0.9471250000000001


In [7]:
pipeline.fit(X, y)
preds = pipeline.predict(X_test)

In [8]:
submission = pd.DataFrame({
    'ID': test['ID'],
    'class': preds
})
submission.to_csv('submission.csv', index=False)
print("submission.csv created successfully!")


submission.csv created successfully!
