# Import Modules & Data Files

In [1]:
import numpy as np
import pandas as pd
import os
# Walk the Kaggle input directory and echo the full path of every file found.
input_files = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_files:
    print(path)

/kaggle/input/summer-analytics-mid-hackathon/hacktest.csv
/kaggle/input/summer-analytics-mid-hackathon/hacktrain.csv


In [2]:
import warnings

# Silence only deprecation chatter from the libraries. Convergence, runtime and
# user warnings stay visible because they can signal a real modelling problem
# (e.g. the saga solver stopping before it converged).
for noisy_category in (FutureWarning, DeprecationWarning):
    warnings.filterwarnings('ignore', category=noisy_category)

In [3]:
from scipy.signal import savgol_filter
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, LabelEncoder

In [4]:
# Read the competition's training and test tables into DataFrames.
train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/kaggle/input/summer-analytics-mid-hackathon/hacktrain.csv",
        "/kaggle/input/summer-analytics-mid-hackathon/hacktest.csv",
    )
)

# NDVI Denoising + Imputation

In [5]:
def process_ndvi(df, win=5, poly=2):
    """Impute and smooth the NDVI series stored in the ``*_N`` columns of ``df``.

    Each row is treated as one series. Missing readings are replaced by that
    row's median, then the row is smoothed with a Savitzky-Golay filter. The
    filter is applied to the whole matrix at once (``axis=1``) rather than
    through a Python-level ``DataFrame.apply`` over rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the NDVI readings in columns whose names end in ``_N``.
        All other columns are ignored.
    win : int, default 5
        Requested filter window. It is capped at the number of NDVI columns
        and reduced by one when even. If the effective window is below 3 the
        imputed values are returned without smoothing.
    poly : int, default 2
        Requested polynomial order. It is capped at ``window - 1`` so that a
        window shrunk by a short series does not make ``savgol_filter`` raise.

    Returns
    -------
    pandas.DataFrame
        Smoothed values with the same index as ``df`` and the NDVI columns in
        sorted name order. Rows without a single valid reading stay NaN.
    """
    ndvi_cols = sorted(c for c in df.columns if c.endswith('_N'))
    mat = df[ndvi_cols]
    values = mat.to_numpy(dtype=float)

    # Row-wise median imputation; a row that is entirely NaN has a NaN median
    # and is therefore left untouched.
    row_median = mat.median(axis=1).to_numpy()[:, None]
    filled = np.where(np.isnan(values), row_median, values)

    w = min(win, filled.shape[1])
    if w % 2 == 0:
        w -= 1

    if w >= 3:
        order = min(poly, w - 1)
        # Only rows that are fully populated after imputation are filtered, so
        # an all-NaN row cannot interfere with the batched computation.
        complete = ~np.isnan(filled).any(axis=1)
        if complete.any():
            filled[complete] = savgol_filter(filled[complete], w, order, axis=1)

    filtered = pd.DataFrame(filled, index=df.index, columns=ndvi_cols)
    # Defensive pass kept from the original: patch isolated gaps along the row.
    return filtered.ffill(axis=1).bfill(axis=1)

In [6]:
# Impute and smooth the training NDVI columns using the default window and polynomial order.
train_ndvi = process_ndvi(train_df)

In [7]:
# Apply the same NDVI imputation and smoothing to the test rows.
test_ndvi  = process_ndvi(test_df)

# Temporal Feature Engineering

In [8]:
def create_temporal_features(ndvi_df):
    """Collapse each row of ``ndvi_df`` into eight summary statistics.

    Parameters
    ----------
    ndvi_df : pandas.DataFrame
        One series per row; every column is treated as one reading.

    Returns
    -------
    pandas.DataFrame
        Same index as ``ndvi_df`` with the columns ``mean_ndvi``, ``std_ndvi``,
        ``max_ndvi``, ``min_ndvi``, ``amplitude`` (max minus min),
        ``median_ndvi``, ``q25_ndvi`` and ``q75_ndvi``.
    """
    peak = ndvi_df.max(axis=1)
    trough = ndvi_df.min(axis=1)
    lower_q, upper_q = (ndvi_df.quantile(q, axis=1) for q in (0.25, 0.75))

    summary = {
        'mean_ndvi': ndvi_df.mean(axis=1),
        'std_ndvi': ndvi_df.std(axis=1),
        'max_ndvi': peak,
        'min_ndvi': trough,
        'amplitude': peak - trough,
        'median_ndvi': ndvi_df.median(axis=1),
        'q25_ndvi': lower_q,
        'q75_ndvi': upper_q,
    }
    return pd.DataFrame(summary)

In [9]:
# Summarise every training row's smoothed NDVI series into per-row statistics.
X_train = create_temporal_features(train_ndvi)

In [10]:
# Build the same per-row statistics for the test rows.
X_test  = create_temporal_features(test_ndvi)

# Encode Labels & Scale Features

In [11]:
# Map the string class labels to integer codes; `le` is kept so predictions
# can be decoded back to label names later.
le = LabelEncoder().fit(train_df['class'])
y_train = le.transform(train_df['class'])

# Standardise the features with statistics learned from the training rows
# only, then apply that same transform to both feature tables.
scaler = StandardScaler().fit(X_train)
X_train_s, X_test_s = (scaler.transform(table) for table in (X_train, X_test))

# Model Definition + CV + Fit on Training Set

In [12]:
# Elastic-net regularised logistic regression with class re-weighting.
# `multi_class='multinomial'` is omitted: the parameter is deprecated since
# scikit-learn 1.5, and with a non-liblinear solver and more than two classes
# the default already fits a multinomial model. `n_jobs` is omitted as well,
# since it only parallelises one-vs-rest fits and has no effect here.
logreg = LogisticRegression(
    solver='saga',            # saga is the solver that supports the elastic-net penalty
    penalty='elasticnet',
    l1_ratio=0.5,             # equal mix of L1 and L2
    C=0.3,
    class_weight='balanced',  # re-weight classes inversely to their frequency
    max_iter=2000,
    random_state=42
)

In [13]:
# Stratified 5-fold splitter; shuffling with a fixed seed keeps the folds reproducible.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

In [14]:
# Scale inside each fold: cross-validating on the pre-scaled X_train_s would
# let the validation rows influence the scaler's mean and variance, because
# that matrix was standardised using every training row. cross_val_score
# clones the pipeline, so `logreg` itself is left unfitted here.
cv_model = make_pipeline(StandardScaler(), logreg)
cv_scores = cross_val_score(cv_model, X_train, y_train, cv=cv, scoring='accuracy')
f'5-fold CV accuracy: {cv_scores.mean():.3f} ± {cv_scores.std():.3f}'

'5-fold CV accuracy: 0.540 ± 0.039'

In [15]:
# Fit the final model on the full, scaled training set.
logreg.fit(X_train_s, y_train)

In [16]:
# classification_report returns a multi-line string. Printing it renders the
# table, whereas a bare expression shows the escaped repr with literal "\n".
# Note: these are scores on the rows the model was trained on, so they are an
# optimistic view rather than a held-out estimate.
print(classification_report(y_train, logreg.predict(X_train_s), target_names=le.classes_))

'              precision    recall  f1-score   support\n\n        farm       0.27      0.45      0.33       841\n      forest       0.95      0.55      0.69      6159\n       grass       0.10      0.55      0.17       196\n  impervious       0.84      0.68      0.75       669\n     orchard       0.01      0.40      0.03        30\n       water       0.14      0.70      0.24       105\n\n    accuracy                           0.55      8000\n   macro avg       0.39      0.55      0.37      8000\nweighted avg       0.84      0.55      0.64      8000\n'

# Create Submission

In [17]:
# Predict integer class codes for the test rows, decode them back to the
# original label names, and pair each label with its test ID.
predicted_codes = logreg.predict(X_test_s)
test_pred_labels = le.inverse_transform(predicted_codes)
submission = pd.DataFrame({'ID': test_df['ID'], 'class': test_pred_labels})

In [18]:
# Write the predictions to submission.csv, leaving out the DataFrame index.
submission.to_csv('submission.csv', index=False)

In [19]:
# Preview the first rows of the submission as a sanity check.
submission.head()

Unnamed: 0,ID,class
0,1,orchard
1,2,grass
2,3,forest
3,4,water
4,5,forest
