In [1]:
# 1. Imports
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder, StandardScaler, PolynomialFeatures
from sklearn.model_selection import StratifiedKFold, cross_val_score

# 2. Load data
train_df = pd.read_csv('hacktrain.csv')
test_df = pd.read_csv('hacktest.csv')

In [2]:
# 3. NDVI columns
ndvi_columns = [col for col in train_df.columns if col.endswith('_N')]

# 4. Combine for preprocessing
train_df['is_train'] = 1
test_df['is_train'] = 0
test_df['class'] = np.nan
full_df = pd.concat([train_df, test_df], ignore_index=True)

# 5. Interpolate missing NDVI values row-wise
full_df[ndvi_columns] = full_df[ndvi_columns].interpolate(axis=1, limit_direction='both')

# 6. Apply rolling mean smoothing
full_df[ndvi_columns] = full_df[ndvi_columns].rolling(window=3, axis=1, min_periods=1).mean()

  full_df[ndvi_columns] = full_df[ndvi_columns].rolling(window=3, axis=1, min_periods=1).mean()


In [3]:
# 7. Feature Engineering
def extract_ndvi_features(row):
    values = row[ndvi_columns].values
    return pd.Series({
        'ndvi_mean': np.mean(values),
        'ndvi_std': np.std(values),
        'ndvi_min': np.min(values),
        'ndvi_max': np.max(values),
        'ndvi_range': np.max(values) - np.min(values),
        'ndvi_q25': np.percentile(values, 25),
        'ndvi_q75': np.percentile(values, 75),
        'ndvi_median': np.median(values),
        'ndvi_trend': values[-1] - values[0]
    })

ndvi_features = full_df.apply(extract_ndvi_features, axis=1)
full_df = pd.concat([full_df[['ID', 'class', 'is_train']], ndvi_features], axis=1)

In [4]:
# 8. Split back into train and test
train_processed = full_df[full_df['is_train'] == 1].copy()
test_processed = full_df[full_df['is_train'] == 0].copy()

# 9. Encode labels
label_encoder = LabelEncoder()
train_processed['class'] = label_encoder.fit_transform(train_processed['class'])

# 10. Define features and labels
X = train_processed.drop(columns=['ID', 'class', 'is_train'])
y = train_processed['class']
X_test = test_processed.drop(columns=['ID', 'class', 'is_train'])

# 11. Scale + Polynomial features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

poly = PolynomialFeatures(degree=2, include_bias=False)
X_poly = poly.fit_transform(X_scaled)

# 12. Cross-validation
model = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=300)
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
scores = cross_val_score(model, X_poly, y, cv=cv, scoring='accuracy')
print(f"CV Accuracy: {np.mean(scores) * 100:.2f}% ± {np.std(scores) * 100:.2f}%")



CV Accuracy: 85.22% ± 0.58%


In [5]:
# 13. Final Training
model.fit(X_poly, y)

# 14. Predict on test set
X_test_scaled = scaler.transform(X_test)
X_test_poly = poly.transform(X_test_scaled)
y_pred = model.predict(X_test_poly)
y_pred_labels = label_encoder.inverse_transform(y_pred)

# 15. Submission
submission_df = pd.DataFrame({
    'ID': test_processed['ID'].astype(int),
    'class': y_pred_labels
})
submission_df.to_csv('submission.csv', index=False)
print("submission.csv generated successfully.")



submission.csv generated successfully.
