In [None]:
import numpy as np
import pandas as pd

from sklearn.impute import KNNImputer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [None]:
# 1. Load datasets
# Both CSV paths are given relative to the working directory; edit them to
# point at your actual train / test files.
train_path = 'hacktrain.csv'
test_path = 'hacktest.csv'
train_df = pd.read_csv(train_path)
test_df = pd.read_csv(test_path)

In [None]:
# Identify NDVI columns: every training column whose name contains '_N'.
# Kept as a plain list because later cells concatenate it with other lists.
ndvi_cols = list(filter(lambda name: '_N' in name, train_df.columns))

In [None]:
# Force every NDVI column to a numeric dtype in both frames.
# errors='coerce' turns any value that cannot be parsed into NaN.
train_ndvi_numeric = train_df[ndvi_cols].apply(pd.to_numeric, errors='coerce')
test_ndvi_numeric = test_df[ndvi_cols].apply(pd.to_numeric, errors='coerce')
train_df[ndvi_cols] = train_ndvi_numeric
test_df[ndvi_cols] = test_ndvi_numeric

In [None]:
# Fill missing NDVI values with a 10-nearest-neighbour imputer.
# The imputer is fit on the training frame only and then reused, unchanged,
# to fill the test frame.
imputer = KNNImputer(n_neighbors=10)
train_ndvi_filled = imputer.fit_transform(train_df[ndvi_cols])
test_ndvi_filled = imputer.transform(test_df[ndvi_cols])
train_df[ndvi_cols] = train_ndvi_filled
test_df[ndvi_cols] = test_ndvi_filled


In [None]:
# Feature Engineering: add per-row summary statistics of the NDVI columns.
#
# New columns written to both train_df and test_df:
#   ndvi_mean / ndvi_std / ndvi_min / ndvi_max - row-wise statistics
#   ndvi_range - ndvi_max minus ndvi_min
#   ndvi_trend - slope of a degree-1 least-squares fit of the row's NDVI
#                values against their column position 0..len(ndvi_cols)-1
#
# The slope is computed for all rows with a single np.polyfit call: polyfit
# accepts a 2-D y of shape (n_points, n_series) and fits each column
# independently, so this avoids one Python-level polyfit call per row.
ndvi_positions = np.arange(len(ndvi_cols))
for df in [train_df, test_df]:
    ndvi_values = df[ndvi_cols]  # selected once, before the new columns are added
    df['ndvi_mean'] = ndvi_values.mean(axis=1)
    df['ndvi_std'] = ndvi_values.std(axis=1)
    df['ndvi_min'] = ndvi_values.min(axis=1)
    df['ndvi_max'] = ndvi_values.max(axis=1)
    df['ndvi_range'] = df['ndvi_max'] - df['ndvi_min']
    # Transpose so each row of the frame becomes one series (column) for polyfit;
    # coefficient row 0 holds the slope of every series.
    df['ndvi_trend'] = np.polyfit(ndvi_positions, ndvi_values.to_numpy(dtype=float).T, 1)[0]

In [None]:
# Encode the string class labels as integers.
# label_encoder is kept around so predictions can be mapped back to names.
label_encoder = LabelEncoder()
label_encoder.fit(train_df['class'])
train_df['class_encoded'] = label_encoder.transform(train_df['class'])

In [None]:
# Prepare training data: raw NDVI columns plus the engineered summary columns
# as features, the integer-encoded class as target.
summary_cols = ['ndvi_mean', 'ndvi_std', 'ndvi_min', 'ndvi_max', 'ndvi_range', 'ndvi_trend']
X = train_df[ndvi_cols + summary_cols]
y = train_df['class_encoded']


In [None]:
# Scale features
# StandardScaler is imported in the notebook's top import cell.
# The feature column list is built once here so the train and test matrices
# are guaranteed to use the same columns in the same order.
feature_cols = ndvi_cols + ['ndvi_mean', 'ndvi_std', 'ndvi_min', 'ndvi_max', 'ndvi_range', 'ndvi_trend']
scaler = StandardScaler()
# Fit from train_df rather than from X itself, so re-running this cell always
# starts from the unscaled features instead of overwriting its own input.
# NOTE(review): the scaler is fit on every training row before the
# train/validation split in a later cell, so validation rows contribute to the
# scaling statistics. Consider fitting on the training split only.
X = scaler.fit_transform(train_df[feature_cols])
X_test_final = scaler.transform(test_df[feature_cols])

In [None]:
# Train-validation split, stratified on the encoded class labels.
# NOTE(review): 60% of the rows are held out for validation, so the model that
# is fit later on X_train sees only 40% of the labelled data. Confirm this
# split ratio is intentional.
holdout_fraction = 0.6
split_seed = 62
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=holdout_fraction,
    stratify=y,
    random_state=split_seed,
)

In [None]:
# Logistic Regression Model
# Hyperparameters are collected in one dict so they are easy to spot and tune.
logreg_params = {'max_iter': 1000, 'solver': 'lbfgs'}
model = LogisticRegression(**logreg_params)
model.fit(X_train, y_train)

In [None]:
# Evaluate on the validation set: overall accuracy followed by a per-class
# precision / recall / f1 report labelled with the original class names.
val_preds = model.predict(X_val)
val_accuracy = accuracy_score(y_val, val_preds)
val_report = classification_report(y_val, val_preds, target_names=label_encoder.classes_)
print("Validation Accuracy:", val_accuracy)
print(val_report)

Validation Accuracy: 0.9235416666666667
              precision    recall  f1-score   support

        farm       0.75      0.71      0.73       505
      forest       0.95      0.98      0.96      3695
       grass       0.89      0.67      0.76       118
  impervious       0.88      0.83      0.86       401
     orchard       0.50      0.11      0.18        18
       water       0.89      0.76      0.82        63

    accuracy                           0.92      4800
   macro avg       0.81      0.68      0.72      4800
weighted avg       0.92      0.92      0.92      4800



In [None]:
# Save submission file
# Predict on the scaled test features, map the integer predictions back to
# class names, and write an ID/class table to CSV without the index column.
final_preds = model.predict(X_test_final)
final_labels = label_encoder.inverse_transform(final_preds)
submission = pd.DataFrame({'ID': test_df['ID']})
submission['class'] = final_labels
submission.to_csv("submission.csv", index=False)