In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.impute import KNNImputer

from google.colab import files

In [None]:
# Load the labelled training data and the unlabelled test data from the
# Colab workspace.
train_path = '/content/hacktrain.csv'
test_path = '/content/hacktest.csv'

train = pd.read_csv(train_path)
test = pd.read_csv(test_path)

# Quick sanity check on the dimensions before any preprocessing.
print("Train shape:", train.shape)
print("Test shape:", test.shape)

# Last expression of the cell: rich display of the first training rows.
train.head()

Train shape: (8000, 30)
Test shape: (2845, 29)


Unnamed: 0.1,Unnamed: 0,ID,class,20150720_N,20150602_N,20150517_N,20150501_N,20150415_N,20150330_N,20150314_N,...,20140610_N,20140525_N,20140509_N,20140423_N,20140407_N,20140322_N,20140218_N,20140202_N,20140117_N,20140101_N
0,0,1,water,637.595,658.668,-1882.03,-1924.36,997.904,-1739.99,630.087,...,,-1043.16,-1942.49,267.138,,,211.328,-2203.02,-1180.19,433.906
1,1,2,water,634.24,593.705,-1625.79,-1672.32,914.198,-692.386,707.626,...,,-933.934,-625.385,120.059,364.858,476.972,220.878,-2250.0,-1360.56,524.075
2,3,4,water,58.0174,-1599.16,,-1052.63,,-1564.63,,...,-1025.88,368.622,,-1227.8,304.621,,369.214,-2202.12,,-1343.55
3,4,5,water,72.518,,380.436,-1256.93,515.805,-1413.18,-802.942,...,-1813.95,155.624,,-924.073,432.15,282.833,298.32,-2197.36,,-826.727
4,7,8,water,1136.44,,,1647.83,1935.8,,2158.98,...,1535.0,1959.43,-279.317,-384.915,-113.406,1020.72,1660.65,-116.801,-568.05,-1357.14


In [None]:
# Remove the leftover "Unnamed: ..." index columns from both frames.
unnamed_in_train = [name for name in train.columns if "Unnamed" in name]
unnamed_in_test = [name for name in test.columns if "Unnamed" in name]
train = train.drop(columns=unnamed_in_train)
test = test.drop(columns=unnamed_in_test)

# Every column whose name contains '_N' is treated as an NDVI measurement.
ndvi_columns = [name for name in train.columns if '_N' in name]

# Fill missing NDVI values with a 5-nearest-neighbour imputer.
# The imputer is fitted on the training rows only and then applied to both
# frames, so the test set never influences the imputation model.
imputer = KNNImputer(n_neighbors=5)
imputer.fit(train[ndvi_columns])
train[ndvi_columns] = imputer.transform(train[ndvi_columns])
test[ndvi_columns] = imputer.transform(test[ndvi_columns])

In [None]:
# Map the string class labels to integer codes; `le` is kept so predictions
# can be converted back to label names later.
le = LabelEncoder()
le.fit(train['class'])
train['class_encoded'] = le.transform(train['class'])

# Show the label -> integer code mapping.
class_to_code = dict(zip(le.classes_, le.transform(le.classes_)))
print(class_to_code)

{'farm': np.int64(0), 'forest': np.int64(1), 'grass': np.int64(2), 'impervious': np.int64(3), 'orchard': np.int64(4), 'water': np.int64(5)}


In [None]:
# Re-derive the NDVI column list (names containing '_N') so this cell does not
# depend on the list built during preprocessing.
ndvi_columns = list(filter(lambda name: '_N' in name, train.columns))

def calculate_trend(row):
    """Return the slope of a straight-line least-squares fit through ``row``.

    The x-coordinates are the positions 0, 1, ..., len(row) - 1, so the values
    are treated as evenly spaced and the slope is expressed per position, in
    the order the values are supplied.

    NOTE(review): the printed training header lists the NDVI columns
    newest-first, so a positive slope would mean NDVI falls over calendar
    time -- confirm this is the intended sign convention.

    Parameters
    ----------
    row : sequence of numbers (e.g. a pandas Series or list)
        The ordered values to fit.

    Returns
    -------
    The first-degree coefficient (slope) reported by ``np.polyfit``.
    """
    positions = range(len(row))
    slope, _intercept = np.polyfit(positions, row, 1)
    return slope

# Per-row summary statistics of the NDVI series. Each entry maps the name of
# the new column to a function that computes it from the NDVI sub-frame.
# Dict order is the order in which the columns are appended to each frame.
row_statistics = {
    'ndvi_mean': lambda bands: bands.mean(axis=1),
    'ndvi_std': lambda bands: bands.std(axis=1),
    'ndvi_min': lambda bands: bands.min(axis=1),
    'ndvi_max': lambda bands: bands.max(axis=1),
    'ndvi_trend': lambda bands: bands.apply(calculate_trend, axis=1),
    'ndvi_skew': lambda bands: bands.skew(axis=1),
    'ndvi_kurtosis': lambda bands: bands.kurtosis(axis=1),
    'ndvi_median': lambda bands: bands.median(axis=1),
    'ndvi_range': lambda bands: bands.max(axis=1) - bands.min(axis=1),
    'ndvi_q25': lambda bands: bands.quantile(0.25, axis=1),
    'ndvi_q75': lambda bands: bands.quantile(0.75, axis=1),
}

# Add the same engineered columns to both the training and the test frame.
for frame in (train, test):
    # Select the NDVI columns once; the engineered columns added below are
    # not part of `ndvi_columns`, so they never feed back into the statistics.
    bands = frame[ndvi_columns]
    for feature_name, compute in row_statistics.items():
        frame[feature_name] = compute(bands)

In [None]:
# Engineered columns used as model inputs. The order is significant: it fixes
# the column order seen by the scaler and the classifier.
features = [
    'ndvi_mean',
    'ndvi_std',
    'ndvi_min',
    'ndvi_max',
    'ndvi_trend',
    'ndvi_skew',
    'ndvi_kurtosis',
    'ndvi_range',
    'ndvi_median',
    'ndvi_q25',
    'ndvi_q75',
]

# Design matrix and integer-encoded target for training, plus the matching
# design matrix for the test set.
X = train.loc[:, features]
y = train['class_encoded']
X_test = test.loc[:, features]

In [None]:
# Standardise the features. The scaler statistics are learned from the
# training design matrix only and then applied to both train and test.
# NOTE(review): the scaler is fitted on every training row before the
# hold-out split is made, so the validation rows contribute to its
# statistics; consider fitting inside a Pipeline after splitting.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Hold out 20% of the labelled rows (stratified by class, fixed seed) so the
# model can be scored on data it has not been fitted on.
X_train, X_val, y_train, y_val = train_test_split(X_scaled, y, test_size=0.2, random_state=42, stratify=y)

# Hyper-parameters shared by the validation model and the final model.
model_params = dict(C=0.5, max_iter=1000, multi_class='multinomial', solver='lbfgs')

# Validation model: fitted on the training split ONLY. Fitting on all of
# X_scaled here would include the X_val rows, and the reported score would
# then be a training-set accuracy rather than a hold-out estimate.
val_model = LogisticRegression(**model_params)
val_model.fit(X_train, y_train)

val_preds = val_model.predict(X_val)
print("Validation Accuracy:", accuracy_score(y_val, val_preds))

# Final model: refit on every labelled row with the same hyper-parameters.
# Later cells use `model` to predict the test set, so it keeps that name.
model = LogisticRegression(**model_params)
model.fit(X_scaled, y)



Validation Accuracy: 0.879375


In [None]:
# Predict integer class codes for the test set and map them back to the
# original label names.
test_preds = model.predict(X_test_scaled)
test_labels = le.inverse_transform(test_preds)

# Build the submission table with an integer ID column and a string class
# column.
submission = pd.DataFrame({'ID': test['ID'], 'class': test_labels}).astype({'ID': int, 'class': str})

# Write the CSV (UTF-8 with BOM, '\n' line endings, no index column) and
# trigger a browser download from Colab.
submission.to_csv(
    'submission.csv',
    index=False,
    encoding='utf-8-sig',
    lineterminator='\n',
)
files.download('submission.csv')
print(submission.head())

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

   ID   class
0   1  forest
1   2  forest
2   3  forest
3   4  forest
4   5  forest
