In [1]:

# Attach Google Drive to the Colab runtime; the CSV paths used by the
# later cells all live underneath this mount point.
from google.colab import drive

mount_point = '/content/drive'
drive.mount(mount_point)


Mounted at /content/drive


In [2]:

import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split


In [3]:


# Locations of the competition CSVs on the mounted Drive.
train_path = "/content/drive/My Drive/hacktrain.csv"
test_path = "/content/drive/My Drive/hacktest.csv"

# Read both splits, then report their (rows, columns) as a quick sanity check.
train_df, test_df = pd.read_csv(train_path), pd.read_csv(test_path)
print("Train shape:", train_df.shape)
print("Test shape:", test_df.shape)


Train shape: (8000, 30)
Test shape: (2845, 29)


In [4]:
# Columns whose name contains '_N' are treated as the NDVI time points.
# The list is derived from the training frame and reused by the later cells.
ndvi_cols = list(filter(lambda column_name: '_N' in column_name, train_df.columns))
print("NDVI Time Points:", len(ndvi_cols))

NDVI Time Points: 27


In [5]:
def preprocess_ndvi(df):
    """Impute missing NDVI values and smooth each row with a 3-point mean.

    Works on a copy of ``df``; the caller's frame is not modified. The
    columns processed are the ones listed in the module-level ``ndvi_cols``.

    Steps:
      1. Missing values in each NDVI column are replaced by that column's
         median. The imputer is fitted on the frame passed in, so every call
         uses the medians of its own data.
      2. Every interior time point is replaced by the mean of itself and its
         two neighbours. The first and last columns keep their imputed values.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing every column named in ``ndvi_cols``.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with the NDVI columns imputed and smoothed.
    """
    df = df.copy()
    imputer = SimpleImputer(strategy='median')
    df[ndvi_cols] = imputer.fit_transform(df[ndvi_cols])

    # Average from a frozen snapshot of the imputed values. Reading the
    # neighbours from `df` while writing results back into `df` would feed the
    # already-smoothed left neighbour into the next average, turning the
    # centred moving average into a recursive filter.
    imputed = df[ndvi_cols].copy()
    for i in range(1, len(ndvi_cols) - 1):
        df[ndvi_cols[i]] = (
            imputed[ndvi_cols[i - 1]] + imputed[ndvi_cols[i]] + imputed[ndvi_cols[i + 1]]
        ) / 3
    return df

In [7]:
def add_features(df):
    """Append per-row summary statistics of the NDVI series to a copy of ``df``.

    The statistics are computed across the columns listed in the module-level
    ``ndvi_cols`` (one value per row) and stored under these new columns:
    ``ndvi_mean``, ``ndvi_std``, ``ndvi_min``, ``ndvi_max``, ``ndvi_range``
    (max minus min), ``ndvi_median``, ``ndvi_trend`` (last time point minus
    first), ``ndvi_first_half_mean`` and ``ndvi_second_half_mean``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing every column named in ``ndvi_cols``.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with the feature columns added; ``df`` itself is
        left untouched.
    """
    enriched = df.copy()
    series = enriched[ndvi_cols]
    # Integer split point: with an odd number of time points the second half
    # receives the extra column.
    half = len(ndvi_cols) // 2
    row_min = series.min(axis=1)
    row_max = series.max(axis=1)

    new_columns = {
        'ndvi_mean': series.mean(axis=1),
        'ndvi_std': series.std(axis=1),
        'ndvi_min': row_min,
        'ndvi_max': row_max,
        'ndvi_range': row_max - row_min,
        'ndvi_median': series.median(axis=1),
        'ndvi_trend': series[ndvi_cols[-1]] - series[ndvi_cols[0]],
        'ndvi_first_half_mean': series.iloc[:, :half].mean(axis=1),
        'ndvi_second_half_mean': series.iloc[:, half:].mean(axis=1),
    }
    for column_name, values in new_columns.items():
        enriched[column_name] = values
    return enriched


In [8]:
# Run both splits through the same two stages: impute + smooth, then add the
# summary features.
# NOTE: the frames are reassigned from themselves, so re-running this cell
# without reloading the CSVs applies the smoothing a second time.
train_df = train_df.pipe(preprocess_ndvi).pipe(add_features)
test_df = test_df.pipe(preprocess_ndvi).pipe(add_features)


In [9]:
# Model inputs: the NDVI time points followed by the engineered summaries.
X = train_df[[
    *ndvi_cols,
    'ndvi_mean', 'ndvi_std', 'ndvi_min', 'ndvi_max',
    'ndvi_range', 'ndvi_median', 'ndvi_trend',
    'ndvi_first_half_mean', 'ndvi_second_half_mean',
]]
# The test matrix takes exactly the training columns, in the same order.
X_test = test_df[X.columns]

# Encode the class labels as integers 0..k-1 in sorted label order, and keep
# the inverse mapping so predictions can be turned back into label names.
labels = sorted(train_df['class'].unique())
label_to_int = dict(zip(labels, range(len(labels))))
int_to_label = dict(enumerate(labels))
y = train_df['class'].map(label_to_int)


In [10]:
# Standardise every feature, then fit a logistic-regression classifier on the
# full training set.
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    # `multi_class` is deprecated since scikit-learn 1.5 and raises a
    # FutureWarning when passed. With the lbfgs solver the default already
    # fits a multinomial model for targets with more than two classes, so
    # leaving the argument out keeps the same model.
    ('lr', LogisticRegression(solver='lbfgs', max_iter=1000))
])

pipeline.fit(X, y)





In [11]:
# Predict the integer class codes for the test rows and map them back to the
# original label names.
y_pred = pipeline.predict(X_test)
# Build the Series on X_test's own index: a bare pd.Series(y_pred) would get a
# fresh 0..n-1 index, and any later index-aligned combination with columns of
# the test frame would silently misalign if that frame's index is not the
# default one.
y_pred_labels = pd.Series(y_pred, index=X_test.index).map(int_to_label)


In [12]:
# Destination of the submission file on the mounted Drive.
submission_path = "/content/drive/My Drive/hackathon/submission.csv"

# Two-column submission: the test row ID and its predicted class label.
# The two Series are combined by index.
submission = pd.DataFrame({'ID': test_df['ID'], 'class': y_pred_labels})
print(submission.head())

# index=False keeps the DataFrame index out of the CSV.
submission.to_csv(submission_path, index=False)
print("Submission saved to:", submission_path)


   ID    class
0   1     farm
1   2   forest
2   3  orchard
3   4   forest
4   5   forest
Submission saved to: /content/drive/My Drive/hackathon/submission.csv
