# In [3]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer

def preprocess_data(df):
    """Summarise each row's NDVI readings into a few per-row statistics.

    Every column of *df* whose name contains ``'_N'`` is treated as an NDVI
    reading. The returned frame holds the ``ID`` column of *df* plus, for
    each row: the mean, standard deviation, minimum, maximum and range over
    all NDVI columns, and the mean over the first and the second half of
    those columns (halves are taken in the order the columns appear in *df*).
    """
    ndvi_names = [name for name in df.columns if '_N' in name]
    ndvi = df[ndvi_names]

    lowest = ndvi.min(axis=1)
    highest = ndvi.max(axis=1)

    # With an odd number of NDVI columns the extra one lands in the second half.
    split_at = len(ndvi_names) // 2
    early_names, late_names = ndvi_names[:split_at], ndvi_names[split_at:]

    return pd.DataFrame({
        'ID': df['ID'],
        'ndvi_mean': ndvi.mean(axis=1),
        'ndvi_std': ndvi.std(axis=1),
        'ndvi_min': lowest,
        'ndvi_max': highest,
        'ndvi_range': highest - lowest,
        'first_half_mean': df[early_names].mean(axis=1),
        'second_half_mean': df[late_names].mean(axis=1),
    })

# Load the labelled training rows and the unlabelled rows to be scored.
train = pd.read_csv('hacktrain.csv')
test = pd.read_csv('hacktest.csv')

# Derive the same summary features for both sets. The 'class' label is split
# off before feature extraction so it can never end up among the inputs.
X_train = preprocess_data(train.drop(columns=['class']))
y_train = train['class']
X_test = preprocess_data(test)

# Pipeline: fill missing feature values with the column median, standardise,
# then fit a logistic regression. C=0.1 is below scikit-learn's default of
# 1.0, i.e. stronger regularisation.
#
# `multi_class` is deliberately left at its default: the explicit argument is
# deprecated since scikit-learn 1.5 (FutureWarning now, an error once it is
# removed), and with the lbfgs solver the default already fits a multinomial
# model when there are three or more classes.
# NOTE(review): assumes 'class' has 3+ distinct labels; with a binary target
# the default fits an ordinary binary logistic model instead -- confirm.
model = make_pipeline(
    SimpleImputer(strategy='median'),
    StandardScaler(),
    LogisticRegression(solver='lbfgs',
                       max_iter=1000,
                       C=0.1)
)

# 'ID' is carried along for the submission file but kept out of the model
# inputs for both fitting and prediction.
model.fit(X_train.drop(columns=['ID']), y_train)

predictions = model.predict(X_test.drop(columns=['ID']))

# One row per test ID with its predicted class, written without the index.
submission = pd.DataFrame({'ID': X_test['ID'], 'class': predictions})
submission.to_csv('submission.csv', index=False)

