In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, classification_report


# Load the labelled training table and the table to predict on.
train_df = pd.read_csv("hacktrain (5).csv")
test_df = pd.read_csv("hacktest (5).csv")

# Feature columns are every training column whose name contains "_N".
ndvi_columns = [name for name in train_df.columns if '_N' in name]

# Fill missing readings with per-column means learned from the training
# table only; the same fitted means are then applied to the test table.
imputer = SimpleImputer(strategy="mean")
imputer.fit(train_df[ndvi_columns])
train_ndvi = imputer.transform(train_df[ndvi_columns])
test_ndvi = imputer.transform(test_df[ndvi_columns])

# Standardise each column with statistics fitted on the imputed training data.
scaler = StandardScaler()
scaler.fit(train_ndvi)
train_ndvi_scaled = scaler.transform(train_ndvi)
test_ndvi_scaled = scaler.transform(test_ndvi)


def engineer_features(ndvi_scaled, original_df, columns=None):
    """Build the feature table from a 2-D matrix of NDVI readings.

    Every aggregate is computed from the raw NDVI readings only, so a
    derived column (for example ``ndvi_mean``) never leaks into a later
    statistic such as ``ndvi_std``, ``ndvi_min`` or ``ndvi_max``.

    Args:
        ndvi_scaled: 2-D array-like of shape (n_rows, n_steps); one row per
            sample, one column per entry of ``columns``.
        original_df: Unused; kept so existing call sites keep working.
        columns: Names for the NDVI columns. Defaults to the module-level
            ``ndvi_columns`` list.

    Returns:
        pandas.DataFrame holding the NDVI columns followed by the derived
        features, in a fixed order: ndvi_mean, ndvi_std, ndvi_min,
        ndvi_max, ndvi_range, early_mean, mid_mean, late_mean, ndvi_trend,
        ndvi_lag_mean, ndvi_lag_std, ndvi_max_diff, top1..top3_ndvi and
        sin_k / cos_k for k = 1, 2, 3.
    """
    cols = list(ndvi_columns if columns is None else columns)
    values = np.asarray(ndvi_scaled, dtype=float)

    # `raw` stays untouched so the row statistics below only ever see the
    # NDVI readings; `df` is the table that accumulates derived columns.
    raw = pd.DataFrame(values, columns=cols)
    df = raw.copy()

    # Row-wise summary statistics (pandas std uses the sample form, ddof=1).
    df['ndvi_mean'] = raw.mean(axis=1)
    df['ndvi_std'] = raw.std(axis=1)
    df['ndvi_min'] = raw.min(axis=1)
    df['ndvi_max'] = raw.max(axis=1)
    df['ndvi_range'] = df['ndvi_max'] - df['ndvi_min']

    # Means over three consecutive column windows.
    # NOTE(review): the 9/18 cut points presumably assume 27 time steps --
    # confirm against the real column count.
    df['early_mean'] = raw[cols[:9]].mean(axis=1)
    df['mid_mean'] = raw[cols[9:18]].mean(axis=1)
    df['late_mean'] = raw[cols[18:]].mean(axis=1)

    # Slope of a least-squares line through each row. polyfit accepts a 2-D
    # y with one data set per column, so all rows are fitted in one call.
    x_time = np.arange(len(cols))
    df['ndvi_trend'] = np.polyfit(x_time, values.T, 1)[0]

    # Statistics of step-to-step changes (numpy std uses ddof=0 here).
    diffs = np.diff(values, axis=1)
    df['ndvi_lag_mean'] = diffs.mean(axis=1)
    df['ndvi_lag_std'] = diffs.std(axis=1)
    df['ndvi_max_diff'] = diffs.max(axis=1)

    # The three largest readings per row, largest first.
    sorted_ndvi = np.sort(values, axis=1)
    for rank in range(1, 4):
        df[f'top{rank}_ndvi'] = sorted_ndvi[:, -rank]

    # Projections of each row onto the first three sine/cosine harmonics
    # over a time axis normalised to [0, 1].
    time = np.linspace(0, 1, len(cols))
    for k in (1, 2, 3):
        angle = 2 * np.pi * k * time
        df[f'sin_{k}'] = np.dot(values, np.sin(angle))
        df[f'cos_{k}'] = np.dot(values, np.cos(angle))

    return df


# Build the engineered feature tables for both data sets and take the labels.
X_train_full = engineer_features(train_ndvi_scaled, train_df)
X_test_full = engineer_features(test_ndvi_scaled, test_df)
y = train_df['class']

# Add pairwise interaction terms (no squares, no bias column). The expansion
# is fitted on the training table and re-applied unchanged to the test table.
poly = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)
X_poly = poly.fit_transform(X_train_full)
X_test_poly = poly.transform(X_test_full)

# Hold out 20% of the labelled rows, stratified by class, for validation.
X_train, X_val, y_train, y_val = train_test_split(X_poly, y, stratify=y, test_size=0.2, random_state=42)

# The deprecated `multi_class` argument is intentionally not passed: with the
# lbfgs solver a multiclass target is already fitted with the multinomial
# loss, and passing the argument emits a FutureWarning on recent scikit-learn.
model = LogisticRegression(
    max_iter=2000,
    solver='lbfgs',
    C=10,
    class_weight='balanced',
    random_state=42
)
model.fit(X_train, y_train)

# Report hold-out performance.
y_pred = model.predict(X_val)
print("Validation Accuracy:", accuracy_score(y_val, y_pred))
print(classification_report(y_val, y_pred))

# NOTE(review): the submitted predictions come from the model fitted on the
# 80% training split only; it is not refitted on all labelled rows.
test_preds = model.predict(X_test_poly)
submission = pd.DataFrame({
    "ID": test_df["ID"],
    "class": test_preds
})
submission.to_csv("submission (5).csv", index=False)
print("✅ Final submission.csv saved!")




Validation Accuracy: 0.91875
              precision    recall  f1-score   support

        farm       0.79      0.78      0.78       168
      forest       0.97      0.96      0.97      1232
       grass       0.62      0.62      0.62        39
  impervious       0.77      0.87      0.82       134
     orchard       0.57      0.67      0.62         6
       water       0.58      0.52      0.55        21

    accuracy                           0.92      1600
   macro avg       0.72      0.74      0.72      1600
weighted avg       0.92      0.92      0.92      1600

✅ Final submission.csv saved!
