In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score


In [None]:
# Load the raw train/test tables and discard the stray "Unnamed: 0" column
# (presumably a saved row index — TODO confirm against the CSVs).
train = pd.read_csv("hacktrain.csv").drop(columns=['Unnamed: 0'])
test = pd.read_csv("hacktest.csv").drop(columns=['Unnamed: 0'])

In [None]:
# Keep the submission IDs aside; the ID column is not a model feature.
test_ids = test['ID']

# Feature matrices: remove the non-feature columns, then fill gaps along each
# row (axis=1) with pandas' default interpolation, extending the fill to
# leading and trailing gaps as well (limit_direction='both').
# NOTE(review): X and X_test are reassigned in a later cell (median
# imputation) before any model sees them, so this interpolation does not
# reach the first pipeline's model.
X = (
    train
    .drop(columns=['ID', 'class'])
    .interpolate(axis=1, limit_direction='both')
)
X_test = (
    test
    .drop(columns=['ID'])
    .interpolate(axis=1, limit_direction='both')
)


In [None]:
# Encode the class labels as integers for scikit-learn. The fitted encoder is
# kept so predictions can be mapped back to the original label names.
label_encoder = LabelEncoder()
y = train['class']
y_encoded = label_encoder.fit_transform(y)

# Original label names in encoded order (used as report row names later).
class_names = label_encoder.classes_


In [None]:
def add_features(df):
    """Append per-row summary statistics of the input columns.

    Every column of ``df`` is treated as one step of a left-to-right ordered
    series (NOTE(review): assumed to be chronological NDVI readings — confirm
    against the dataset description). All statistics are computed from the
    *input* columns only, so a derived feature never feeds into another one.
    In particular ``ndvi_last`` is the last input column, not the most
    recently appended feature.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric frame with at least one column, one row per sample.
        It is not modified.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with these columns appended, in order:
        ``ndvi_mean``, ``ndvi_std``, ``ndvi_max``, ``ndvi_min``,
        ``ndvi_range`` (max - min), ``ndvi_first``, ``ndvi_last``,
        ``ndvi_slope`` ((last - first) / (number of steps - 1)) and
        ``ndvi_positive_trend`` (share of consecutive steps that increase).
    """
    base = df              # read-only view of the original series columns
    out = df.copy()
    n_steps = base.shape[1]

    out['ndvi_mean'] = base.mean(axis=1)
    out['ndvi_std'] = base.std(axis=1)
    out['ndvi_max'] = base.max(axis=1)
    out['ndvi_min'] = base.min(axis=1)
    out['ndvi_range'] = out['ndvi_max'] - out['ndvi_min']
    out['ndvi_first'] = base.iloc[:, 0]
    out['ndvi_last'] = base.iloc[:, -1]

    # Average change per step; a single-column input has no steps, so the
    # denominator is clamped to 1 (numerator is 0 in that case anyway).
    out['ndvi_slope'] = (out['ndvi_last'] - out['ndvi_first']) / max(n_steps - 1, 1)

    if n_steps > 1:
        steps = base.to_numpy()
        out['ndvi_positive_trend'] = (steps[:, 1:] > steps[:, :-1]).mean(axis=1)
    else:
        # No consecutive pairs to compare.
        out['ndvi_positive_trend'] = 0.0
    return out


# Impute missing values with the per-column medians of the TRAINING table;
# the same training medians are used for the test table.
# NOTE(review): this reassigns X / X_test and thereby discards the row-wise
# interpolated versions built in an earlier cell.
train_medians = train.median(numeric_only=True)
X = train.drop(columns=['ID', 'class']).fillna(train_medians)
X_test = test.drop(columns=['ID']).fillna(train_medians)

# Append the per-row summary features to both feature matrices.
X, X_test = add_features(X), add_features(X_test)

In [None]:
# Hold out 20% of the training rows for validation. The split is stratified
# on the encoded labels and uses a fixed seed so it is repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    stratify=y_encoded,
    random_state=42,
)


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

# Standardise the features with statistics fitted on the training split only;
# the validation split is transformed with those same statistics.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)

# Sweep the regularisation parameter C and remember the model with the
# highest validation accuracy (strict ">" means the first one wins a tie).
# NOTE(review): the next cell refits `model` with C=0.1 on polynomial
# features, so the model selected here is not the one used for submission.
best_acc, best_model = 0, None
for c in (0.01, 0.1, 1, 10):
    model = LogisticRegression(C=c, max_iter=3000, solver='lbfgs')
    model.fit(X_train_scaled, y_train)

    val_preds = model.predict(X_val_scaled)
    acc = accuracy_score(y_val, val_preds)
    print(f"C = {c} -> Accuracy: {acc:.4f}")

    if acc > best_acc:
        best_acc, best_model = acc, model

# Continue with the best-scoring model of the sweep.
model = best_model

C = 0.01 -> Accuracy: 0.8981
C = 0.1 -> Accuracy: 0.9069
C = 1 -> Accuracy: 0.9075
C = 10 -> Accuracy: 0.9044


In [None]:
from sklearn.preprocessing import PolynomialFeatures

# Fit the scaler on the training split and apply it to all three sets.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled, X_test_scaled = scaler.transform(X_val), scaler.transform(X_test)

# Expand to degree-2 polynomial features without the constant bias column;
# the expansion is fitted on the training split and reused for val/test.
poly = PolynomialFeatures(degree=2, include_bias=False)
X_train_poly = poly.fit_transform(X_train_scaled)
X_val_poly, X_test_poly = poly.transform(X_val_scaled), poly.transform(X_test_scaled)

# Logistic regression on the expanded features (C fixed at 0.1 here).
model = LogisticRegression(C=0.1, max_iter=3000, solver='lbfgs')
model.fit(X_train_poly, y_train)

# Validation metrics: overall accuracy plus the per-class report.
y_pred = model.predict(X_val_poly)
print("Accuracy:", accuracy_score(y_val, y_pred))
print(
    "Classification Report:\n",
    classification_report(y_val, y_pred, target_names=class_names),
)

Accuracy: 0.944375
Classification Report:
               precision    recall  f1-score   support

        farm       0.81      0.81      0.81       168
      forest       0.97      0.98      0.98      1232
       grass       0.88      0.59      0.71        39
  impervious       0.87      0.90      0.88       134
     orchard       0.75      0.50      0.60         6
       water       0.89      0.76      0.82        21

    accuracy                           0.94      1600
   macro avg       0.86      0.76      0.80      1600
weighted avg       0.94      0.94      0.94      1600



In [None]:
# Predict the test set and map the integer predictions back to label names.
test_preds = model.predict(X_test_poly)
test_labels = label_encoder.inverse_transform(test_preds)

# Two-column submission (ID, predicted class), written without the index.
submission = pd.DataFrame(
    {
        'ID': test_ids,
        'class': test_labels,
    }
)
submission.to_csv("submission.csv", index=False)

In [None]:
# (Re)write the submission file so it is guaranteed to exist before download.
submission.to_csv("submission.csv", index=False)

# google.colab is only importable inside a Colab runtime. Elsewhere, skip the
# browser download instead of failing the whole cell with ImportError.
try:
    from google.colab import files
except ImportError:
    print("Not running in Google Colab; submission.csv was saved locally.")
else:
    files.download("submission.csv")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

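Second iteration: the pipeline below is rebuilt end-to-end with row-wise interpolation instead of median imputation, a filter that drops mostly-missing or near-constant columns, and additional early/mid/late mean features.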

In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder, StandardScaler, PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Load the raw tables and drop the stray "Unnamed: 0" column.
train = pd.read_csv("hacktrain.csv").drop(columns=["Unnamed: 0"])
test = pd.read_csv("hacktest.csv").drop(columns=["Unnamed: 0"])

y = train["class"]
test_ids = test["ID"]
X = train.drop(columns=["ID", "class"])
X_test = test.drop(columns=["ID"])

# Candidate columns for filtering: the first 27 feature columns.
ndvi_cols = X.columns[:27]

# Measure missingness on the RAW training features. After the row-wise
# interpolation below (limit_direction='both') only rows with no valid value
# at all can still contain NaN, so a missing-rate check performed afterwards
# would practically never trigger.
missing_frac = X[ndvi_cols].isna().mean()

# Fill gaps along each row (axis=1), including leading/trailing gaps.
X = X.interpolate(axis=1, limit_direction='both')
X_test = X_test.interpolate(axis=1, limit_direction='both')

# Drop columns that were mostly missing in the raw data (> 50%) or that are
# nearly constant after filling (std < 0.02). The decision is made on the
# training data only and applied identically to the test data.
drop_cols = [
    col for col in ndvi_cols
    if missing_frac[col] > 0.5 or X[col].std() < 0.02
]
X = X.drop(columns=drop_cols)
X_test = X_test.drop(columns=drop_cols)

def add_trend_features(df, early_end=9, mid_end=18):
    """Append per-row summary, trend and window-mean features.

    Every column of ``df`` is treated as one step of a left-to-right ordered
    series (NOTE(review): assumed to be chronological NDVI readings — confirm
    against the dataset description). All features are computed from the
    *input* columns only; appended features never feed into later ones
    (e.g. ``late_mean`` averages only input columns from ``mid_end`` onward).

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric frame with at least one column, one row per sample.
        It is not modified.
    early_end : int, default 9
        Positional end (exclusive) of the "early" window.
    mid_end : int, default 18
        Positional end (exclusive) of the "mid" window; the "late" window
        runs from here to the last input column.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with these columns appended, in order:
        ``ndvi_mean``, ``ndvi_std``, ``ndvi_max``, ``ndvi_min``,
        ``ndvi_range``, ``ndvi_slope``, ``ndvi_positive_trend``,
        ``early_mean``, ``mid_mean``, ``late_mean``.

    Notes
    -----
    The window boundaries are positional. If ``df`` has fewer columns than a
    boundary (for example after columns were filtered out), the affected
    window may be empty and its mean is then expected to be NaN — verify the
    column count or pass suitable boundaries.
    """
    base = df              # read-only view of the original series columns
    out = df.copy()
    n_steps = base.shape[1]

    out['ndvi_mean'] = base.mean(axis=1)
    out['ndvi_std'] = base.std(axis=1)
    out['ndvi_max'] = base.max(axis=1)
    out['ndvi_min'] = base.min(axis=1)
    out['ndvi_range'] = out['ndvi_max'] - out['ndvi_min']

    # Average change per step between the first and last input column; the
    # denominator is clamped so a single-column input does not divide by 0.
    out['ndvi_slope'] = (base.iloc[:, -1] - base.iloc[:, 0]) / max(n_steps - 1, 1)

    if n_steps > 1:
        steps = base.to_numpy()
        out['ndvi_positive_trend'] = (steps[:, 1:] > steps[:, :-1]).mean(axis=1)
    else:
        # No consecutive pairs to compare.
        out['ndvi_positive_trend'] = 0.0

    out['early_mean'] = base.iloc[:, :early_end].mean(axis=1)
    out['mid_mean'] = base.iloc[:, early_end:mid_end].mean(axis=1)
    out['late_mean'] = base.iloc[:, mid_end:].mean(axis=1)
    return out

# Append the trend features to both feature matrices.
X, X_test = add_trend_features(X), add_trend_features(X_test)

# Integer-encode the class labels; keep the encoder and the label names for
# decoding predictions and labelling the report.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
class_names = label_encoder.classes_

# Stratified 80/20 train/validation split with a fixed seed.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
    stratify=y_encoded,
)

# Standardise with statistics fitted on the training split only, then apply
# the same transform to the validation and test sets.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled, X_test_scaled = scaler.transform(X_val), scaler.transform(X_test)

# Degree-2 polynomial expansion without the constant bias column, fitted on
# the training split and reused for the other two sets.
poly = PolynomialFeatures(degree=2, include_bias=False)
X_train_poly = poly.fit_transform(X_train_scaled)
X_val_poly, X_test_poly = poly.transform(X_val_scaled), poly.transform(X_test_scaled)

# Logistic regression on the expanded features.
model = LogisticRegression(C=0.1, max_iter=3000, solver='lbfgs')
model.fit(X_train_poly, y_train)

# Validation metrics: overall accuracy plus the per-class report.
y_pred = model.predict(X_val_poly)
print("Accuracy:", accuracy_score(y_val, y_pred))
print(
    "Classification Report:\n",
    classification_report(y_val, y_pred, target_names=class_names),
)

# Predict the test set and map the integer predictions back to label names.
test_preds = model.predict(X_test_poly)
test_labels = label_encoder.inverse_transform(test_preds)

# Two-column submission (ID, predicted class), written without the index.
submission = pd.DataFrame({'ID': test_ids, 'class': test_labels})
submission.to_csv("submission.csv", index=False)

# google.colab is only importable inside a Colab runtime. Elsewhere, skip the
# browser download instead of failing the whole cell with ImportError.
try:
    from google.colab import files
except ImportError:
    print("Not running in Google Colab; submission.csv was saved locally.")
else:
    files.download("submission.csv")


Accuracy: 0.940625
Classification Report:
               precision    recall  f1-score   support

        farm       0.84      0.80      0.82       168
      forest       0.97      0.98      0.98      1232
       grass       0.85      0.59      0.70        39
  impervious       0.83      0.88      0.86       134
     orchard       0.33      0.17      0.22         6
       water       0.85      0.81      0.83        21

    accuracy                           0.94      1600
   macro avg       0.78      0.70      0.73      1600
weighted avg       0.94      0.94      0.94      1600



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>