In [12]:
import pandas as pd
import numpy as np

In [2]:
# Read the training table from the notebook's working directory.
df = pd.read_csv(filepath_or_buffer="hacktrain.csv")

In [3]:
# Remove the "Unnamed: 0" and "ID" columns before modelling.
# The frame is rebound instead of mutated with inplace=True, and missing
# columns are ignored, so running this cell a second time is a no-op rather
# than a KeyError. This also matches how the test frame is cleaned later on.
df = df.drop(columns=["Unnamed: 0", "ID"], errors="ignore")

In [6]:
from sklearn.preprocessing import LabelEncoder
# Encode the 'class' labels as integer codes; `le` is kept so the original
# label names (le.classes_) can be recovered for reporting.
# NOTE: this overwrites df['class'], so re-running the cell would fit the
# encoder on the already-encoded values instead of the original labels.
le = LabelEncoder()
class_codes = le.fit_transform(df['class'])
df['class'] = class_codes


In [8]:
# Target vector: the encoded 'class' column.
y = df['class']
# Feature matrix: every remaining column.
X = df.drop('class', axis=1)

In [10]:
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

# Fill missing feature values with an iterative imputer (at most 15 rounds,
# seeded for repeatability). `imp` is kept so the same fitted imputer can be
# applied to the test table.
# NOTE(review): the imputer is fitted on all rows of X, before the
# train/validation split further down, so validation rows influence the fit.
imp = IterativeImputer(max_iter=15, random_state=42)
X_imputed = imp.fit_transform(X)
# fit_transform returns a plain array; restore the column labels.
X_imputed_df = pd.DataFrame(data=X_imputed, columns=X.columns)

In [13]:
# Row-wise summary features appended to the imputed training table.
#
# Every statistic is computed over an explicit column list instead of
# "whatever columns the frame holds right now". That makes the cell
# idempotent: re-running it yields the same values rather than folding the
# previously added features back into the inputs.
#
# The lists reproduce exactly what a first run of the original cell used:
#   mean_ndvi <- the imputed input columns
#   std_ndvi  <- the input columns plus mean_ndvi
#   trend     <- the input columns plus mean_ndvi and std_ndvi
# NOTE(review): std_ndvi and trend therefore include derived columns in their
# inputs. That looks unintended, but the test-set feature cell uses the same
# definition and the scaler/model are fitted on it, so change them together.
derived_cols = ['mean_ndvi', 'std_ndvi', 'trend']
base_cols = [col for col in X_imputed_df.columns if col not in derived_cols]

X_imputed_df['mean_ndvi'] = X_imputed_df[base_cols].mean(axis=1)
X_imputed_df['std_ndvi'] = X_imputed_df[base_cols + ['mean_ndvi']].std(axis=1)
# Slope of a degree-1 least-squares fit of each row's values against their
# column position (0, 1, 2, ...).
X_imputed_df['trend'] = X_imputed_df[base_cols + ['mean_ndvi', 'std_ndvi']].apply(
    lambda row: np.polyfit(np.arange(len(row)), row, 1)[0], axis=1
)

In [14]:
from sklearn.preprocessing import StandardScaler

# Standardise every feature column; `scaler` is kept so the identical
# transformation can be applied to the test features.
# NOTE(review): the scaler is fitted on all rows, before the train/validation
# split below, so validation rows contribute to the scaling statistics.
scaler = StandardScaler().fit(X_imputed_df)
X_scaled = scaler.transform(X_imputed_df)

In [16]:
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split


# Hold out 20% of the rows for validation, stratified on the class label.
X_train, X_val, y_train, y_val = train_test_split(
    X_scaled,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)

# Oversample with SMOTE on the training portion only; the validation rows
# are left untouched.
sm = SMOTE(random_state=42)
X_train_res, y_train_res = sm.fit_resample(X_train, y_train)

In [17]:
from sklearn.ensemble import RandomForestClassifier

# Random-forest settings gathered in one place so they are easy to tweak.
# NOTE(review): class_weight='balanced' is applied on top of the SMOTE
# resampling of the training data -- confirm both are intended.
rf_params = dict(
    n_estimators=300,
    max_depth=20,
    class_weight='balanced',
    random_state=42,
    n_jobs=-1,
)
model = RandomForestClassifier(**rf_params)
# Train on the resampled training split.
model.fit(X_train_res, y_train_res)


In [19]:
from sklearn.metrics import classification_report, accuracy_score

# Score the fitted model. The training figure is measured on the resampled
# training data; the validation figure on the untouched hold-out rows.
y_val_pred = model.predict(X_val)
train_acc = model.score(X_train_res, y_train_res)
val_acc = accuracy_score(y_val, y_val_pred)

print("Train accuracy:", train_acc)
print("Val accuracy:", val_acc)
# Per-class precision/recall, labelled with the original class names.
per_class_report = classification_report(y_val, y_val_pred, target_names=le.classes_)
print(per_class_report)

Train accuracy: 0.9999661727893918
Val accuracy: 0.949375
              precision    recall  f1-score   support

        farm       0.82      0.82      0.82       168
      forest       0.97      0.99      0.98      1232
       grass       0.96      0.64      0.77        39
  impervious       0.89      0.89      0.89       134
     orchard       1.00      0.50      0.67         6
       water       0.85      0.81      0.83        21

    accuracy                           0.95      1600
   macro avg       0.92      0.77      0.83      1600
weighted avg       0.95      0.95      0.95      1600



In [20]:
# Load the test table and keep its 'ID' column aside before removing the
# identifier columns from the feature frame.
test_df = pd.read_csv("hacktest.csv")
test_ID = test_df['ID']
# Columns that are absent are skipped rather than raising.
test_df = test_df.drop(columns=['ID', 'Unnamed: 0'], errors='ignore')

In [21]:
# Apply the imputer fitted on the training features (no refitting here).
test_imputed = imp.transform(test_df)
# transform returns a plain array; restore the column labels.
test_imputed_df = pd.DataFrame(data=test_imputed, columns=test_df.columns)

In [22]:
# Row-wise summary features for the test table, built the same way as the
# training features so the fitted scaler and model see matching inputs.
#
# Every statistic is computed over an explicit column list instead of
# "whatever columns the frame holds right now", which makes the cell
# idempotent: re-running it no longer folds previously added features back
# into the inputs.
#
# The lists reproduce exactly what a first run of the original cell used:
#   mean_ndvi <- the imputed input columns
#   std_ndvi  <- the input columns plus mean_ndvi
#   trend     <- the input columns plus mean_ndvi and std_ndvi
# NOTE(review): std_ndvi and trend therefore include derived columns in their
# inputs. That looks unintended, but it matches the training-set feature
# cell; change both together.
test_derived_cols = ['mean_ndvi', 'std_ndvi', 'trend']
test_base_cols = [col for col in test_imputed_df.columns if col not in test_derived_cols]

test_imputed_df['mean_ndvi'] = test_imputed_df[test_base_cols].mean(axis=1)
test_imputed_df['std_ndvi'] = test_imputed_df[test_base_cols + ['mean_ndvi']].std(axis=1)
# Slope of a degree-1 least-squares fit of each row's values against their
# column position (0, 1, 2, ...).
test_imputed_df['trend'] = test_imputed_df[test_base_cols + ['mean_ndvi', 'std_ndvi']].apply(
    lambda row: np.polyfit(np.arange(len(row)), row, 1)[0], axis=1
)


In [None]:
# Scale the test features with the scaler fitted on the training features.
test_scaled = scaler.transform(X=test_imputed_df)