In [7]:
# Step 1: Imports
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Step 2: Load Data
# Read each split and discard the unnamed index column in the same expression;
# errors='ignore' turns the drop into a no-op when the column is absent.
train = pd.read_csv('hacktrain.csv').drop(columns=['Unnamed: 0'], errors='ignore')
test = pd.read_csv('hacktest.csv').drop(columns=['Unnamed: 0'], errors='ignore')

# Step 3: NDVI Columns
# The NDVI feature columns are the training columns whose names end in '_N'.
ndvi_cols = [col for col in train.columns if col.endswith('_N')]

# Step 4: Fill Missing Values
# Impute BOTH splits with the per-column medians of the training data, so the
# test set receives exactly the preprocessing the model is trained with
# instead of statistics derived from the test rows themselves.
ndvi_medians = train[ndvi_cols].median()
train[ndvi_cols] = train[ndvi_cols].fillna(ndvi_medians)
test[ndvi_cols] = test[ndvi_cols].fillna(ndvi_medians)

# Step 5: Feature Engineering
# Add per-row summary statistics of the NDVI columns to both splits.
# x-axis for the trend fit: 0, 1, ..., one step per NDVI column.
time_index = np.arange(len(ndvi_cols))
for df in [train, test]:
    # Snapshot of the raw NDVI columns, taken before the new columns are added.
    ndvi = df[ndvi_cols]
    df['NDVI_mean'] = ndvi.mean(axis=1)
    df['NDVI_std'] = ndvi.std(axis=1)
    df['NDVI_max'] = ndvi.max(axis=1)
    df['NDVI_min'] = ndvi.min(axis=1)
    df['NDVI_range'] = df['NDVI_max'] - df['NDVI_min']
    # Slope of a degree-1 least-squares fit through each row. np.polyfit fits
    # every column of a 2-D `y` in a single call, so the rows are passed
    # transposed instead of invoking polyfit once per row via DataFrame.apply.
    # Coefficients come back highest power first, hence [0] is the slope.
    df['NDVI_trend'] = np.polyfit(time_index, ndvi.to_numpy(dtype=float).T, 1)[0]

# Step 6: Feature Set
# Raw NDVI columns followed by the engineered summary features.
feature_cols = ndvi_cols + ['NDVI_mean', 'NDVI_std', 'NDVI_max', 'NDVI_min', 'NDVI_range', 'NDVI_trend']

# Step 7: Normalize Features
# Scaler for the final model: fitted on every training row and reused
# unchanged on the test set.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(train[feature_cols])
X_test_scaled = scaler.transform(test[feature_cols])

# Step 8: Encode Target Labels
le = LabelEncoder()
y = le.fit_transform(train['class'])

# Step 9: Train/Validation Split
# Split the UNSCALED features and standardise them with statistics from the
# training fold only. Scaling before the split would let the held-out rows
# influence the mean/variance and make the validation score optimistic.
X_tr_raw, X_val_raw, y_tr, y_val = train_test_split(
    train[feature_cols], y, test_size=0.2, random_state=42
)
val_scaler = StandardScaler()
X_tr = val_scaler.fit_transform(X_tr_raw)
X_val = val_scaler.transform(X_val_raw)

# Step 10: Train Logistic Regression Model
# `multi_class` is intentionally not passed: the argument is deprecated since
# scikit-learn 1.5, and with the lbfgs solver the default already fits a
# multinomial model for multiclass targets.
model = LogisticRegression(max_iter=1000, solver='lbfgs')
model.fit(X_tr, y_tr)

# Step 11: Validation Accuracy
val_preds = model.predict(X_val)
print("Validation Accuracy:", accuracy_score(y_val, val_preds))

# Step 12: Final Model Training
# Refit the same estimator on the complete training set and predict the test
# set in one chained call (fit() returns the estimator itself), then map the
# encoded predictions back to the original class labels.
test_preds = model.fit(X_train_scaled, y).predict(X_test_scaled)
test_labels = le.inverse_transform(test_preds)

# Step 13: Submission File
# One row per test sample: its ID next to the predicted class label.
submission = pd.DataFrame({'ID': test['ID']})
submission['class'] = test_labels
submission.to_csv('submission.csv', index=False)
print("✅ submission.csv saved successfully")




Validation Accuracy: 0.915




✅ submission.csv saved successfully
