In [3]:
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

# Baseline pipeline for this cell:
#   read CSVs -> mean-impute -> standardise -> logistic regression -> submission.csv
# Every transformer is fitted on the training data only and then re-applied to
# the test data, so no test-set statistics reach the model.

# Load the datasets
train_df = pd.read_csv("hacktrain.csv")
test_df = pd.read_csv("hacktest.csv")

print("Train shape:", train_df.shape)
print("Test shape:", test_df.shape)

# NOTE: a bare `train_df.head()` used to follow here. Jupyter only renders the
# value of a cell's *last* expression, so in the middle of the cell it was
# evaluated and thrown away. To preview the data, run `train_df.head()` as the
# final line of its own cell.

# Split features and labels
X_train = train_df.drop(columns=['ID', 'class'])
y_train = train_df['class']

# Pick the test features by the training column list instead of merely dropping
# 'ID': this guarantees the same columns in the same order as the model was
# fitted on, ignores any extra column in the test file, and raises a clear
# KeyError if a training feature is missing from it.
X_test = test_df[X_train.columns]

# Handle missing NDVI values: replace NaNs with the per-column training mean
imputer = SimpleImputer(strategy='mean')

X_train_imputed = imputer.fit_transform(X_train)
X_test_imputed = imputer.transform(X_test)

# Scale the data using the training set's statistics
scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train_imputed)
X_test_scaled = scaler.transform(X_test_imputed)

# Train the logistic regression model
# max_iter is raised to 1000 to give the solver more iterations to converge.
model = LogisticRegression(max_iter=1000)
model.fit(X_train_scaled, y_train)

# Predict on the test data
preds = model.predict(X_test_scaled)

# Prepare and save the submission file (one row per test ID, in test order)
submission = pd.DataFrame({
    'ID': test_df['ID'],
    'class': preds
})

submission.to_csv("submission.csv", index=False)
print("Submission file saved as submission.csv")




Train shape: (8000, 30)
Test shape: (2845, 29)
Submission file saved as submission.csv
