In [2]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

# Baseline pipeline: fit a mean-imputed, standardized logistic regression on
# hacktrain.csv and write class predictions for hacktest.csv to a CSV file.

# Load training and test data
train_df = pd.read_csv('hacktrain.csv')
test_df = pd.read_csv('hacktest.csv')

# Separate features and target from training data
X_train = train_df.drop(columns=['ID', 'class'])
y_train = train_df['class']

# Prepare test data features.
# Select exactly the training feature columns, in training order, rather than
# only dropping 'ID'. The imputer, scaler and model below are all fitted on
# this column layout, so an extra or reordered column in the test file would
# otherwise be rejected by the fitted transformers (or, depending on the
# scikit-learn version, silently mis-aligned against them).
# A feature that is absent from the test file raises a KeyError here.
feature_columns = X_train.columns
X_test = test_df[feature_columns]
test_ids = test_df['ID']

# Step 1: Impute missing values
# The column means are learned from the training data only and then reused
# for the test data.
imputer = SimpleImputer(strategy='mean')
X_train_imputed = imputer.fit_transform(X_train)
X_test_imputed = imputer.transform(X_test)

# Step 2: Feature Scaling
# As with imputation, the scaler is fitted on the training data and only
# applied to the test data.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_imputed)
X_test_scaled = scaler.transform(X_test_imputed)

# Step 3: Train Logistic Regression model
# NOTE(review): `multi_class` appears to be deprecated in recent scikit-learn
# releases (1.5+). It is kept here so the fitted model is unchanged; confirm
# against the installed version before removing it.
model = LogisticRegression(
    multi_class='multinomial',
    solver='lbfgs',
    max_iter=1000,
    random_state=42,
)
model.fit(X_train_scaled, y_train)

# Step 4: Predict on test set
y_pred = model.predict(X_test_scaled)

# Step 5: Create submission DataFrame
# One row per test record: its ID alongside the predicted class label.
submission_df = pd.DataFrame({
    'ID': test_ids,
    'class': y_pred,
})

# Save submission file
submission_path = 'ndvi_logreg_submission_final.csv'
submission_df.to_csv(submission_path, index=False)

# Bare expression: echoes the output path when this is the last line of a
# notebook cell.
submission_path




'ndvi_logreg_submission_final.csv'