<a href="https://colab.research.google.com/github/Swastika0509/Summer-Analytics-2025-Swastika/blob/main/Summer_Analytics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [None]:
# Load the training table; the path is relative, so the CSV must sit in the
# notebook's current working directory.
train_df = pd.read_csv("hacktrain.csv")

In [None]:
# Collect every column whose name ends in '_N' (handled as the NDVI readings
# from here on). The list keeps the order in which the columns appear in the file.
ndvi_cols = list(filter(lambda name: name.endswith('_N'), train_df.columns))

In [None]:
# Clean the NDVI columns in one pass:
#   1. coerce every entry to a number, turning unparseable values into NaN;
#   2. fill gaps by interpolating across each row (axis=1), in both directions
#      so gaps at the start or end of a row are filled as well.
# NOTE(review): row-wise interpolation only makes sense if the '_N' columns are
# in a meaningful (presumably chronological) order - confirm against the data.
train_df[ndvi_cols] = (
    train_df[ndvi_cols]
    .apply(pd.to_numeric, errors='coerce')
    .interpolate(axis=1, limit_direction='both')
)

In [None]:
# Fallback for anything the row-wise interpolation could not fill: replace the
# remaining NaNs with the per-column mean. The fitted imputer is kept so the
# same training means can be applied to the test data later.
imputer = SimpleImputer(strategy='mean')
imputer.fit(train_df[ndvi_cols])
train_df[ndvi_cols] = imputer.transform(train_df[ndvi_cols])

In [None]:
# Per-row summary features computed over the NDVI columns only. The snapshot is
# taken once, before any new column is added, so each statistic sees the same
# set of columns.
train_ndvi = train_df[ndvi_cols]
train_df['NDVI_mean'] = train_ndvi.mean(axis=1)
train_df['NDVI_std'] = train_ndvi.std(axis=1)
train_df['NDVI_max'] = train_ndvi.max(axis=1)
train_df['NDVI_min'] = train_ndvi.min(axis=1)
# Trend = last NDVI column minus first NDVI column, so it depends on column order.
train_df['NDVI_trend'] = train_ndvi.iloc[:, -1] - train_ndvi.iloc[:, 0]

In [None]:
# Map the target labels to integer codes. The fitted encoder is kept because
# its `classes_` are needed later to name classes in the report and to turn
# predictions back into the original labels.
# Gotcha: this overwrites train_df['class'] in place, so re-running only this
# cell would fit the encoder on the already-encoded integers. Re-run from the
# data-loading cell instead.
label_encoder = LabelEncoder()
label_encoder.fit(train_df['class'])
train_df['class'] = label_encoder.transform(train_df['class'])

In [None]:
# Target vector: the encoded class column.
y = train_df['class']
# Feature matrix: everything except the target and the row identifier
# (errors='ignore' tolerates either column being absent).
X = train_df.drop(columns=['class', 'ID'], errors='ignore')

# Standardise every feature; the fitted scaler is reused on the test set.
# NOTE(review): the scaler is fitted on all training rows before the
# train/validation split, so validation rows contribute to the scaling
# statistics and validation scores may be slightly optimistic.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

In [None]:
# Hold out 20% of the rows for validation. The split is stratified on the
# target so class proportions match in both parts, and seeded so it is
# reproducible.
X_train, X_val, y_train, y_val = train_test_split(
    X_scaled,
    y,
    random_state=42,
    stratify=y,
    test_size=0.2,
)

In [None]:
# Logistic-regression classifier trained on the scaled training split.
#
# `multi_class` is deliberately not passed: scikit-learn deprecated the
# parameter in 1.5 (it raises a FutureWarning and is scheduled for removal).
# With the lbfgs solver the default already fits a multinomial (softmax) model
# whenever the target has three or more classes, so the result is unchanged.
# NOTE(review): for a strictly two-class target the default is an ordinary
# binary logistic fit - this assumes the task has 3+ classes; confirm.
#
# max_iter is raised above the library default to give lbfgs room to converge.
model = LogisticRegression(solver='lbfgs', max_iter=1000)
model.fit(X_train, y_train)

In [None]:
# Predicted (encoded) class for every row of the held-out validation split.
y_val_pred = model.predict(X_val)

In [None]:
# Per-class precision / recall / F1 on the validation split, labelled with the
# original class names recovered from the fitted label encoder.
print(
    classification_report(
        y_val,
        y_val_pred,
        target_names=label_encoder.classes_,
    )
)

In [None]:
# Load the test table; the path is relative, so the CSV must sit in the
# notebook's current working directory.
test_df = pd.read_csv("hacktest.csv")
# Keep the row identifiers aside: they form the 'ID' column of the submission.
submission_ids = test_df['ID']

In [None]:
# Reuse the NDVI column list derived from the training data instead of
# re-deriving it from the test file. The imputer was fitted on exactly those
# columns in that order, and NDVI_trend is "last column minus first column",
# so a different set or order here would make the test features inconsistent
# with the training features. Extra '_N' columns in the test file are ignored.
# This also avoids silently rebinding `ndvi_cols`, which earlier cells rely on
# if they are re-run.
missing_ndvi_cols = [col for col in ndvi_cols if col not in test_df.columns]
if missing_ndvi_cols:
    # Fail here with a clear message rather than deep inside the imputer.
    raise KeyError(
        f"Test data is missing NDVI columns present in training: {missing_ndvi_cols}"
    )

In [None]:
# Apply the same cleaning as for the training data: coerce to numeric
# (unparseable values become NaN), then interpolate across each row in both
# directions.
test_df[ndvi_cols] = (
    test_df[ndvi_cols]
    .apply(pd.to_numeric, errors='coerce')
    .interpolate(axis=1, limit_direction='both')
)
# Fill whatever is still missing with the column means learned from the
# training data. The imputer is only applied here, not re-fitted.
test_df[ndvi_cols] = imputer.transform(test_df[ndvi_cols])

In [None]:
# Same per-row summary features as built for the training data, computed from
# one snapshot of the NDVI columns taken before any new column is added.
test_ndvi = test_df[ndvi_cols]
test_df['NDVI_mean'] = test_ndvi.mean(axis=1)
test_df['NDVI_std'] = test_ndvi.std(axis=1)
test_df['NDVI_max'] = test_ndvi.max(axis=1)
test_df['NDVI_min'] = test_ndvi.min(axis=1)
# Trend = last NDVI column minus first NDVI column, so it depends on column order.
test_df['NDVI_trend'] = test_ndvi.iloc[:, -1] - test_ndvi.iloc[:, 0]

In [None]:
# Select the test features by the training feature names so the columns are
# the same ones, in the same order, that the scaler and model were fitted on.
# A column missing from the test frame raises KeyError here.
X_test = test_df.loc[:, X.columns]
# Scale with the statistics learned from the training data (no re-fit).
X_test_scaled = scaler.transform(X_test)

In [None]:
# Predict encoded classes for the scaled test features...
test_preds = model.predict(X_test_scaled)
# ...and map the integer codes back to the original class labels.
test_labels = label_encoder.inverse_transform(test_preds)

In [None]:
# Assemble the submission table: one row per test ID with its predicted label.
submission_df = pd.DataFrame({'ID': submission_ids})
submission_df['class'] = test_labels

# Write without the index so the file contains only the 'ID' and 'class' columns.
submission_df.to_csv("submission.csv", index=False)
print("saved")