In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Conv1D, MaxPooling1D, Flatten, Bidirectional, GRU
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping

def load_and_preprocess():
    df = pd.read_csv("hacktrain.csv")
    df.fillna(df.mean(numeric_only=True), inplace=True)
    df.drop(columns=['ID'], inplace=True)

    label_encoder = LabelEncoder()
    df['class'] = label_encoder.fit_transform(df['class'])

    X = df.drop(columns=['class']).values
    y = df['class'].values

    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    X = X.reshape(X.shape[0], X.shape[1], 1)
    y = to_categorical(y)
    return X, y, label_encoder, scaler

def create_bigru_model(input_shape, num_classes):
    model = Sequential([
        Bidirectional(GRU(64, return_sequences=True), input_shape=input_shape),
        Dropout(0.3),
        Bidirectional(GRU(32)),
        Dropout(0.3),
        Dense(64, activation='relu'),
        Dense(num_classes, activation='softmax')
    ])

    model.compile(optimizer=Adam(learning_rate=0.001),
                 loss='categorical_crossentropy',
                 metrics=['accuracy'])
    return model

def create_cnn_model(input_shape, num_classes):
    model = Sequential([
        Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=input_shape),
        MaxPooling1D(pool_size=2),
        Conv1D(filters=128, kernel_size=3, activation='relu'),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax')
    ])

    model.compile(optimizer=Adam(learning_rate=0.001),
                 loss='categorical_crossentropy',
                 metrics=['accuracy'])
    return model

def kfold_cross_validation(X, y, model_type='bigru', n_splits=5, epochs=50, batch_size=32):
    kfold = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
    fold_no = 1
    histories = []

    y_labels = np.argmax(y, axis=1)

    for train_idx, val_idx in kfold.split(X, y_labels):
        print(f'\nTraining fold {fold_no}...')

        if model_type == 'bigru':
            model = create_bigru_model((X.shape[1], X.shape[2]), y.shape[1])
        else:
            model = create_cnn_model((X.shape[1], X.shape[2]), y.shape[1])

        es = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

        history = model.fit(
            X[train_idx], y[train_idx],
            validation_data=(X[val_idx], y[val_idx]),
            epochs=epochs,
            batch_size=batch_size,
            callbacks=[es],
            verbose=1
        )

        histories.append(history)
        fold_no += 1

    return model, histories

if __name__ == "__main__":
    X, y, label_encoder, scaler = load_and_preprocess()

    trained_model, histories = kfold_cross_validation(X, y, model_type='bigru')

    test_data = pd.read_csv("hacktest.csv")
    ID = test_data['ID']
    test_data.drop(['ID'], axis=1, inplace=True)

    X_test = scaler.transform(test_data.values)
    X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

    y_pred_probs = trained_model.predict(X_test)
    y_pred = np.argmax(y_pred_probs, axis=1)
    y_decoded = label_encoder.inverse_transform(y_pred)

    # Save results
    result = pd.DataFrame({
        'ID': ID,
        'class': y_decoded
    })
    result.to_csv("bigru.csv", index=False)


Training fold 1...


  super().__init__(**kwargs)


Epoch 1/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 14ms/step - accuracy: 0.7816 - loss: 0.8525 - val_accuracy: 0.9425 - val_loss: 0.1834
Epoch 2/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 12ms/step - accuracy: 0.9393 - loss: 0.1974 - val_accuracy: 0.9500 - val_loss: 0.1574
Epoch 3/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 13ms/step - accuracy: 0.9515 - loss: 0.1663 - val_accuracy: 0.9431 - val_loss: 0.1740
Epoch 4/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 12ms/step - accuracy: 0.9566 - loss: 0.1397 - val_accuracy: 0.9506 - val_loss: 0.1424
Epoch 5/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - accuracy: 0.9555 - loss: 0.1337 - val_accuracy: 0.9600 - val_loss: 0.1248
Epoch 6/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 13ms/step - accuracy: 0.9594 - loss: 0.1285 - val_accuracy: 0.9606 - val_loss: 0.1316
Epoch 7/50
[1m200/200

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Bidirectional, GRU, Dropout, BatchNormalization
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.metrics import classification_report
from tensorflow.keras.optimizers import Adam

df = pd.read_csv("hacktrain.csv")
df.fillna(df.mean(numeric_only=True), inplace=True)
df.drop(columns=['ID'], inplace=True)
label_encoder = LabelEncoder()
df['class'] = label_encoder.fit_transform(df['class'])
X = df.drop(columns=['class']).values
y = df['class'].values
y_encoded = to_categorical(y)

n_splits = 5
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
fold_scores = []

for fold, (train_idx, val_idx) in enumerate(skf.split(X, y)):
    X_train, X_val = X[train_idx], X[val_idx]
    y_train, y_val = y_encoded[train_idx], y_encoded[val_idx]

    X_train_reshaped = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
    X_val_reshaped = X_val.reshape(X_val.shape[0], X_val.shape[1], 1)

    model = Sequential([
        Bidirectional(GRU(128, return_sequences=True), input_shape=(X_train.shape[1], 1)),
        BatchNormalization(),
        Bidirectional(GRU(64)),
        BatchNormalization(),
        Dense(256, activation='relu'),
        Dropout(0.4),
        Dense(128, activation='relu'),
        Dropout(0.3),
        Dense(len(label_encoder.classes_), activation='softmax')
    ])

    model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    lr_reducer = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6)

    model.fit(X_train_reshaped, y_train, epochs=50, batch_size=64,
              validation_data=(X_val_reshaped, y_val),
              callbacks=[early_stopping, lr_reducer], verbose=0)

    y_pred = model.predict(X_val_reshaped)
    y_pred_classes = np.argmax(y_pred, axis=1)
    y_val_classes = np.argmax(y_val, axis=1)
    fold_scores.append(classification_report(y_val_classes, y_pred_classes,
                                            labels=list(range(len(label_encoder.classes_))),
                                            target_names=label_encoder.classes_, output_dict=True))

test_data = pd.read_csv("hacktest.csv")
ID = test_data['ID']
test_data.drop(['ID'], axis=1, inplace=True)
X_test = test_data.values
X_test_reshaped = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)
y_test_pred = model.predict(X_test_reshaped)
y_test_classes = np.argmax(y_test_pred, axis=1)
y_decoded = label_encoder.inverse_transform(y_test_classes)

result = pd.DataFrame({'ID': ID, 'class': y_decoded})
result.to_csv("submission_bigru.csv", index=False)

  super().__init__(**kwargs)


[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(**kwargs)


[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(**kwargs)


[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(**kwargs)


[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(**kwargs)


[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
[1m31/89[0m [32m━━━━━━[0m[37m━━━━━━━━━━━━━━[0m [1m0s[0m 3ms/step

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step


In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Conv1D, MaxPooling1D, Flatten, Bidirectional, GRU
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping

def load_and_preprocess():
    df = pd.read_csv("hacktrain.csv")
    df.fillna(df.mean(numeric_only=True), inplace=True)
    df.drop(columns=['ID'], inplace=True)

    label_encoder = LabelEncoder()
    df['class'] = label_encoder.fit_transform(df['class'])

    X = df.drop(columns=['class']).values
    y = df['class'].values

    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    X = X.reshape(X.shape[0], X.shape[1], 1)
    y = to_categorical(y)
    return X, y, label_encoder, scaler

def create_bigru_model(input_shape, num_classes):
    model = Sequential([
        Bidirectional(GRU(64, return_sequences=True), input_shape=input_shape),
        Dropout(0.3),
        Bidirectional(GRU(32)),
        Dropout(0.3),
        Dense(64, activation='relu'),
        Dense(num_classes, activation='softmax')
    ])

    model.compile(optimizer=Adam(learning_rate=0.001),
                 loss='categorical_crossentropy',
                 metrics=['accuracy'])
    return model

def create_cnn_model(input_shape, num_classes):
    model = Sequential([
        Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=input_shape),
        MaxPooling1D(pool_size=2),
        Conv1D(filters=128, kernel_size=3, activation='relu'),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax')
    ])

    model.compile(optimizer=Adam(learning_rate=0.001),
                 loss='categorical_crossentropy',
                 metrics=['accuracy'])
    return model

def kfold_cross_validation(X, y, model_type='bigru', n_splits=5, epochs=50, batch_size=32):
    kfold = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
    fold_no = 1
    histories = []

    y_labels = np.argmax(y, axis=1)

    for train_idx, val_idx in kfold.split(X, y_labels):
        print(f'\nTraining fold {fold_no}...')

        if model_type == 'bigru':
            model = create_bigru_model((X.shape[1], X.shape[2]), y.shape[1])
        else:
            model = create_cnn_model((X.shape[1], X.shape[2]), y.shape[1])

        es = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

        history = model.fit(
            X[train_idx], y[train_idx],
            validation_data=(X[val_idx], y[val_idx]),
            epochs=epochs,
            batch_size=batch_size,
            callbacks=[es],
            verbose=1
        )

        histories.append(history)
        fold_no += 1

    return model, histories

if __name__ == "__main__":
    X, y, label_encoder, scaler = load_and_preprocess()

    trained_model, histories = kfold_cross_validation(X, y, model_type='cnn')

    test_data = pd.read_csv("hacktest.csv")
    ID = test_data['ID']
    test_data.drop(['ID'], axis=1, inplace=True)

    X_test = scaler.transform(test_data.values)
    X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

    y_pred_probs = trained_model.predict(X_test)
    y_pred = np.argmax(y_pred_probs, axis=1)
    y_decoded = label_encoder.inverse_transform(y_pred)

    result = pd.DataFrame({
        'ID': ID,
        'class': y_decoded
    })
    result.to_csv("cnn.csv", index=False)


Training fold 1...
Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 7ms/step - accuracy: 0.7983 - loss: 0.6754 - val_accuracy: 0.9231 - val_loss: 0.2555
Epoch 2/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9265 - loss: 0.2468 - val_accuracy: 0.9500 - val_loss: 0.1655
Epoch 3/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9426 - loss: 0.1903 - val_accuracy: 0.9575 - val_loss: 0.1517
Epoch 4/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.9520 - loss: 0.1602 - val_accuracy: 0.9606 - val_loss: 0.1338
Epoch 5/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9568 - loss: 0.1492 - val_accuracy: 0.9681 - val_loss: 0.1272
Epoch 6/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9615 - loss: 0.1302 - val_accuracy: 0.9638 - val_loss: 0.1204
Epoch 7/50
[1m200/200[0m [32m━━━━━━━

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.impute import KNNImputer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Bidirectional, GRU
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping

def load_and_preprocess():
    df = pd.read_csv("hacktrain.csv")
    df.drop(columns=['ID'], inplace=True)

    imputer = KNNImputer(n_neighbors=5)
    numeric_cols = df.select_dtypes(include=np.number).columns
    df[numeric_cols] = imputer.fit_transform(df[numeric_cols])

    label_encoder = LabelEncoder()
    df['class'] = label_encoder.fit_transform(df['class'])

    X = df.drop(columns=['class']).values
    y = df['class'].values

    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    X = X.reshape(X.shape[0], X.shape[1], 1)
    y = to_categorical(y)
    return X, y, label_encoder, scaler, imputer

def create_bigru_model(input_shape, num_classes):
    model = Sequential([
        Bidirectional(GRU(128, return_sequences=True), input_shape=input_shape),
        Dropout(0.4),
        Bidirectional(GRU(64)),
        Dropout(0.4),
        Dense(128, activation='relu'),
        Dense(num_classes, activation='softmax')
    ])

    model.compile(optimizer=Adam(learning_rate=0.0005),
                 loss='categorical_crossentropy',
                 metrics=['accuracy'])
    return model

def kfold_cross_validation(X, y, n_splits=5, epochs=100, batch_size=32):
    kfold = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
    fold_no = 1
    histories = []
    y_labels = np.argmax(y, axis=1)

    for train_idx, val_idx in kfold.split(X, y_labels):
        model = create_bigru_model((X.shape[1], X.shape[2]), y.shape[1])
        es = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

        history = model.fit(
            X[train_idx], y[train_idx],
            validation_data=(X[val_idx], y[val_idx]),
            epochs=epochs,
            batch_size=batch_size,
            callbacks=[es],
            verbose=1
        )
        histories.append(history)
        fold_no += 1

    return model, histories

if __name__ == "__main__":
    X, y, label_encoder, scaler, imputer = load_and_preprocess()
    trained_model, _ = kfold_cross_validation(X, y)

    test_data = pd.read_csv("hacktest.csv")
    ID = test_data['ID']
    test_data.drop(['ID'], axis=1, inplace=True)

    numeric_cols = test_data.select_dtypes(include=np.number).columns
    test_data[numeric_cols] = imputer.transform(test_data[numeric_cols])

    X_test = scaler.transform(test_data.values)
    X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

    y_pred = np.argmax(trained_model.predict(X_test), axis=1)
    y_decoded = label_encoder.inverse_transform(y_pred)

    pd.DataFrame({'ID': ID, 'class': y_decoded}).to_csv("bigru_1.csv", index=False)

Epoch 1/100


  super().__init__(**kwargs)


[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 14ms/step - accuracy: 0.7722 - loss: 0.9069 - val_accuracy: 0.9431 - val_loss: 0.1875
Epoch 2/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 15ms/step - accuracy: 0.9386 - loss: 0.1973 - val_accuracy: 0.9500 - val_loss: 0.1453
Epoch 3/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - accuracy: 0.9501 - loss: 0.1549 - val_accuracy: 0.9538 - val_loss: 0.1337
Epoch 4/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - accuracy: 0.9568 - loss: 0.1432 - val_accuracy: 0.9500 - val_loss: 0.1462
Epoch 5/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.9542 - loss: 0.1402 - val_accuracy: 0.9544 - val_loss: 0.1262
Epoch 6/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - accuracy: 0.9600 - loss: 0.1309 - val_accuracy: 0.9600 - val_loss: 0.1206
Epoch 7/100
[1m200/200[0m 

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.impute import KNNImputer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Bidirectional, GRU, BatchNormalization, LayerNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.regularizers import l2

def load_and_preprocess():
    df = pd.read_csv("hacktrain.csv")
    df.drop(columns=['ID'], inplace=True)

    imputer = KNNImputer(n_neighbors=3)
    numeric_cols = df.select_dtypes(include=np.number).columns
    df[numeric_cols] = imputer.fit_transform(df[numeric_cols])

    label_encoder = LabelEncoder()
    df['class'] = label_encoder.fit_transform(df['class'])

    X = df.drop(columns=['class']).values
    y = df['class'].values

    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    X = X.reshape(X.shape[0], X.shape[1], 1)
    y = to_categorical(y)
    return X, y, label_encoder, scaler, imputer

def create_bigru_model(input_shape, num_classes):
    model = Sequential([
        Bidirectional(GRU(256, return_sequences=True, kernel_regularizer=l2(0.001)), input_shape=input_shape),
        LayerNormalization(),
        Dropout(0.3),
        Bidirectional(GRU(128, return_sequences=True, kernel_regularizer=l2(0.001))),
        LayerNormalization(),
        Dropout(0.3),
        Bidirectional(GRU(64)),
        BatchNormalization(),
        Dropout(0.2),
        Dense(256, activation='relu', kernel_regularizer=l2(0.001)),
        BatchNormalization(),
        Dropout(0.2),
        Dense(128, activation='relu', kernel_regularizer=l2(0.001)),
        Dense(num_classes, activation='softmax')
    ])

    model.compile(optimizer=Adam(learning_rate=0.0002),
                 loss='categorical_crossentropy',
                 metrics=['accuracy'])
    return model

def kfold_cross_validation(X, y, n_splits=5, epochs=50, batch_size=64):
    kfold = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
    fold_no = 1
    histories = []
    y_labels = np.argmax(y, axis=1)

    for train_idx, val_idx in kfold.split(X, y_labels):
        model = create_bigru_model((X.shape[1], X.shape[2]), y.shape[1])
        es = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)
        lr_reducer = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)

        history = model.fit(
            X[train_idx], y[train_idx],
            validation_data=(X[val_idx], y[val_idx]),
            epochs=epochs,
            batch_size=batch_size,
            callbacks=[es, lr_reducer],
            verbose=1
        )
        histories.append(history)
        fold_no += 1

    return model, histories

if __name__ == "__main__":
    X, y, label_encoder, scaler, imputer = load_and_preprocess()
    trained_model, _ = kfold_cross_validation(X, y)

    test_data = pd.read_csv("hacktest.csv")
    ID = test_data['ID']
    test_data.drop(['ID'], axis=1, inplace=True)

    numeric_cols = test_data.select_dtypes(include=np.number).columns
    test_data[numeric_cols] = imputer.transform(test_data[numeric_cols])

    X_test = scaler.transform(test_data.values)
    X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

    y_pred = np.argmax(trained_model.predict(X_test), axis=1)
    y_decoded = label_encoder.inverse_transform(y_pred)

    pd.DataFrame({'ID': ID, 'class': y_decoded}).to_csv("improved_bigru.csv", index=False)

  super().__init__(**kwargs)


Epoch 1/50
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 29ms/step - accuracy: 0.5863 - loss: 2.3474 - val_accuracy: 0.9038 - val_loss: 1.4492 - learning_rate: 2.0000e-04
Epoch 2/50
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 25ms/step - accuracy: 0.9010 - loss: 1.4200 - val_accuracy: 0.9219 - val_loss: 1.2519 - learning_rate: 2.0000e-04
Epoch 3/50
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 24ms/step - accuracy: 0.9297 - loss: 1.2284 - val_accuracy: 0.9456 - val_loss: 1.1052 - learning_rate: 2.0000e-04
Epoch 4/50
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 22ms/step - accuracy: 0.9396 - loss: 1.1246 - val_accuracy: 0.9431 - val_loss: 1.0835 - learning_rate: 2.0000e-04
Epoch 5/50
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 22ms/step - accuracy: 0.9464 - loss: 1.0487 - val_accuracy: 0.9550 - val_loss: 0.9644 - learning_rate: 2.0000e-04
Epoch 6/50
[1m100/100[0m [32m━━━━━━━━━━━━━

In [None]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.impute import KNNImputer
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_val_predict
from scipy.signal import savgol_filter

def load_data():
    train = pd.read_csv("hacktrain.csv")
    test = pd.read_csv("hacktest.csv")
    return train, test
train, test = load_data()
train

Unnamed: 0.1,Unnamed: 0,ID,class,20150720_N,20150602_N,20150517_N,20150501_N,20150415_N,20150330_N,20150314_N,...,20140610_N,20140525_N,20140509_N,20140423_N,20140407_N,20140322_N,20140218_N,20140202_N,20140117_N,20140101_N
0,0,1,water,637.5950,658.668,-1882.030,-1924.36,997.904,-1739.990,630.087,...,,-1043.160,-1942.490,267.138,,,211.328,-2203.020,-1180.19,433.906
1,1,2,water,634.2400,593.705,-1625.790,-1672.32,914.198,-692.386,707.626,...,,-933.934,-625.385,120.059,364.858,476.972,220.878,-2250.000,-1360.56,524.075
2,3,4,water,58.0174,-1599.160,,-1052.63,,-1564.630,,...,-1025.880,368.622,,-1227.800,304.621,,369.214,-2202.120,,-1343.550
3,4,5,water,72.5180,,380.436,-1256.93,515.805,-1413.180,-802.942,...,-1813.950,155.624,,-924.073,432.150,282.833,298.320,-2197.360,,-826.727
4,7,8,water,1136.4400,,,1647.83,1935.800,,2158.980,...,1535.000,1959.430,-279.317,-384.915,-113.406,1020.720,1660.650,-116.801,-568.05,-1357.140
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7995,10537,10538,impervious,1207.7000,984.620,,1166.25,937.478,1072.700,823.896,...,1117.740,1176.600,1044.110,,369.082,465.843,362.882,979.795,,433.659
7996,10538,10539,impervious,2170.3500,1419.720,1361.000,1478.71,983.911,1262.110,1422.860,...,984.634,2128.970,1379.660,,762.633,485.204,446.724,771.747,1589.06,506.936
7997,10541,10542,impervious,1895.6800,1454.740,,1033.56,1930.380,1057.150,1471.600,...,888.408,2093.020,1232.110,1190.830,1441.460,1170.880,1095.000,1818.650,2501.72,1247.770
7998,10542,10543,impervious,3465.7400,1283.320,413.412,4391.05,1146.820,4473.050,1614.750,...,5833.760,4047.320,4515.800,433.177,277.296,744.143,,3759.710,,388.346


In [None]:

def preprocess(df):
    df = df.copy()
    ndvi_cols = [col for col in df.columns if '_N' in col]

    # Temporal interpolation for NDVI
    df[ndvi_cols] = df[ndvi_cols].interpolate(axis=1, limit_direction='both')

    # Apply Savitzky-Golay filter for smoothing
    df[ndvi_cols] = savgol_filter(df[ndvi_cols], window_length=5, polyorder=2, axis=1)

    # Extract temporal features
    df['ndvi_mean'] = df[ndvi_cols].mean(axis=1)
    df['ndvi_std'] = df[ndvi_cols].std(axis=1)
    df['ndvi_amp'] = df[ndvi_cols].max(axis=1) - df[ndvi_cols].min(axis=1)

    # Seasonal features (assuming columns are in chronological order)
    spring = ndvi_cols[:7]  # First 7 time points as spring
    summer = ndvi_cols[7:14]
    fall = ndvi_cols[14:21]
    winter = ndvi_cols[21:]

    df['spring_mean'] = df[spring].mean(axis=1)
    df['summer_mean'] = df[summer].mean(axis=1)
    df['fall_mean'] = df[fall].mean(axis=1)
    df['winter_mean'] = df[winter].mean(axis=1)

    return df

def main():
    train, test = load_data()

    # Preprocess
    train_processed = preprocess(train)
    test_processed = preprocess(test)

    # Prepare data
    X_train = train_processed.drop(columns=['ID', 'class'])
    y_train = train_processed['class']
    X_test = test_processed.drop(columns=['ID'])

    # Encode labels
    le = LabelEncoder()
    y_train = le.fit_transform(y_train)

    # Build pipeline
    pipeline = make_pipeline(
        KNNImputer(n_neighbors=5),
        StandardScaler(),
        SelectKBest(f_classif, k=20),
        LogisticRegression(multi_class='multinomial',
                          solver='lbfgs',
                          max_iter=1000,
                          C=0.1,
                          penalty='l2')
    )

    # Train and predict
    pipeline.fit(X_train, y_train)
    test_pred = pipeline.predict(X_test)
    test_pred_labels = le.inverse_transform(test_pred)

    # Save results
    submission = pd.DataFrame({'ID': test['ID'], 'class': test_pred_labels})
    submission.to_csv("lr_submission.csv", index=False)

if __name__ == "__main__":
    main()



In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.impute import KNNImputer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report
from scipy.signal import savgol_filter

df = pd.read_csv("hacktrain.csv")
ID = df['ID']
df.drop(columns=['ID'], inplace=True)

numeric_cols = df.select_dtypes(include=np.number).columns
df[numeric_cols] = savgol_filter(df[numeric_cols], window_length=5, polyorder=2, axis=1)
imputer = KNNImputer(n_neighbors=3)
df[numeric_cols] = imputer.fit_transform(df[numeric_cols])

df['mean_ndvi'] = df[numeric_cols].mean(axis=1)
df['std_ndvi'] = df[numeric_cols].std(axis=1)
df['trend'] = df[numeric_cols].apply(lambda x: np.polyfit(range(len(x)), x, 1)[0], axis=1)

scaler = StandardScaler()
X = scaler.fit_transform(df.drop(columns=['class']))
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df['class'])

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
val_reports = []

for train_idx, val_idx in skf.split(X, y):
    X_train, X_val = X[train_idx], X[val_idx]
    y_train, y_val = y[train_idx], y[val_idx]

    model = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=1000, C=0.5, penalty='l2')
    model.fit(X_train, y_train)

    y_val_pred = model.predict(X_val)
    report = classification_report(y_val, y_val_pred, labels=list(range(len(label_encoder.classes_))),
                                  target_names=label_encoder.classes_, output_dict=False)
    val_reports.append(report)
    print(f"Fold Validation Classification Report:\n{report}")

test_data = pd.read_csv("hacktest.csv")
test_ID = test_data['ID']
test_data.drop(['ID'], axis=1, inplace=True)

test_data[numeric_cols] = imputer.transform(test_data[numeric_cols])
test_data['mean_ndvi'] = test_data[numeric_cols].mean(axis=1)
test_data['std_ndvi'] = test_data[numeric_cols].std(axis=1)
test_data['trend'] = test_data[numeric_cols].apply(lambda x: np.polyfit(range(len(x)), x, 1)[0], axis=1)

X_test_final = scaler.transform(test_data)
y_test_pred = model.predict(X_test_final)
y_decoded = label_encoder.inverse_transform(y_test_pred)

result = pd.DataFrame({'ID': test_ID, 'class': y_decoded})
result.to_csv("submission_lr.csv", index=False)



Fold Validation Classification Report:
              precision    recall  f1-score   support

        farm       0.63      0.46      0.53       168
      forest       0.90      0.97      0.93      1232
       grass       0.54      0.36      0.43        39
  impervious       0.80      0.62      0.70       134
     orchard       1.00      0.17      0.29         6
       water       1.00      0.67      0.80        21

    accuracy                           0.87      1600
   macro avg       0.81      0.54      0.61      1600
weighted avg       0.86      0.87      0.86      1600



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Fold Validation Classification Report:
              precision    recall  f1-score   support

        farm       0.66      0.46      0.54       168
      forest       0.91      0.97      0.94      1232
       grass       0.48      0.26      0.33        39
  impervious       0.71      0.70      0.71       134
     orchard       0.00      0.00      0.00         6
       water       0.91      0.48      0.62        21

    accuracy                           0.87      1600
   macro avg       0.61      0.48      0.52      1600
weighted avg       0.85      0.87      0.85      1600



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Fold Validation Classification Report:
              precision    recall  f1-score   support

        farm       0.58      0.45      0.51       168
      forest       0.90      0.97      0.93      1232
       grass       0.67      0.41      0.51        39
  impervious       0.79      0.56      0.66       134
     orchard       0.00      0.00      0.00         6
       water       0.86      0.57      0.69        21

    accuracy                           0.86      1600
   macro avg       0.63      0.49      0.55      1600
weighted avg       0.84      0.86      0.85      1600



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Fold Validation Classification Report:
              precision    recall  f1-score   support

        farm       0.66      0.46      0.54       169
      forest       0.89      0.97      0.93      1232
       grass       0.47      0.21      0.29        39
  impervious       0.69      0.63      0.66       133
     orchard       0.00      0.00      0.00         6
       water       1.00      0.29      0.44        21

    accuracy                           0.86      1600
   macro avg       0.62      0.43      0.48      1600
weighted avg       0.84      0.86      0.84      1600

Fold Validation Classification Report:
              precision    recall  f1-score   support

        farm       0.67      0.45      0.54       168
      forest       0.89      0.97      0.93      1231
       grass       0.57      0.30      0.39        40
  impervious       0.74      0.65      0.69       134
     orchard       1.00      0.17      0.29         6
       water       0.67      0.29      0.40        21


In [None]:
result

Unnamed: 0,ID,class
0,1,water
1,2,forest
2,3,forest
3,4,farm
4,5,forest
...,...,...
2840,2841,water
2841,2842,water
2842,2843,water
2843,2844,water
