In [None]:
import os

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

# End-to-end cell: load the training CSV, fit a small CNN, write test-set
# predictions to disk, and report accuracy on a held-out validation split.

# --- Configuration ------------------------------------------------------------
# Each CSV row holds one flattened sample that is reshaped to IMG_SHAPE.
IMG_SHAPE = (20, 20, 3)
NUM_CLASSES = 2  # width of the one-hot target and of the final softmax layer
SEED = 42
TRAIN_CSV = '/content/drive/MyDrive/DATASET/train.csv'
TEST_CSV = '/content/drive/MyDrive/DATASET/test.csv'
OUTPUT_CSV = '/content/drive/MyDrive/DATASET/soln/solution_format.csv'

# --- Load training data -------------------------------------------------------
train_data = pd.read_csv(TRAIN_CSV)

# Column 0 is the target; every remaining column is a feature.
X = train_data.iloc[:, 1:].values
labels = train_data.iloc[:, 0].values

# Fail early with a readable message if the feature width cannot be reshaped
# into IMG_SHAPE (reshape would raise too, but with a less helpful error).
n_features = int(np.prod(IMG_SHAPE))
assert X.shape[1] == n_features, (
    f"expected {n_features} feature columns, found {X.shape[1]}"
)

# Reshape the flat feature rows to image-like (height, width, channels) tensors
X = X.reshape((X.shape[0],) + IMG_SHAPE)

# One-hot encode the target. num_classes is pinned so the encoded width always
# matches the softmax layer instead of being inferred from the largest label.
y = to_categorical(labels, num_classes=NUM_CLASSES)

# --- Train / validation split -------------------------------------------------
# Stratify on the integer labels so both splits keep the same class ratio;
# otherwise validation metrics depend on how the random split happens to fall.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=SEED, stratify=labels
)

# --- Model --------------------------------------------------------------------
# Three convolutions with shrinking kernels and one max-pool, followed by a
# dense head with dropout and a softmax over NUM_CLASSES.
model = Sequential()
model.add(Conv2D(128, kernel_size=(10, 10), activation='relu', input_shape=IMG_SHAPE))
model.add(Conv2D(64, kernel_size=(5, 5), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu'))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(NUM_CLASSES, activation='softmax'))

# Compile with Adam. 0.001 is Adam's default learning rate; it is spelled out
# here so it is easy to tune.
optimizer = Adam(learning_rate=0.001)
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

# Train for 30 epochs, monitoring loss/accuracy on the validation split
model.fit(X_train, y_train, batch_size=32, epochs=30, validation_data=(X_val, y_val))

# --- Predict on the test set --------------------------------------------------
test_data = pd.read_csv(TEST_CSV)

# Column 0 is skipped, mirroring the training layout.
# NOTE(review): assumes column 0 of test.csv is the 'id' column used below - confirm.
X_test = test_data.iloc[:, 1:].values
X_test = X_test.reshape((X_test.shape[0],) + IMG_SHAPE)

# Class probabilities -> index of the most probable class per row
predictions = model.predict(X_test)
predicted_labels = np.argmax(predictions, axis=1)

# Create a DataFrame with IDs and predictions
result_df = pd.DataFrame({'id': test_data['id'], 'labels': predicted_labels})

# to_csv does not create missing parent directories, so make sure it exists
os.makedirs(os.path.dirname(OUTPUT_CSV), exist_ok=True)
result_df.to_csv(OUTPUT_CSV, index=False)

# --- Validation accuracy ------------------------------------------------------
# Both the one-hot targets and the predicted probabilities are collapsed to
# class indices before comparison.
accuracy = accuracy_score(y_val.argmax(axis=1), np.argmax(model.predict(X_val), axis=1))
print("Validation Accuracy:", accuracy)


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Validation Accuracy: 0.8314285714285714


In [None]:
from sklearn.metrics import f1_score

# Score the validation split with the fitted network; the softmax output gives
# one row of class probabilities per sample.
val_predictions = model.predict(X_val)

# Collapse the probabilities and the one-hot targets to integer class ids
val_predicted_labels = val_predictions.argmax(axis=1)
val_true_labels = y_val.argmax(axis=1)

# F1 score with scikit-learn's default settings
f1 = f1_score(val_true_labels, val_predicted_labels)

print("Validation F1 Score:", f1)

Validation F1 Score: 0.6563106796116505
