## NEURAL NETWORKS V0

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import json

# Load the training and test sets from disk.
train_source = pd.read_csv("./data/train_cleaned_v2.csv")
# The 'valid' column is dropped from the training frame right after loading.
train_source = train_source.drop(['valid'], axis=1)
test_source = pd.read_csv("./data/testing_set.csv")

# Work on copies so the source frames stay untouched
# (test_source['test_idx'] is read again when the predictions are exported).
train_data = train_source.copy()
test_data = test_source.copy()

# Expand the Time column into three numeric features: day, month and year.
# Each frame's Time column is parsed once and the parsed series is reused for
# all three features, instead of re-running pd.to_datetime per feature.
for frame in (train_data, test_data):
    parsed_time = pd.to_datetime(frame['Time'])
    frame['day'] = parsed_time.dt.day
    frame['month'] = parsed_time.dt.month
    frame['year'] = parsed_time.dt.year

# Remove the columns that are not model inputs: the row identifier
# (train_idx / test_idx) and the raw Time column, now replaced by day/month/year.
train_data = train_data.drop(['train_idx', "Time"], axis=1)
test_data = test_data.drop(['test_idx', "Time"], axis=1)

# Separate the target column (y) from the predictor columns (X).
y_train = train_data["label"]
X_train = train_data.drop(columns=["label"])

# The prepared test frame is used as the feature matrix unchanged.
X_test = test_data

# Standardize the features: the scaler is fitted on the training features only
# and the same fitted transformation is then applied to both sets.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Feed-forward network: two ReLU hidden layers (64 and 32 units) followed by a
# single sigmoid output unit. The input width is the number of scaled features.
model = Sequential(
    [
        Dense(64, activation="relu", input_shape=(X_train_scaled.shape[1],)),
        Dense(32, activation="relu"),
        Dense(1, activation="sigmoid"),
    ]
)

# Adam optimizer with binary cross-entropy loss; accuracy is tracked as a metric.
model.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

# Fit on the scaled training features for 10 epochs with batches of 32 samples.
model.fit(X_train_scaled, y_train, batch_size=32, epochs=10)

# Make predictions on the test data.
# The whole scaled test matrix is scored with a single batched predict() call,
# then the first (only) output column is thresholded at 0.5 to get 0/1 labels.
# Scoring the growing slice X_test_scaled[:i + 1] once per sample re-predicts
# every earlier row each time (quadratic work) for exactly the same labels.
if len(X_test_scaled) > 0:
    probabilities = model.predict(X_test_scaled)
    # tolist() yields plain Python ints, one label per test row, in row order.
    predictions = (probabilities[:, 0] >= 0.5).astype(int).tolist()
else:
    # No rows to score: keep an empty list instead of calling predict() on an
    # empty batch.
    predictions = []

# Pair every test_idx (converted to a string key) with its predicted label
# (converted to a plain int), in row order.
predictions_dict = dict(
    zip(map(str, test_source['test_idx']), map(int, predictions))
)

# The exported JSON nests the mapping under a single "target" key.
output = {"target": predictions_dict}

# Write the result to disk.
with open('./data/predsNN.json', 'w') as json_file:
    json.dump(output, json_file)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
