<a href="https://colab.research.google.com/github/DSGP-Group-1-EAPS/BackupDataset/blob/main/Artificial_Neural_Networks.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from google.colab import drive

# End-to-end pipeline: load the absenteeism CSV from Google Drive, clean and
# impute it, train a small fully connected regression network, and report
# R-squared / MSE / MAE on the training and test partitions.

# --- Configuration ----------------------------------------------------------
SEED = 42
DATA_PATH = '/content/drive/MyDrive/Colab Notebooks/DSGP_COURSEWORK/Absenteeism_at_work_Project - Copy.csv'
WORKLOAD_COL = "Work load Average/day "  # the trailing space is part of the CSV header
TARGET_COL = "Absenteeism time in hours"
N_FEATURE_COLS = 15  # features are taken from the first 15 columns of the CSV

# Seed Python, NumPy and TensorFlow so weight initialisation and batch
# shuffling (and therefore the printed metrics) are repeatable between runs.
tf.keras.utils.set_random_seed(SEED)

# --- Load -------------------------------------------------------------------
# Mount Google Drive
drive.mount('/content/drive')

# Read the CSV file
df = pd.read_csv(DATA_PATH)

# Clean the workload column by removing thousands separators and converting to
# float. Casting to str first keeps this working even if pandas already parsed
# the column as numeric; missing values become "nan" and convert back to NaN.
df[WORKLOAD_COL] = (
    df[WORKLOAD_COL].astype(str).str.replace(',', '', regex=False).astype(float)
)

# --- Split, then impute -----------------------------------------------------
# Split whole rows first so the imputation statistics below come from training
# rows only; computing the means on the full frame would leak information
# about the test rows into the training data.
train_df, test_df = train_test_split(df, test_size=0.2, random_state=SEED)

# Replace missing values with the (rounded) training-set mean of each column
train_means = train_df.mean(numeric_only=True).round(0)
train_df = train_df.fillna(train_means)
test_df = test_df.fillna(train_means)
df = df.fillna(train_means)

# --- Features / target ------------------------------------------------------
# Use the first N_FEATURE_COLS columns as features, excluding the target by
# name so it can never be fed to the model as an input.
# NOTE(review): the column order of the CSV is not visible here - confirm the
# first 15 columns are the intended feature set.
feature_cols = [col for col in df.columns[:N_FEATURE_COLS] if col != TARGET_COL]

x = df[feature_cols].to_numpy()
y = df[TARGET_COL].to_numpy()

xtrain = train_df[feature_cols].to_numpy()
xtest = test_df[feature_cols].to_numpy()
ytrain = train_df[TARGET_COL].to_numpy()
ytest = test_df[TARGET_COL].to_numpy()

# Standardize the features; the scaler is fitted on the training split only
# and then applied unchanged to the test split.
scaler = StandardScaler()
xtrain = scaler.fit_transform(xtrain)
xtest = scaler.transform(xtest)

# --- Model ------------------------------------------------------------------
# Fully connected regressor: 128 -> 64 -> 32 -> 16 ReLU units, linear output.
model = Sequential()
model.add(Dense(128, activation='relu', input_shape=(xtrain.shape[1],)))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(1, activation='linear'))

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

# Train the model.
# NOTE: the test split is passed as validation data purely to monitor the loss
# per epoch. Do not use these curves for tuning or early stopping, otherwise
# the test metrics reported below stop being an unbiased estimate.
history = model.fit(
    xtrain,
    ytrain,
    epochs=50,
    batch_size=32,
    validation_data=(xtest, ytest),
    verbose=2,
)

# --- Evaluation -------------------------------------------------------------
# Make predictions
ytrain_pred = model.predict(xtrain)
ytest_pred = model.predict(xtest)

# Evaluate the model
train_r2 = r2_score(ytrain, ytrain_pred)
test_r2 = r2_score(ytest, ytest_pred)
train_mse = mean_squared_error(ytrain, ytrain_pred)
test_mse = mean_squared_error(ytest, ytest_pred)
train_mae = mean_absolute_error(ytrain, ytrain_pred)
test_mae = mean_absolute_error(ytest, ytest_pred)

print(f"Train R-squared: {train_r2}")
print(f"Test R-squared: {test_r2}")
print(f"Train MSE: {train_mse}")
print(f"Test MSE: {test_mse}")
print(f"Train MAE: {train_mae}")
print(f"Test MAE: {test_mae}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Epoch 1/50
19/19 - 1s - loss: 231.7076 - mae: 6.4655 - val_loss: 125.7079 - val_mae: 4.9620 - 844ms/epoch - 44ms/step
Epoch 2/50
19/19 - 0s - loss: 201.6383 - mae: 5.4091 - val_loss: 110.4126 - val_mae: 5.2605 - 65ms/epoch - 3ms/step
Epoch 3/50
19/19 - 0s - loss: 192.7815 - mae: 6.5754 - val_loss: 107.4698 - val_mae: 5.5655 - 65ms/epoch - 3ms/step
Epoch 4/50
19/19 - 0s - loss: 186.8381 - mae: 5.5221 - val_loss: 103.7317 - val_mae: 4.8103 - 50ms/epoch - 3ms/step
Epoch 5/50
19/19 - 0s - loss: 181.5662 - mae: 5.9676 - val_loss: 104.2166 - val_mae: 5.5421 - 63ms/epoch - 3ms/step
Epoch 6/50
19/19 - 0s - loss: 178.7013 - mae: 6.0266 - val_loss: 104.1144 - val_mae: 5.3233 - 49ms/epoch - 3ms/step
Epoch 7/50
19/19 - 0s - loss: 175.5725 - mae: 6.0856 - val_loss: 104.6715 - val_mae: 5.3665 - 65ms/epoch - 3ms/step
Epoch 8/50
19/19 - 0s - loss: 173.3838 - mae: 5.8062 - va