In [7]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import warnings
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
# --- Load and inspect the raw data -------------------------------------------
# The CSV is read with header=None, so pandas labels the columns 0, 1, 2, ...
data = pd.read_csv("Audiobooks_data.csv", header=None)
# Work on a copy so the freshly loaded frame stays untouched.
df = data.copy(deep=True)

# Exploratory look-ups: preview, missing values per column, dtypes, summary
# statistics. These are bare expressions; they print nothing unless one of
# them happens to be the last statement of a notebook cell.
df.head()
df.isna().sum()
df.dtypes
df.describe()

# Features are every column except the first and the last; the target is the
# last column. NOTE(review): column 0 is skipped -- presumably an identifier,
# confirm against the dataset description.
X, y = df.iloc[:, 1:-1], df.iloc[:, -1]
print(X.shape, y.shape)
# Class distribution of the target, used to decide whether balancing is needed.
y.value_counts()
# --- Balance the classes -------------------------------------------------------
# Keep every positive row (target == 1) and pair it with an equally sized set of
# negative rows (target == 0), so that a classifier cannot reach a high accuracy
# simply by always predicting the majority class.
labels = df.iloc[:, -1]
ones_df = df[labels == 1]
zeros_pool = df[labels == 0]

# Draw the negatives at random rather than taking the first rows of the file:
# if the CSV is ordered in any way, a head slice would be a biased subset.
# The size is capped at the pool size so sample() cannot raise when there are
# fewer negatives than positives.
zeros_df = zeros_pool.sample(
    n=min(len(ones_df), len(zeros_pool)),
    random_state=42,
)

# Shuffle the combined rows with a fixed seed so the balanced frame (and every
# split derived from it) is reproducible from run to run.
df_balanced = (
    pd.concat([ones_df, zeros_df])
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)
df_balanced.head()
df_balanced.shape
# --- Features / target from the balanced frame ---------------------------------
# Rebinds X and y (previously taken from the unbalanced data): the first column
# is skipped and the last column is the label.
X = df_balanced.iloc[:, 1:-1]
y = df_balanced.iloc[:, -1]
print(X.shape, y.shape)

# --- 80 / 10 / 10 split ----------------------------------------------------------
# Hold out 20 % of the rows first, then halve that hold-out into validation and
# test. Stratifying on the label keeps the class ratio the same in every part,
# and the fixed random_state makes the split repeatable.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=42
)

# Report the size of each part and how many positive labels it holds.
# Series.sum() is the vectorised equivalent of the built-in sum(), which would
# iterate the Series one element at a time.
print(f"Training data: {X_train.shape} and {y_train.shape}, no of 1's: {y_train.sum()}")
print(f"Validation data: {X_val.shape} and {y_val.shape}, no of 1's: {y_val.sum()}")
print(f"Testing data: {X_test.shape} and {y_test.shape}, no of 1's: {y_test.sum()}")
# --- Standardise the features --------------------------------------------------
# The scaler is fitted on the training split only; the validation and test
# splits are transformed with those same fitted statistics so that no
# information from them leaks into preprocessing.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)
# --- Network definition --------------------------------------------------------
# Fully connected binary classifier: three ReLU hidden layers (128 -> 64 -> 32)
# with 30 % dropout after the first two, and a single sigmoid unit that outputs
# the probability of the positive class.
model = Sequential()
model.add(Input(shape=(X_train.shape[1],)))  # one input per feature column
model.add(Dense(128, activation="relu"))
model.add(Dropout(0.3))
model.add(Dense(64, activation="relu"))
model.add(Dropout(0.3))
model.add(Dense(32, activation="relu"))
model.add(Dense(1, activation="sigmoid"))

# Adam with an explicit learning rate; binary cross-entropy matches the sigmoid
# output, and accuracy is tracked as the reported metric.
opt = Adam(learning_rate=0.001)
model.compile(
    loss="binary_crossentropy",
    optimizer=opt,
    metrics=["accuracy"],
)
# --- Training ------------------------------------------------------------------
# Stop once the validation loss has not improved for 5 consecutive epochs and
# roll the model back to the weights of the best epoch seen.
early_stop = EarlyStopping(
    monitor="val_loss",
    patience=5,
    restore_best_weights=True,
)
model.fit(
    X_train_scaled,
    y_train,
    epochs=50,  # upper bound; early stopping may end training sooner
    batch_size=32,
    validation_data=(X_val_scaled, y_val),
    callbacks=[early_stop],
)

# --- Final evaluation on the held-out test split ---------------------------------
loss, accuracy = model.evaluate(X_test_scaled, y_test)
print(f"Accuracy: {accuracy * 100:.2f}%")

(14084, 10) (14084,)
(4474, 10) (4474,)
Training data: (3579, 10) and (3579,), no of 1's: 1790
Validation data: (447, 10) and (447,), no of 1's: 223
Testing data: (448, 10) and (448,), no of 1's: 224
Epoch 1/50
112/112 ━━━━━━━━━━━━━━━━━━━━ 5s 10ms/step - accuracy: 0.7743 - loss: 0.5171 - val_accuracy: 0.8680 - val_loss: 0.3564
Epoch 2/50
112/112 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - accuracy: 0.8838 - loss: 0.3240 - val_accuracy: 0.8725 - val_loss: 0.3366
Epoch 3/50
112/112 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - accuracy: 0.8884 - loss: 0.3043 - val_accuracy: 0.8747 - val_loss: 0.3295
Epoch 4/50
112/112 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - accuracy: 0.8844 - loss: 0.2985 - val_accuracy: 0.8702 - val_loss: 0.3222
Epoch 5/50
[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.9015 - loss: 0.2651 - val_accuracy: 0.8747 - val_lo