# In [None]:
import tensorflow as tf
# import tensorflow_datasets as tfds
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.metrics import roc_curve,confusion_matrix
from sklearn.model_selection import train_test_split,RandomizedSearchCV

# Seed TensorFlow's global random generator before anything else is built.
tf.random.set_seed(seed=42)

# Raw training table, read from the notebook's /content directory.
df = pd.read_csv(filepath_or_buffer="/content/train.csv")
#data preparation
def preprocess(df):
    """Turn a raw frame into an all-numeric feature frame.

    Integer columns are passed through untouched. Among the remaining
    columns, ``job`` is replaced by the relative frequency of each value
    within ``df`` and every other column is one-hot encoded.

    The encoder and the ``job`` frequencies are learned from the frame that
    is passed in, so two separate calls learn their categories
    independently of each other.

    Args:
        df: Frame containing a ``job`` column among its non-integer columns.

    Returns:
        A new DataFrame sharing the index of ``df``. Columns are, in order:
        the one-hot indicators (labelled by position), ``job``, then the
        integer columns of ``df``.
    """
    numeric_part = df.select_dtypes(include=int)
    other_part = df.select_dtypes(exclude=int)

    # Frequency-encode "job": each value becomes its share of the rows.
    job_share = other_part["job"].value_counts(normalize=True).to_dict()
    job_encoded = df["job"].map(job_share)

    # One-hot encode every remaining non-integer column.
    one_hot = OneHotEncoder(sparse_output=False)
    indicators = one_hot.fit_transform(other_part.drop(columns="job").values)

    encoded = pd.DataFrame(indicators, index=other_part.index)
    encoded["job"] = job_encoded
    return pd.concat([encoded, numeric_part], axis=1)
df = preprocess(df)

from sklearn.utils import class_weight

# The 0 and 1 targets are imbalanced, so each class gets a "balanced" weight
# that is later handed to model.fit.
class_labels = np.unique(df["y"])
class_weights = class_weight.compute_class_weight(
    class_weight="balanced",
    classes=class_labels,
    y=df["y"]
)
# compute_class_weight returns one weight per entry of `classes`, in the same
# order, so key each weight by the label it belongs to rather than by its
# position in the array.
class_weight_dict = {
    int(label): weight for label, weight in zip(class_labels, class_weights)
}


def prepare_Data(Dataf, train_size=600000, batch_size=100, shuffle_buffer=10000):
  """Split a preprocessed frame into batched training and validation datasets.

  The first ``train_size`` rows form the training set and the remaining rows
  form the validation set. Column ``y`` is the target; every other column is
  a feature.

  Args:
    Dataf: Preprocessed DataFrame that contains a ``y`` column.
    train_size: Number of leading rows used for training.
    batch_size: Batch size applied to both datasets.
    shuffle_buffer: Shuffle buffer size for the training dataset.

  Returns:
    A ``(train_dataset, valid_dataset)`` tuple of ``tf.data.Dataset`` objects
    yielding ``(features, target)`` batches.

  Raises:
    ValueError: If the split would leave the training or the validation set
      empty.
  """
  n_rows = len(Dataf)
  # Fail fast: an empty validation set would otherwise only surface later,
  # during training, as a missing val_loss.
  if not 0 < train_size < n_rows:
    raise ValueError(
        f"train_size must be between 1 and {n_rows - 1} for a frame of "
        f"{n_rows} rows, got {train_size}"
    )

  X = Dataf.drop("y", axis=1)
  y = Dataf["y"]
  Xtrain, ytrain = X.iloc[:train_size], y.iloc[:train_size]
  Xvalid, yvalid = X.iloc[train_size:], y.iloc[train_size:]

  Xtrain_data = (
      tf.data.Dataset.from_tensor_slices((Xtrain, ytrain))
      .shuffle(buffer_size=shuffle_buffer)
      .batch(batch_size)
      .prefetch(1)
  )
  # Validation data is only evaluated, never trained on, so it is not
  # shuffled.
  Xvalid_data = (
      tf.data.Dataset.from_tensor_slices((Xvalid, yvalid))
      .batch(batch_size)
      .prefetch(1)
  )
  return Xtrain_data, Xvalid_data


#Model making
class MyModel(tf.keras.Model):
  """Feed-forward classifier with a two-layer side branch and a 2-way softmax.

  The main path is BatchNorm -> Dense(256) -> BatchNorm -> Dropout ->
  Dense(128) -> Dense(64) -> Dropout. Its output is also passed through two
  further Dense(64) layers; the main-path output and the side-branch output
  are concatenated and fed to the softmax output layer.
  """

  def __init__(self,input_shape = None,**kwargs):
    """Create the layers.

    Args:
      input_shape: Accepted but not used by this constructor.
      **kwargs: Forwarded to ``tf.keras.Model``.
    """
    super().__init__(**kwargs)
    dense = tf.keras.layers.Dense
    leaky = tf.keras.activations.leaky_relu

    # Main path.
    self.Layer1 = dense(256, activation=leaky)
    self.Layer2 = dense(128, activation=leaky)
    self.Layer3 = dense(64, activation=leaky)
    # Side branch: a linear layer followed by a leaky-ReLU layer.
    self.s_layer1 = dense(64)
    self.s_layer2 = dense(64, activation=leaky)
    # Two-unit softmax head.
    self.output_layer = dense(2, activation="softmax")
    self.batch1_ = tf.keras.layers.BatchNormalization()
    self.batch2_ = tf.keras.layers.BatchNormalization()
    # A single Dropout layer is applied at two points of the forward pass.
    self.drop_ = tf.keras.layers.Dropout(0.3)

  def call(self,X,training = False):
    """Run the forward pass.

    Args:
      X: Batch of input features.
      training: Forwarded to the batch-normalisation and dropout layers.

    Returns:
      The output of the 2-unit softmax layer.
    """
    hidden = self.batch1_(X, training=training)
    hidden = self.Layer1(hidden)
    hidden = self.batch2_(hidden, training=training)
    hidden = self.drop_(hidden, training=training)
    hidden = self.Layer2(hidden)
    hidden = self.Layer3(hidden)
    trunk = self.drop_(hidden, training=training)

    branch = self.s_layer2(self.s_layer1(trunk))

    # Join the main-path output with the side-branch output along features.
    merged = tf.concat([trunk, branch], axis=-1)
    return self.output_layer(merged)


# Stop when val_loss has not improved for 6 epochs and restore the best
# weights seen during training.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=6,
    restore_best_weights=True
)

# NOTE(review): decay_rate=0.01 shrinks the learning rate by a factor of 100
# over every 7000 optimizer steps, which is very aggressive -- confirm this is
# intended.
lr_decay = tf.keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=0.01,decay_rate=0.01,decay_steps=7000)

model = MyModel()
opti = tf.keras.optimizers.Adam(learning_rate=lr_decay)
# Integer labels with a 2-unit softmax output -> sparse categorical loss.
model.compile(optimizer = opti, loss="sparse_categorical_crossentropy", metrics=["accuracy"])

Xtrain,Xvalid = prepare_Data(df)

# Both inputs are tf.data.Dataset objects that prepare_Data has already
# batched, so no batch_size is passed here: Keras does not accept a
# batch_size for dataset inputs, and the value would not match the real
# batch size anyway.
model.fit(Xtrain,validation_data = Xvalid,
    epochs=50,
    callbacks=[early_stop],
    class_weight = class_weight_dict
)
# Test analysis
# Load the test file, keep its first 50000 rows and run them through the same
# preprocessing function as the training data.
test_frame = preprocess(pd.read_csv("/content/test.csv").head(50000))
def make_test_data(frame):
  """Wrap a preprocessed feature frame in a batched dataset for prediction.

  The model's ``call`` expects one feature tensor per batch, so the frame is
  passed as a single 2-D array. A dict of per-column tensors would not work:
  the model cannot consume it, and the frame's mixed int/str column labels
  cannot be used as dataset keys.

  Args:
    frame: Preprocessed DataFrame holding feature columns only. Its columns
      must match the features the model was trained on, in the same order.

  Returns:
    A ``tf.data.Dataset`` yielding feature batches of up to 100 rows.
  """
  features = frame.to_numpy()
  data = tf.data.Dataset.from_tensor_slices(features).batch(100).prefetch(1)
  return data

Xtest = make_test_data(test_frame)

# One row of class probabilities per test sample; the index of the largest
# probability in each row is the predicted class.
y_pred_probs = model.predict(Xtest)
y_pred = np.argmax(y_pred_probs,axis = 1)
plt.figure(figsize = (12,12))

# Left panel: how many samples were assigned to each predicted class. This
# counts the predicted labels, not the 2-D probability array.
plt.subplot(1,2,1)
sns.countplot(x = y_pred)

# Right panel: distribution of the probability in column 1.
plt.subplot(1,2,2)
sns.histplot(y_pred_probs[:,1], bins=30, kde=True)
plt.show()