# PCA

In [None]:
import pandas as pd
import numpy as np

In [None]:
# First look at the CSV using pandas' default header handling.
df = pd.read_csv(filepath_or_buffer="/content/default of credit card clients.csv")
df.head(n=5)

In [None]:
# Re-read the file taking its second row as the header, then drop the
# first column before inspecting the result.
df = pd.read_csv("/content/default of credit card clients.csv", header=1)
df = df.iloc[:, 1:]
print(df.shape)
df.head()

In [None]:
# Every column but the last is a feature; the last column is the target.
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Report the shapes together with how many rows carry label 0 and label 1.
n_label0 = np.where(y == 0)[0].size
n_label1 = np.where(y == 1)[0].size
print(X.shape, y.shape, n_label0, n_label1)

In [None]:
from sklearn import model_selection
from sklearn.preprocessing import StandardScaler

# Stratified 80/20 split. The seed is pinned (same value the random forests
# below use) so every downstream comparison sees the same split on re-runs.
x_train, x_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=1212
)

# Standardize features: statistics are learned on the training split only
# and then applied unchanged to the test split.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

In [None]:
from sklearn.decomposition import PCA

# Five-component PCA, fitted on the scaled training split only.
pca = PCA(n_components=5)
pca.fit(X=x_train)

In [None]:
# Share of variance captured by each retained component, as a percentage.
100 * pca.explained_variance_ratio_

In [None]:
# Project the test split onto the components of the fitted PCA and show
# the shape of the projected array.
x_test_transform = pca.transform(x_test)
print(x_test_transform.shape)

In [None]:
import matplotlib.pyplot as plt

# 2-D scatter of the first two PCA components of the test split,
# colored by the test labels.
fig, ax = plt.subplots(figsize=(7, 7))
view_dim = x_test_transform[:, :2]
pc_x, pc_y = view_dim.T
ax.scatter(pc_x, pc_y, c=y_test)

In [None]:
# 3-D scatter of the first three PCA components of the test split,
# colored by the test labels.
fig, ax = plt.subplots(figsize=(7, 7), subplot_kw={"projection": "3d"})
view_dim = x_test_transform[:, :3]
pc_x, pc_y, pc_z = view_dim.T
ax.scatter(pc_x, pc_y, pc_z, c=y_test)

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

# Baseline: random forest trained on the full scaled feature set.
rf = RandomForestClassifier(random_state=1212, n_estimators=200)
rf.fit(x_train, y_train)
raw_train_acc = rf.score(x_train, y_train)
raw_test_acc = rf.score(x_test, y_test)
print(f"Train accuracy: {raw_train_acc:.4f}")
print(f"Test accuracy: {raw_test_acc:.4f}")
print("="*30)

# Same forest configuration, trained on the PCA-projected features instead.
pca_train_x, pca_test_x = pca.transform(x_train), pca.transform(x_test)
rf = RandomForestClassifier(random_state=1212, n_estimators=200)
rf.fit(pca_train_x, y_train)
pca_train_acc = rf.score(pca_train_x, y_train)
pca_test_acc = rf.score(pca_test_x, y_test)
print(f"PCA train accuracy: {pca_train_acc:.4f}")
print(f"PCA test accuracy: {pca_test_acc:.4f}")

# AE

In [None]:
import tensorflow as tf
from keras import layers, models

In [None]:
def encoder(input_shape, latent_dim):
    """Build the encoder half of a dense autoencoder.

    The network is Dense -> BatchNormalization -> ELU -> Dropout(0.2),
    followed by a Dense bottleneck with no activation.

    Args:
        input_shape: Number of input features, as an int (it is averaged
            with ``latent_dim`` to size the hidden layer).
        latent_dim: Width of the bottleneck layer, as an int.

    Returns:
        A Keras ``Model`` mapping ``(batch, input_shape)`` inputs to
        ``(batch, latent_dim)`` latent codes.
    """
    # Keras documents `shape` as a tuple; newer Keras releases reject a
    # bare int, so wrap the feature count explicitly.
    i = layers.Input(shape=(input_shape,))

    # Hidden width is the midpoint between input and latent sizes.
    h = layers.Dense((input_shape + latent_dim) // 2)(i)
    h = layers.BatchNormalization()(h)
    h = layers.Activation('elu')(h)
    h = layers.Dropout(.2)(h)

    latent = layers.Dense(latent_dim)(h)

    return models.Model(i, latent)

In [None]:
def decoder(input_shape, latent_dim):
    """Build the decoder half of a dense autoencoder.

    Mirrors the encoder: Dense -> BatchNormalization -> ELU -> Dropout(0.2),
    followed by a Dense output layer with no activation.

    Args:
        input_shape: Number of features to reconstruct, as an int.
        latent_dim: Width of the latent code fed to the decoder, as an int.

    Returns:
        A Keras ``Model`` mapping ``(batch, latent_dim)`` codes to
        ``(batch, input_shape)`` reconstructions.
    """
    # Keras documents `shape` as a tuple; newer Keras releases reject a
    # bare int, so wrap the latent width explicitly.
    latent = layers.Input(shape=(latent_dim,))

    # Hidden width is the midpoint between input and latent sizes.
    h = layers.Dense((input_shape + latent_dim) // 2)(latent)
    h = layers.BatchNormalization()(h)
    h = layers.Activation('elu')(h)
    h = layers.Dropout(.2)(h)

    o = layers.Dense(input_shape)(h)

    return models.Model(latent, o)

In [None]:
# Feature count of the training matrix drives both sub-network sizes.
input_shape = x_train.shape[1]

# Three latent units, so the code can be drawn directly in 2-D / 3-D.
enc = encoder(input_shape=input_shape, latent_dim=3)
dec = decoder(input_shape=input_shape, latent_dim=3)

In [None]:
# Chain encoder and decoder into one model trained on reconstruction MSE.
# The input shape is passed as a tuple: Keras documents `shape` as a tuple
# and newer releases reject a bare int.
input_layer = tf.keras.Input(shape=(input_shape,))
output_layer = dec(enc(input_layer))
ae = models.Model(input_layer, output_layer)
ae.compile(optimizer='adam', loss='mse')
ae.summary()

In [None]:
# Train to reconstruct the input. The epoch count is only an upper bound;
# early stopping ends training once the monitored loss has not improved
# for 20 epochs, and restore_best_weights rolls the model back to the best
# epoch instead of keeping the weights from 20 epochs later.
# NOTE(review): the test split doubles as the validation set here, so the
# stopping point is tuned on test data; consider validation_split instead.
ae.fit(x_train, x_train,
       epochs=10000,
       batch_size=100,
       callbacks=[tf.keras.callbacks.EarlyStopping(patience=20,
                                                   restore_best_weights=True)],
       validation_data=(x_test, x_test))

In [None]:
# Run both splits through the encoder alone to get their latent codes.
latent_train, latent_test = enc.predict(x_train), enc.predict(x_test)

In [None]:
# Display the shapes of the encoded train and test arrays.
latent_train.shape, latent_test.shape

In [None]:
# 2-D scatter of the first two latent dimensions of the test split,
# colored by the test labels.
fig, ax = plt.subplots(figsize=(7, 7))
view_dim = latent_test[:, :2]
z_x, z_y = view_dim.T
ax.scatter(z_x, z_y, c=y_test)

In [None]:
# 3-D scatter using every latent dimension as one plot axis,
# colored by the test labels.
fig, ax = plt.subplots(figsize=(7, 7), subplot_kw={"projection": "3d"})
latent_axes = latent_test.T
ax.scatter(*latent_axes, c=y_test)

In [None]:
# Baseline: random forest on the full scaled feature set. The seed matches
# the PCA comparison so the two experiments are reproducible and comparable.
rf = RandomForestClassifier(n_estimators=200, random_state=1212)
rf.fit(x_train, y_train)
print(f"Train accuracy: {rf.score(x_train, y_train):.4f}")
print(f"Test accuracy: {rf.score(x_test, y_test):.4f}")
print("="*30)

# Same forest configuration, trained on the autoencoder's latent codes.
rf = RandomForestClassifier(n_estimators=200, random_state=1212)
rf.fit(latent_train, y_train)
print(f"AE train accuracy: {rf.score(latent_train, y_train):.4f}")
print(f"AE test accuracy: {rf.score(latent_test, y_test):.4f}")

In [None]:
# Load MNIST via Keras. This rebinds x_train / y_train / x_test / y_test,
# replacing the credit-card splits held under the same names.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

In [None]:
# One line per split: image-array shape followed by label-array shape.
for images, labels in ((x_train, y_train), (x_test, y_test)):
    print(images.shape, labels.shape)

In [None]:
# Flatten each 28x28 image into a 784-vector of float32, then divide by 255.
x_train = x_train.reshape((-1, 28 * 28)).astype("float32") / 255.0
x_test = x_test.reshape((-1, 28 * 28)).astype("float32") / 255.0

In [None]:
# Width of the flattened image vectors drives both sub-network sizes.
input_shape = x_train.shape[1]

# 32-unit bottleneck for the image autoencoder.
enc = encoder(input_shape=input_shape, latent_dim=32)
dec = decoder(input_shape=input_shape, latent_dim=32)

In [None]:
# Chain encoder and decoder into one model trained on reconstruction MSE.
# The input shape is passed as a tuple: Keras documents `shape` as a tuple
# and newer releases reject a bare int.
input_layer = tf.keras.Input(shape=(input_shape,))
output_layer = dec(enc(input_layer))
ae = models.Model(input_layer, output_layer)
ae.compile(optimizer='adam', loss='mse')
ae.summary()

In [None]:
# Fixed 20-epoch reconstruction training; input and target are the same
# images, and the test images are used to report validation loss.
ae.fit(
    x=x_train,
    y=x_train,
    batch_size=100,
    epochs=20,
    validation_data=(x_test, x_test),
)

In [None]:
# Compare the first few test digits (top row) with their autoencoder
# reconstructions (bottom row).
n_show = 5
originals = x_test[:n_show]
# One batched forward pass instead of a separate predict call per image.
reconstructions = ae.predict(originals)

plt.figure(figsize=(20, 4))
rows = (
    (originals, 'Original Images'),
    (reconstructions, 'Reconstructed Images'),
)
for row, (images, title) in enumerate(rows):
    for col in range(n_show):
        ax = plt.subplot(2, n_show, row * n_show + col + 1)
        # Explicit colormap keeps matplotlib's global default untouched.
        ax.imshow(images[col].reshape(28, 28), cmap="gray")
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        # Put the row title on the middle panel only.
        if col == n_show // 2:
            ax.set_title(title)