In [None]:
import tensorflow as tf
import numpy as np

# Fetch CIFAR-10 through Keras; the loader returns a (train, test) pair and
# each element is itself an (images, labels) pair.
train_split, test_split = tf.keras.datasets.cifar10.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

# Report the dimensions of every array before any preprocessing is applied.
for heading, split_array in (
    ("x_train shape:", x_train),
    ("y_train shape:", y_train),
    ("x_test shape:", x_test),
    ("y_test shape:", y_test),
):
    print(heading, split_array.shape)

In [None]:
from sklearn.model_selection import train_test_split, cross_val_score, KFold, cross_val_predict
from sklearn.linear_model import LogisticRegression

# Split the training data into train and holdout sets (80/20). The fixed
# random_state keeps the partition identical on every run.
X_train, X_holdout, Y_train, Y_holdout = train_test_split(
    x_train, y_train, test_size=0.2, random_state=42
)

# Flatten every image into a single feature row and normalise the data.
# reshape(n, -1) produces the same row-major layout as calling .flatten() on
# each image, but does it in one step instead of looping over every sample in
# Python and copying each image twice. Dividing by 255 returns a new float
# array (presumably mapping 8-bit pixel intensities to [0, 1] -- confirm if a
# different dataset is plugged in).
X_train_flattened = X_train.reshape(len(X_train), -1) / 255
X_holdout_flattened = X_holdout.reshape(len(X_holdout), -1) / 255

# Collapse the label arrays to 1-D vectors, the layout the estimators and
# metric functions used later are given.
Y_train_flat = Y_train.ravel()
Y_holdout_flat = Y_holdout.ravel()

# Shape of the normalised data
print('X_train shape:', X_train_flattened.shape)
print('Y_train shape:', Y_train_flat.shape)
print('X_holdout shape:', X_holdout_flattened.shape)
print('Y_holdout shape:', Y_holdout_flat.shape)

In [None]:
# Linear Discriminant Analysis
from sklearn.model_selection import cross_val_score, KFold, cross_val_predict
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import recall_score

# Build the LDA estimator and fit it on the flattened training images in a
# single chained call (fit() returns the estimator it was called on).
lda = LinearDiscriminantAnalysis().fit(X_train_flattened, Y_train_flat)

# Bare expression so the fitted estimator is still the value of the cell's
# last statement, exactly as before.
lda

In [None]:
# Evaluation
from sklearn.metrics import accuracy_score

# KFold cross validation; the integer seed makes every call to cv.split()
# return the same shuffled folds.
cv = KFold(n_splits=5, shuffle=True, random_state=42)

# A single cross-validated pass: each training sample is predicted by a clone
# of `lda` fitted on the folds that do not contain it.
predictions = cross_val_predict(lda, X_train_flattened, Y_train_flat, cv=cv, method='predict')

# Per-fold accuracy taken from those same out-of-fold predictions. This gives
# the numbers cross_val_score(..., scoring='accuracy') would report without
# fitting the model on every fold a second time.
training_scores = np.array([
    accuracy_score(Y_train_flat[fold_idx], predictions[fold_idx])
    for _train_idx, fold_idx in cv.split(X_train_flattened)
])

mean_score = training_scores.mean()
std_score = training_scores.std()

# Macro average: recall is computed per class and then averaged unweighted.
recall = recall_score(Y_train_flat, predictions, average='macro')

print('LDA training scores:', training_scores)
# The mean was previously computed but never shown.
print('Mean accuracy:', mean_score)
print('Predictions:', predictions)
print('Recall:', recall)
print('STD', std_score)

In [None]:
# Use the fitted LDA to transform the data for feature reduction.
X_train_reduced = lda.transform(X_train_flattened)

# Show the projected values followed by the shape of the reduced data, each
# on its own line.
print(X_train_reduced, X_train_reduced.shape, sep="\n")

In [None]:
# Logistic Regression
from sklearn.linear_model import LogisticRegression

# Choose the model: logistic regression limited to 100 solver iterations.
lrModel = LogisticRegression(max_iter=100)

# Work on independent copies of the LDA-reduced features and the flat labels.
# NOTE(review): this rebinds X_train and y_train, which earlier cells use for
# the raw image split and the labels returned by the dataset loader; re-running
# the split cell after this one would pick up these arrays instead.
X_train, y_train = X_train_reduced.copy(), Y_train_flat.copy()

# Fit on the entire (reduced) training set; kept as the final expression of
# the cell.
lrModel.fit(X_train, y_train)

In [None]:
# Evaluation

from sklearn.model_selection import cross_val_score, KFold, cross_val_predict
from sklearn.metrics import precision_score, recall_score, f1_score

# The LDA projection is supervised, so it must be re-fitted inside every fold.
# Cross-validating lrModel directly on X_train_reduced would score each
# validation fold on features that the LDA derived from that fold's own
# labels, which inflates every metric below. Evaluating an
# LDA -> logistic regression pipeline on the unreduced features keeps each
# validation fold unseen by both steps.
from sklearn.base import clone
from sklearn.metrics import accuracy_score
from sklearn.pipeline import make_pipeline

# Cross-validation method within the training set; the integer seed makes
# every call to cv.split() return the same shuffled folds.
cv = KFold(n_splits=5, shuffle=True, random_state=42)

# clone() yields unfitted copies that carry the same hyperparameters as the
# estimators configured earlier in the notebook.
lda_lr_pipeline = make_pipeline(clone(lda), clone(lrModel))

# Use cross_val_predict to obtain an out-of-fold prediction for each sample:
# both pipeline steps are fitted only on the folds that exclude that sample.
predictions = cross_val_predict(
    lda_lr_pipeline, X_train_flattened, Y_train_flat, cv=cv, method='predict'
)

# Per-fold accuracy from those same predictions, so the (expensive) pipeline
# is fitted once per fold rather than twice.
training_scores = np.array([
    accuracy_score(Y_train_flat[fold_idx], predictions[fold_idx])
    for _train_idx, fold_idx in cv.split(X_train_flattened)
])

# Mean and standard deviation of the fold accuracies
mean_score = training_scores.mean()
std_score = training_scores.std()

# Recall
recall = recall_score(Y_train_flat, predictions, average='macro')

# Precision
precision = precision_score(Y_train_flat, predictions, average='macro')

# F1 score
f1 = f1_score(Y_train_flat, predictions, average='macro')


print('training scores:', training_scores)
# Mean and spread were previously computed but never shown.
print('mean accuracy:', mean_score)
print('std:', std_score)
print('prediction scores:', predictions)
print("Precision:", precision)
print('Recall:', recall)
print("F1 Score:", f1)