In [3]:
!pip install tensorflow-datasets


Collecting tensorflow-datasets
  Downloading tensorflow_datasets-4.9.3-py3-none-any.whl.metadata (9.3 kB)
Collecting array-record (from tensorflow-datasets)
  Downloading array_record-0.4.1-py39-none-any.whl.metadata (503 bytes)
Collecting dm-tree (from tensorflow-datasets)
  Downloading dm_tree-0.1.8-cp39-cp39-win_amd64.whl.metadata (2.0 kB)
Collecting etils>=0.9.0 (from etils[enp,epath,etree]>=0.9.0->tensorflow-datasets)
  Downloading etils-1.5.2-py3-none-any.whl.metadata (6.3 kB)
Collecting promise (from tensorflow-datasets)
  Downloading promise-2.3.tar.gz (19 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting tensorflow-metadata (from tensorflow-datasets)
  Downloading tensorflow_metadata-1.17.0-py3-none-any.whl.metadata (2.6 kB)
Collecting protobuf>=3.20 (from tensorflow-datasets)
  Downloading protobuf-4.21.12-cp39-cp39-win_amd64.whl.metadata (541 bytes)
Downloading tensorflow_datasets-4.9.3-py3-none-any.whl (5.0 

  You can safely remove it manually.
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
grpcio-status 1.71.0rc2 requires protobuf<6.0dev,>=5.26.1, but you have protobuf 4.21.12 which is incompatible.
mediapipe 0.10.20 requires protobuf<5,>=4.25.3, but you have protobuf 4.21.12 which is incompatible.
onnxconverter-common 1.14.0 requires protobuf==3.20.2, but you have protobuf 4.21.12 which is incompatible.


In [None]:
!pip install datasets


In [None]:
from datasets import load_dataset

# Load the IMDb dataset from the Hugging Face hub
dataset = load_dataset("imdb")

# Convert the training split to a pandas DataFrame (use "test" for test data).
# Dataset.to_pandas() is used instead of pd.DataFrame(...): pandas is not yet
# imported when this cell runs on a fresh kernel, and the dedicated converter
# avoids iterating over the split row by row.
df = dataset["train"].to_pandas()

# Print first 10 rows
print("Dataset Loaded Successfully!")
print(df.head(10))


In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer

# TF-IDF encoder with the vocabulary capped at 5000 terms
vectorizer = TfidfVectorizer(max_features=5000)

# Learn the vocabulary from the review text, encode every review, and
# densify the result into a plain NumPy array.
X = vectorizer.fit_transform(df["text"]).toarray()

# Target labels (0 = Negative, 1 = Positive)
y = df["label"].values

# Wrap the feature matrix in a DataFrame whose columns are the vocabulary terms
feature_names = vectorizer.get_feature_names_out()
tfidf_df = pd.DataFrame(X, columns=feature_names)

# Preview the transformed data
print("\nFirst 10 Rows (After TF-IDF Transformation):")
print(tfidf_df.head(10))


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neural_network import BernoulliRBM
from sklearn.model_selection import train_test_split

# Sigmoid function for reconstruction
def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + exp(-x)) of ``x``, element-wise.

    The naive formula overflows in ``exp(-x)`` for large negative ``x``.
    This version only ever exponentiates ``-|x|`` (which is <= 0) and picks
    the algebraically equivalent branch per element, so no overflow occurs.

    Parameters
    ----------
    x : scalar or array-like
        Input value(s).

    Returns
    -------
    NumPy scalar or ndarray with the same shape as ``x``, values in [0, 1].
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))  # always in (0, 1], never overflows
    # x >= 0: 1 / (1 + e^-x);   x < 0: e^x / (1 + e^x)  (same value, stable)
    out = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    return out[()]  # unwrap 0-d arrays so scalar input yields a scalar

# Split dataset into train & test (fixed seed keeps the split reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define hyperparameter search space
hidden_units = [64, 128, 256]
best_rbm = None
best_loss = float("inf")
rbm_losses = {}

# Try different hidden units and keep the model with the lowest
# reconstruction error on the training data.
for units in hidden_units:
    print(f"\nTraining RBM with {units} hidden units...")

    rbm = BernoulliRBM(n_components=units, learning_rate=0.01, n_iter=5, verbose=True, random_state=42)  # Reduced iterations

    # Train RBM
    rbm.fit(X_train)

    # Transform input through hidden layer (hidden activation probabilities)
    hidden_features = rbm.transform(X_train)

    # Reconstruct the input as the visible probabilities
    #   P(v = 1 | h) = sigmoid(h @ W + b_visible).
    # The visible bias must be included; without it the value is not the
    # reconstruction defined by the trained RBM.
    reconstructed_X = sigmoid(np.dot(hidden_features, rbm.components_) + rbm.intercept_visible_)

    # Compute mean squared error (MSE) as reconstruction loss
    loss = np.mean(np.square(X_train - reconstructed_X))

    rbm_losses[units] = loss

    # Keep track of the best RBM
    if loss < best_loss:
        best_loss = loss
        best_rbm = rbm

print("\nHyperparameter tuning completed!")
print(f"Best RBM has {best_rbm.n_components} hidden units with loss: {best_loss:.4f}")

# Plot loss for different hyperparameters.
# Dict views are materialised as lists so matplotlib receives plain sequences.
plt.figure(figsize=(8, 5))
plt.plot(list(rbm_losses.keys()), list(rbm_losses.values()), marker='o', linestyle='--', color='r')
plt.xlabel("Number of Hidden Units")
plt.ylabel("Reconstruction Loss")
plt.title("RBM Hyperparameter Tuning: Hidden Units vs. Loss")
plt.grid()
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.base import clone
from sklearn.utils import gen_batches

N_EPOCHS = 50

# Restart from an unfitted copy with the winning hyperparameters so the curve
# covers training from epoch 1 and re-running this cell gives the same result.
best_rbm = clone(best_rbm).set_params(n_iter=N_EPOCHS)

n_samples = X_train.shape[0]
loss_curve = []
for epoch in range(1, N_EPOCHS + 1):
    # One epoch = one pass over the training set in mini-batches.
    # partial_fit() continues from the current weights, whereas fit() would
    # re-initialise the model and retrain from scratch on every call.
    for batch in gen_batches(n_samples, best_rbm.batch_size):
        best_rbm.partial_fit(X_train[batch])

    # BernoulliRBM has no inverse_transform(); reconstruct explicitly as the
    # visible probabilities P(v = 1 | h) = sigmoid(h @ W + b_visible).
    hidden_features = best_rbm.transform(X_train)
    reconstructed_X = sigmoid(np.dot(hidden_features, best_rbm.components_) + best_rbm.intercept_visible_)
    loss = np.mean(np.square(X_train - reconstructed_X))  # MSE
    loss_curve.append(loss)
    print(f"Epoch {epoch}/{N_EPOCHS} - Loss: {loss:.4f}")

# Plot Loss Curve
plt.figure(figsize=(8, 5))
plt.plot(range(1, N_EPOCHS + 1), loss_curve, marker='o', linestyle='-', color='b', label="Reconstruction Loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.title("RBM Training Loss Curve")
plt.legend()
plt.grid()
plt.show()
