<a href="https://colab.research.google.com/github/DEEPAK-RAMGIRI/AIML-LAB/blob/main/website_final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [30]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import nltk
import re
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [31]:
# Load the website-classification dataset from the working directory.
DATASET_CSV = 'website_classification.csv'
df = pd.read_csv(DATASET_CSV)

In [32]:
# Preview the first five rows (head() defaults to n=5) to sanity-check the load.
df.head()

Unnamed: 0.1,Unnamed: 0,website_url,cleaned_website_text,Category
0,0,https://www.booking.com/index.html?aid=1743217,official site good hotel accommodation big sav...,Travel
1,1,https://travelsites.com/expedia/,expedia hotel book sites like use vacation wor...,Travel
2,2,https://travelsites.com/tripadvisor/,tripadvisor hotel book sites like previously d...,Travel
3,3,https://www.momondo.in/?ispredir=true,cheap flights search compare flights momondo f...,Travel
4,4,https://www.ebookers.com/?AFFCID=EBOOKERS-UK.n...,bot create free account create free account si...,Travel


In [33]:
# Fetch the NLTK resources used by the text-cleaning step.
nltk.download('stopwords')  # stop-word lists read via stopwords.words('english') in clean_text
nltk.download('punkt_tab')  # tokenizer data; presumably what word_tokenize needs on this NLTK version - confirm

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [34]:
# List the column labels so the columns to drop in the next step can be identified.
df.columns

Index(['Unnamed: 0', 'website_url', 'cleaned_website_text', 'Category'], dtype='object')

In [35]:
# Show the dtype of every column of the loaded frame.
df.dtypes

Unnamed: 0,0
Unnamed: 0,int64
website_url,object
cleaned_website_text,object
Category,object


In [36]:
# Drop the saved row-index column and the raw URL; only the page text and its
# category are used below.
# errors="ignore" keeps this cell re-runnable: without it a second execution
# raises KeyError because the columns have already been removed from `df`.
df = df.drop(columns=["Unnamed: 0", "website_url"], errors="ignore")

In [37]:
# Distinct category labels, in order of first appearance, as an (n, 1) array.
df[['Category']].drop_duplicates().to_numpy()

array([['Travel'],
       ['Social Networking and Messaging'],
       ['News'],
       ['Streaming Services'],
       ['Sports'],
       ['Photography'],
       ['Law and Government'],
       ['Health and Fitness'],
       ['Games'],
       ['E-Commerce'],
       ['Forums'],
       ['Food'],
       ['Education'],
       ['Computers and Technology'],
       ['Business/Corporate'],
       ['Adult']], dtype=object)

In [38]:
def clean_text(text):
    """Normalise one document to space-separated alphabetic non-stop-words.

    The text is lower-cased and tokenised with ``word_tokenize``; every
    character outside ``a-z`` is stripped from each token; tokens that become
    empty or that are English stop words are discarded.

    Parameters
    ----------
    text : str
        Raw document text. Any non-string value (e.g. ``None`` or a float NaN
        coming from an empty cell) is treated as an empty document.

    Returns
    -------
    str
        The surviving tokens joined by single spaces (``''`` if none survive).
    """
    # Series.apply passes missing values through unchanged; calling .lower()
    # on them would raise AttributeError, so map them to an empty document.
    if not isinstance(text, str):
        return ''

    # Build the stop-word set once and cache it on the function object instead
    # of re-reading the corpus for every row.
    stop_words = getattr(clean_text, '_stop_words', None)
    if stop_words is None:
        stop_words = frozenset(stopwords.words('english'))
        clean_text._stop_words = stop_words

    tokens = word_tokenize(text.lower())
    words = (re.sub(r'[^a-z]', '', token) for token in tokens)
    return ' '.join(w for w in words if w and w not in stop_words)

In [39]:
# Run every document through clean_text, replacing the column in place.
df['cleaned_website_text'] = df['cleaned_website_text'].map(clean_text)

In [40]:
# Map each category name to an integer id and record how many classes exist.
label_encoder = LabelEncoder().fit(df['Category'])
df['category_id'] = label_encoder.transform(df['Category'])
num_classes = label_encoder.classes_.shape[0]

In [41]:
# Turn the cleaned text into a dense TF-IDF matrix with at most 3000 terms.
# NOTE(review): the vectorizer is fitted on the full corpus before the
# train/test split, so vocabulary and IDF weights also see the test rows -
# consider fitting on the training rows only.
vectorizer = TfidfVectorizer(max_features=3000)
tfidf_matrix = vectorizer.fit_transform(df['cleaned_website_text'])
X = tfidf_matrix.toarray()

In [42]:
# One-hot encode the integer class ids: the id column is reshaped to (n, 1)
# because OneHotEncoder expects 2-D input, and a dense array is requested.
y_encoded = df['category_id'].to_numpy().reshape(-1, 1)
one_hot_encoder = OneHotEncoder(sparse_output=False)
y = one_hot_encoder.fit_transform(y_encoded)

In [43]:
# Hold out 30% of the rows for testing with a fixed seed.
# The split is stratified on the class index (recovered from the one-hot rows)
# so each of the categories keeps the same proportion in both splits; a purely
# random split can leave the smaller classes under-represented in one of them.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.3,
    random_state=42,
    stratify=y.argmax(axis=1),
)

In [44]:
# import numpy as np
# from sklearn.preprocessing import OneHotEncoder

# # Assuming y_train is your target variable
# # If y_train is not one-hot encoded, one-hot encode it
# if len(np.unique(y_train)) > 2:  # For multi-class classification
#     encoder = OneHotEncoder(sparse_output=False)
#     y_train = encoder.fit_transform(y_train.reshape(-1, 1))

# # Activation Functions
# def relu(x):
#     return np.maximum(0, x)

# def relu_derivative(x):
#     return (x > 0).astype(float)

# def softmax(x):
#     exps = np.exp(x - np.max(x, axis=1, keepdims=True))
#     return exps / np.sum(exps, axis=1, keepdims=True)

# def sigmoid(x):
#     return 1 / (1 + np.exp(-x))

# def sigmoid_derivative(x):
#     return x * (1 - x)

# # Loss function (with small epsilon to prevent log(0))
# def categorical_crossentropy(y_true, y_pred):
#     epsilon = 1e-9
#     return -np.mean(np.sum(y_true * np.log(y_pred + epsilon), axis=1))

# # Initialize Neural Network Parameters
# input_neurons = X_train.shape[1]  # Number of features
# hidden_neurons = 5  # Hidden layer with 5 neurons
# output_neurons = y_train.shape[1]  # Number of categories (output neurons)

# # Initialize Weights and Biases
# np.random.seed(42)
# W1 = np.random.randn(input_neurons, hidden_neurons) * 0.01
# b1 = np.zeros((1, hidden_neurons))
# W2 = np.random.randn(hidden_neurons, output_neurons) * 0.01
# b2 = np.zeros((1, output_neurons))

# # Training Parameters
# # Adjusted learning rate
# learning_rate = 0.001  # Try lowering the learning rate

# # Increase epochs for longer training
# epochs = 5000  # Try more epochs for better convergence

# # Training Loop
# losses = []  # loss history; must be initialised here because the loop appends to it
# for epoch in range(epochs):
#     # Forward Propagation
#     hidden_input = np.dot(X_train, W1) + b1
#     hidden_output = relu(hidden_input)

#     final_input = np.dot(hidden_output, W2) + b2
#     final_output = softmax(final_input)

#     # Compute Loss (Categorical Cross-Entropy)
#     loss = categorical_crossentropy(y_train, final_output)
#     losses.append(loss)

#     # Backpropagation
#     error_output = final_output - y_train
#     d_output = error_output

#     error_hidden = d_output.dot(W2.T)
#     d_hidden = error_hidden * relu_derivative(hidden_output)

#     # Update Weights and Biases
#     W2 -= hidden_output.T.dot(d_output) * learning_rate
#     b2 -= np.sum(d_output, axis=0, keepdims=True) * learning_rate
#     W1 -= X_train.T.dot(d_hidden) * learning_rate
#     b1 -= np.sum(d_hidden, axis=0, keepdims=True) * learning_rate

#     # Print loss every 500 epochs
#     if epoch % 500 == 0:
#         print(f"Epoch {epoch}, Loss: {loss:.4f}")


In [45]:
# Baseline feed-forward classifier: TF-IDF features -> 128 -> 64 -> softmax.
n_features = X_train.shape[1]
hidden_layers = [
    layers.Dense(128, activation='relu'),  # swap the activation here to experiment
    layers.Dense(64, activation='relu'),   # swap the activation here to experiment
]
output_layer = layers.Dense(num_classes, activation='softmax')  # one probability per class

model = keras.Sequential([
    layers.Input(shape=(n_features,)),
    *hidden_layers,
    output_layer,
])


In [None]:
# Train the baseline network with Adam and categorical cross-entropy.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# --- Training ---
# The test split is passed as validation data, so the val_* curves plotted
# below are test-set curves.
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=500,
    batch_size=32,
    verbose=1,
)

# --- Evaluation ---
final_train_loss, final_train_acc = model.evaluate(X_train, y_train, verbose=0)
final_test_loss, final_test_acc = model.evaluate(X_test, y_test, verbose=0)

print("\nFinal Loss & Accuracy:")
print(f"Train Loss: {final_train_loss:.4f} | Train Accuracy: {final_train_acc:.4f}")
print(f"Test Loss: {final_test_loss:.4f} | Test Accuracy: {final_test_acc:.4f}")

# --- Plotting ---
curves = history.history
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 5))

# Loss
loss_ax.plot(curves['loss'], label="Train Loss")
loss_ax.plot(curves['val_loss'], label="Test Loss")
loss_ax.set_xlabel("Epochs")
loss_ax.set_ylabel("Loss")
loss_ax.legend()
loss_ax.set_title("Loss Curve")

# Accuracy
acc_ax.plot(curves['accuracy'], label="Train Accuracy")
acc_ax.plot(curves['val_accuracy'], label="Test Accuracy")
acc_ax.set_xlabel("Epochs")
acc_ax.set_ylabel("Accuracy")
acc_ax.legend()
acc_ax.set_title("Accuracy Curve")

fig.tight_layout()
plt.show()

Epoch 1/500
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 24ms/step - accuracy: 0.1475 - loss: 2.7415 - val_accuracy: 0.4255 - val_loss: 2.5349
Epoch 2/500
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.5657 - loss: 2.3525 - val_accuracy: 0.7612 - val_loss: 1.8752
Epoch 3/500
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.8911 - loss: 1.5186 - val_accuracy: 0.8629 - val_loss: 1.1232
Epoch 4/500
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.9694 - loss: 0.6981 - val_accuracy: 0.8865 - val_loss: 0.7072
Epoch 5/500
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - accuracy: 0.9603 - loss: 0.3425 - val_accuracy: 0.8960 - val_loss: 0.5324
Epoch 6/500
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.9900 - loss: 0.1532 - val_accuracy: 0.9078 - val_loss: 0.4605
Epoch 7/500
[1m31/31[0m [

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers, models

# Normalize input data
# The shift and scale are computed on the TRAINING split only and then applied
# to both splits. Standardising the test split with its own mean/std (as was
# done before) puts the two splits on different scales and lets test-set
# statistics leak into preprocessing.
feature_mean = X_train.mean(axis=0)
feature_std = X_train.std(axis=0)
# Features that are constant in the training split have zero std; divide those
# by 1 instead so test values for such features are not blown up.
feature_std = np.where(feature_std > 0, feature_std, 1.0)
X_train = (X_train - feature_mean) / feature_std
X_test = (X_test - feature_mean) / feature_std

# Layer sizes: input width from the feature matrix, output width from the
# one-hot label matrix.
input_dim = X_train.shape[1]
hidden1_dim = 128
hidden2_dim = 64
output_dim = y_train.shape[1]

# ----------------------
# Keras Model Definition
# ----------------------
# Same 128 -> 64 -> softmax architecture as the baseline; here it is only used
# as a container whose weights are assigned by the ALO search below.
model = models.Sequential([
    layers.Input(shape=(input_dim,)),
    layers.Dense(hidden1_dim, activation='relu'),
    layers.Dense(hidden2_dim, activation='relu'),
    layers.Dense(output_dim, activation='softmax')
])

# Don't compile or fit the model — weights will be set manually

# ------------------------
# ALO Helper: Flatten/Set Weights
# ------------------------
def flatten_weights(model):
    """Return every weight array of ``model`` concatenated into one 1-D vector.

    Layers are visited in ``model.layers`` order and, within a layer, arrays
    are taken in the order ``layer.get_weights()`` returns them; each array is
    flattened before concatenation.
    """
    flat_pieces = [
        weight.flatten()
        for layer in model.layers
        for weight in layer.get_weights()
    ]
    return np.concatenate(flat_pieces)

def set_model_weights(model, flat_weights):
    """Write a flat weight vector back into ``model`` layer by layer.

    This is the inverse of ``flatten_weights``: consecutive slices of
    ``flat_weights`` are reshaped to the shape of each existing weight array
    (in ``model.layers`` / ``layer.get_weights()`` order) and assigned with
    ``layer.set_weights``.

    Parameters
    ----------
    model : object exposing ``layers``; each layer exposes ``get_weights()``
        and ``set_weights(list_of_arrays)``.
    flat_weights : array-like
        1-D vector holding exactly as many values as the model has weights.

    Raises
    ------
    ValueError
        If the vector length does not match the model's total weight count.
        The check runs before any layer is modified, so a bad vector never
        leaves the model partially updated.
    """
    flat_weights = np.asarray(flat_weights).ravel()

    # Record (shape, size) per weight array up front. ``w.size`` is always an
    # int, whereas np.prod(()) is the float 1.0 and cannot be used in a slice.
    layout = [
        [(w.shape, w.size) for w in layer.get_weights()]
        for layer in model.layers
    ]
    expected = sum(size for specs in layout for _, size in specs)
    if flat_weights.size != expected:
        raise ValueError(
            f"flat_weights has {flat_weights.size} values but the model expects {expected}"
        )

    idx = 0
    for layer, specs in zip(model.layers, layout):
        new_layer_weights = []
        for shape, size in specs:
            new_layer_weights.append(flat_weights[idx:idx + size].reshape(shape))
            idx += size
        layer.set_weights(new_layer_weights)

# Forward pass for ALO
def forward_pass(X, flat_weights):
    """Load ``flat_weights`` into the module-level ``model`` and predict on ``X``.

    Side effect: the global model keeps the weights it was given after the
    call returns. Returns whatever ``model.predict(X, verbose=0)`` produces.
    """
    set_model_weights(model, flat_weights)
    predictions = model.predict(X, verbose=0)
    return predictions

def loss_fn(y_pred, y_true):
    """Mean categorical cross-entropy of ``y_pred`` against ``y_true``.

    A constant 1e-9 is added to the predictions before the logarithm so that a
    predicted probability of exactly zero does not produce ``log(0)``.
    """
    log_probs = np.log(y_pred + 1e-9)
    per_sample = np.sum(y_true * log_probs, axis=1)
    return -np.mean(per_sample)

def accuracy_fn(y_pred, y_true):
    """Fraction of rows whose arg-max column in ``y_pred`` matches ``y_true``."""
    predicted_class = np.argmax(y_pred, axis=1)
    actual_class = np.argmax(y_true, axis=1)
    return np.mean(predicted_class == actual_class)

# -----------------
# ALO Implementation
# -----------------
def ALO(fitness_func, dim, lb, ub, epochs=100, pop_size=10, seed=None, accuracy_func=None):
    """Minimise ``fitness_func`` with a simplified Ant-Lion-Optimizer-style search.

    A population of candidate vectors ("antlions") is drawn uniformly from
    ``[lb, ub]``. In every epoch each slot ``i`` gets one new candidate (an
    "ant"): the average of two normalised random-walk positions, a uniformly
    chosen antlion and the current elite, clipped to the bounds. The candidate
    replaces slot ``i`` when it scores lower, and replaces the elite when it
    beats the best score seen so far. The antlion is picked uniformly at
    random and the bounds stay fixed for the whole run.

    Parameters
    ----------
    fitness_func : callable(ndarray of shape (dim,)) -> float
        Objective to minimise.
    dim : int
        Length of each candidate vector.
    lb, ub : float or array broadcastable to (dim,)
        Lower / upper bounds of the search box.
    epochs : int, default 100
        Number of iterations; also the length of the random walks.
    pop_size : int, default 10
        Number of antlions.
    seed : int or None, default None
        Seed for the internal NumPy generator. ``None`` gives a fresh,
        non-reproducible stream.
    accuracy_func : callable(ndarray) -> float or None, default None
        Called once per epoch with the current elite to fill the accuracy
        history. When ``None`` the module-level ``forward_pass``,
        ``accuracy_fn``, ``X_train`` and ``y_train`` are used, which also
        leaves the elite weights loaded in the global model.

    Returns
    -------
    elite : ndarray of shape (dim,)
        Best vector found.
    history_loss : list of float
        Elite score after each epoch (non-increasing).
    history_acc : list of float
        ``accuracy_func(elite)`` after each epoch.

    Raises
    ------
    ValueError
        If ``pop_size`` is smaller than 1.

    Notes
    -----
    The two random walks are generated once, before the loop, and indexed by
    epoch. Rebuilding a full ``(epochs, dim)`` walk twice per epoch only to
    read one row made the cost grow with ``epochs ** 2 * dim``. The two walks
    are kept in memory for the duration of the run.
    """
    if pop_size < 1:
        raise ValueError("pop_size must be at least 1")

    rng = np.random.default_rng(seed)

    if accuracy_func is None:
        def accuracy_func(weights):
            # Default: training accuracy of the global model with these weights.
            return accuracy_fn(forward_pass(X_train, weights), y_train)

    antlions = rng.uniform(lb, ub, (pop_size, dim))
    scores = np.array([fitness_func(x) for x in antlions], dtype=float)
    best_idx = int(np.argmin(scores))
    # Copy so the elite never aliases a population row that is overwritten later.
    elite = antlions[best_idx].copy()
    elite_score = float(scores[best_idx])

    history_loss = []
    history_acc = []

    def random_walk():
        """Return an (epochs, dim) walk of +/-1 steps, min-max scaled per column to [lb, ub]."""
        steps = rng.choice(np.array([-1, 1], dtype=np.int8), size=(epochs, dim))
        walk = np.cumsum(steps, axis=0, dtype=np.int32)
        walk_min = walk.min(axis=0)
        span = walk.max(axis=0) - walk_min + 1e-9  # epsilon guards a flat column
        scaled = (walk - walk_min) / span
        scaled *= (ub - lb)
        scaled += lb
        return scaled

    # Nothing to generate when no iterations are requested (min/max of an
    # empty walk would raise).
    if epochs > 0:
        walk_a = random_walk()
        walk_b = random_walk()

    for epoch in range(epochs):
        walk_sum = walk_a[epoch] + walk_b[epoch]

        for i in range(pop_size):
            rand_antlion = antlions[rng.integers(pop_size)]
            ant = np.clip((walk_sum + rand_antlion + elite) / 4, lb, ub)
            new_score = fitness_func(ant)

            if new_score < scores[i]:
                antlions[i] = ant
                scores[i] = new_score

                if new_score < elite_score:
                    elite = ant.copy()
                    elite_score = new_score

        history_loss.append(elite_score)
        history_acc.append(accuracy_func(elite))

    return elite, history_loss, history_acc

# Fitness function for ALO
def fitness_function(weights):
    """Training cross-entropy of the global model when loaded with ``weights``.

    This is the objective handed to ALO: lower is better.
    """
    train_predictions = forward_pass(X_train, weights)
    train_loss = loss_fn(train_predictions, y_train)
    return train_loss

# -------------------
# Run ALO Optimization
# -------------------
# Search the flattened weight space of the model within [-2, 2] per weight.
flat_dim = len(flatten_weights(model))
best_weights, loss_history, acc_history = ALO(fitness_function, flat_dim, lb=-2, ub=2, epochs=100, pop_size=20)

# Set best weights to Keras model
set_model_weights(model, best_weights)

# -------------------
# Evaluation
# -------------------
# model.evaluate() refuses to run on a model that was never compiled, so
# compile here purely to attach the loss and the accuracy metric. No gradient
# training takes place and compiling does not alter the weights set above.
model.compile(loss='categorical_crossentropy', metrics=['accuracy'])

final_train_loss, final_train_acc = model.evaluate(X_train, y_train, verbose=0)
final_test_loss, final_test_acc = model.evaluate(X_test, y_test, verbose=0)

print("\nFinal Loss & Accuracy:")
print(f"Train Loss: {final_train_loss:.4f} | Train Accuracy: {final_train_acc:.4f}")
print(f"Test Loss: {final_test_loss:.4f} | Test Accuracy: {final_test_acc:.4f}")

# -------------------
# Plotting
# -------------------
# Only training curves exist here: ALO records the elite's training loss and
# training accuracy once per epoch.
plt.figure(figsize=(12, 5))

# Loss
plt.subplot(1, 2, 1)
plt.plot(loss_history, label="Train Loss (ALO)")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()
plt.title("Loss Curve")

# Accuracy
plt.subplot(1, 2, 2)
plt.plot(acc_history, label="Train Accuracy (ALO)")
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.legend()
plt.title("Accuracy Curve")

plt.tight_layout()
plt.show()


In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adadelta

# --- Model Definition ---
# Wider network with batch normalisation and dropout after each hidden layer.
# NOTE(review): X_train / X_test are used as left by the preceding cell, which
# reassigns them to standardised values - confirm that is intended here.
model = models.Sequential([
    layers.Input(shape=(X_train.shape[1],)),  # Input layer
    layers.Dense(256, activation='relu'),  # Hidden layer 1
    layers.BatchNormalization(),  # Batch normalization
    layers.Dropout(0.3),  # Dropout for regularization
    layers.Dense(128, activation='relu'),  # Hidden layer 2
    layers.BatchNormalization(),  # Batch normalization
    layers.Dropout(0.3),  # Dropout for regularization
    layers.Dense(y_train.shape[1], activation='softmax')  # Output layer
])

# --- Compile with Adadelta Optimizer ---
model.compile(optimizer=Adadelta(learning_rate=1.0),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# --- Early Stopping and Learning Rate Reduction ---
# Both callbacks watch val_loss: training stops after 10 epochs without
# improvement (restoring the best weights) and the learning rate is halved
# after 5 such epochs, down to a floor of 1e-4.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
lr_reduction = ReduceLROnPlateau(monitor='val_loss', patience=5, verbose=1, factor=0.5, min_lr=0.0001)

# --- Training the Model ---
# Validation data is carved out of the TRAINING split instead of reusing the
# test split. The callbacks above pick the stopping epoch and the learning
# rate from val_loss; feeding them the test set would tune the model on the
# very data used for the final score and make that score optimistic.
history = model.fit(X_train, y_train,
                    validation_split=0.2,
                    epochs=500,  # upper bound; early stopping normally ends sooner
                    batch_size=32,
                    verbose=1,
                    callbacks=[early_stopping, lr_reduction])

# --- Evaluation ---
# The test split is touched only here, after training has finished.
final_train_loss, final_train_acc = model.evaluate(X_train, y_train, verbose=0)
final_test_loss, final_test_acc = model.evaluate(X_test, y_test, verbose=0)

print("\nFinal Loss & Accuracy:")
print(f"Train Loss: {final_train_loss:.4f} | Train Accuracy: {final_train_acc:.4f}")
print(f"Test Loss: {final_test_loss:.4f} | Test Accuracy: {final_test_acc:.4f}")

# --- Plotting Training Results ---
plt.figure(figsize=(12, 5))

# Loss Plot
plt.subplot(1, 2, 1)
plt.plot(history.history['loss'], label="Train Loss")
plt.plot(history.history['val_loss'], label="Validation Loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()
plt.title("Loss Curve")

# Accuracy Plot
plt.subplot(1, 2, 2)
plt.plot(history.history['accuracy'], label="Train Accuracy")
plt.plot(history.history['val_accuracy'], label="Validation Accuracy")
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.legend()
plt.title("Accuracy Curve")

plt.tight_layout()
plt.show()
