In [1]:
#forest classifier
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Load the dataset
file_path = "/content/oasis_cross-sectional-5708aa0a98d82080.xlsx"
df = pd.read_excel(file_path)

# Drop irrelevant columns
df.drop(columns=["ID"], inplace=True, errors='ignore')

# Handle missing values (fill with median for numerical columns)
numeric_cols = ["Age", "Educ", "SES", "MMSE", "CDR", "eTIV", "nWBV", "ASF", "Delay"]
df[numeric_cols] = df[numeric_cols].apply(lambda x: x.fillna(x.median()))

# Encode categorical variables
label_enc = LabelEncoder()
df["M/F"] = label_enc.fit_transform(df["M/F"])
df["Hand"] = label_enc.fit_transform(df["Hand"])

# Define target variable (Dementia classification based on CDR)
df["Group"] = (df["CDR"] > 0).astype(int)  # 1 for Demented, 0 for Non-Demented
df.drop(columns=["CDR"], inplace=True)  # Remove CDR as it's now encoded in Group

# Feature Scaling
scaler = StandardScaler()
numeric_cols.remove("CDR")  # Since we dropped it
df[numeric_cols] = scaler.fit_transform(df[numeric_cols])

# Split data into train and test sets
X = df.drop(columns=["Group"])
y = df["Group"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Random Forest model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")

# Function to make a prediction based on user input
def predict_dementia(*args):
    input_data = pd.DataFrame([args], columns=X_train.columns)  # Use correct column names
    input_data[numeric_cols] = scaler.transform(input_data[numeric_cols])  # Scale input data
    prediction = model.predict(input_data)[0]
    return "Demented" if prediction == 1 else "Non-Demented"

# Example usage
print(predict_dementia(75, 12, 2, 28, 1500, 0.7, 1.2, 1, 1, 100))

Model Accuracy: 0.92
Non-Demented


Unable to create process using 'C:\Users\Rohan Dedhia\anaconda3\python.exe "C:\Users\Rohan Dedhia\anaconda3\Scripts\pip-script.py" install torch torchvision torchaudio'


In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

# Load the dataset
file_path = "/content/oasis_cross-sectional-5708aa0a98d82080.xlsx"
df = pd.read_excel(file_path)

# Drop irrelevant columns
df.drop(columns=["ID", "Delay"], inplace=True, errors='ignore')

# Handle missing values
df.fillna(df.median(numeric_only=True), inplace=True)
df.fillna(df.mode().iloc[0], inplace=True)

# Encode categorical variables
label_enc = LabelEncoder()
df["M/F"] = label_enc.fit_transform(df["M/F"])
df["Hand"] = label_enc.fit_transform(df["Hand"])

# Encode target variable "CDR"
y = df["CDR"]
y = label_enc.fit_transform(y)  # Ensures labels start from 0

# Check unique labels after encoding
print("Unique labels after encoding:", np.unique(y))

# Feature Scaling
scaler = StandardScaler()
numeric_cols = ["Age", "Educ", "SES", "MMSE", "eTIV", "nWBV", "ASF"]
df[numeric_cols] = scaler.fit_transform(df[numeric_cols])

# Split data into train, validation, and test sets
X = df.drop(columns=["CDR"])
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_val_tensor = torch.tensor(X_val.values, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.long)
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

# Create PyTorch datasets and loaders
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Define Attention Layer
class AttentionLayer(nn.Module):
    def __init__(self, input_dim):
        super(AttentionLayer, self).__init__()
        self.attention_weights = nn.Linear(input_dim, 1)

    def forward(self, x):
        attn_scores = torch.softmax(self.attention_weights(x), dim=1)
        return x * attn_scores

# Define ResNet-based Model with Attention
class AttentionResNet(nn.Module):
    def __init__(self, input_dim, num_classes):
        super(AttentionResNet, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.bn1 = nn.BatchNorm1d(128)
        self.attn1 = AttentionLayer(128)
        self.res1 = nn.Linear(128, 128)
        self.fc2 = nn.Linear(128, 64)
        self.bn2 = nn.BatchNorm1d(64)
        self.attn2 = AttentionLayer(64)
        self.res2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, num_classes)

    def forward(self, x):
        x = torch.relu(self.bn1(self.fc1(x)))
        x = self.attn1(x) + self.res1(x)  # Residual connection
        x = torch.relu(self.bn2(self.fc2(x)))
        x = self.attn2(x) + self.res2(x)  # Residual connection
        x = self.fc3(x)
        return x

# Initialize model
input_dim = X_train.shape[1]
num_classes = len(np.unique(y))  # Ensure correct number of classes
model = AttentionResNet(input_dim, num_classes)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop with validation
def train_model(model, train_loader, val_loader, criterion, optimizer, epochs=20):
    model.train()
    for epoch in range(epochs):
        total_loss, correct, total = 0, 0, 0
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == batch_y).sum().item()
            total += batch_y.size(0)

        train_acc = correct / total
        val_acc = evaluate_model(model, val_loader)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss/len(train_loader):.4f}, Train Acc: {train_acc:.2f}, Val Acc: {val_acc:.2f}")

# Evaluate the model
def evaluate_model(model, data_loader):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for batch_X, batch_y in data_loader:
            outputs = model(batch_X)
            _, predicted = torch.max(outputs, 1)
            total += batch_y.size(0)
            correct += (predicted == batch_y).sum().item()
    return correct / total

# Train and evaluate
train_model(model, train_loader, val_loader, criterion, optimizer)

# Final Test Accuracy
test_accuracy = evaluate_model(model, test_loader)
print(f"Test Accuracy: {test_accuracy:.2f}")


Unique labels after encoding: [0 1 2 3]
Epoch 1/20, Loss: 0.9893, Train Acc: 0.65, Val Acc: 0.78
Epoch 2/20, Loss: 0.6564, Train Acc: 0.82, Val Acc: 0.85
Epoch 3/20, Loss: 0.3923, Train Acc: 0.83, Val Acc: 0.80
Epoch 4/20, Loss: 0.3250, Train Acc: 0.84, Val Acc: 0.83
Epoch 5/20, Loss: 0.2870, Train Acc: 0.87, Val Acc: 0.83
Epoch 6/20, Loss: 0.2802, Train Acc: 0.86, Val Acc: 0.86
Epoch 7/20, Loss: 0.2731, Train Acc: 0.87, Val Acc: 0.86
Epoch 8/20, Loss: 0.2679, Train Acc: 0.87, Val Acc: 0.80
Epoch 9/20, Loss: 0.2584, Train Acc: 0.88, Val Acc: 0.83
Epoch 10/20, Loss: 0.2549, Train Acc: 0.88, Val Acc: 0.83
Epoch 11/20, Loss: 0.2401, Train Acc: 0.89, Val Acc: 0.83
Epoch 12/20, Loss: 0.2337, Train Acc: 0.88, Val Acc: 0.78
Epoch 13/20, Loss: 0.2244, Train Acc: 0.90, Val Acc: 0.82
Epoch 14/20, Loss: 0.2281, Train Acc: 0.89, Val Acc: 0.82
Epoch 15/20, Loss: 0.2106, Train Acc: 0.90, Val Acc: 0.85
Epoch 16/20, Loss: 0.2113, Train Acc: 0.90, Val Acc: 0.85
Epoch 17/20, Loss: 0.2109, Train Acc: 0.9

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

# Load the dataset
file_path = "/content/oasis_cross-sectional-5708aa0a98d82080.xlsx"
df = pd.read_excel(file_path)

# Rename columns to match expected names
df.rename(columns={"Educ": "EDUC"}, inplace=True)

# Drop irrelevant columns
df.drop(columns=["ID", "Delay"], inplace=True, errors='ignore')

# Handle missing values
df.fillna(df.median(numeric_only=True), inplace=True)
df.fillna(df.mode().iloc[0], inplace=True)

# Encode categorical variables
label_enc = LabelEncoder()
df["M/F"] = label_enc.fit_transform(df["M/F"])
df["Hand"] = label_enc.fit_transform(df["Hand"])

# Define a new target variable (binary classification example)
df["Target"] = (df["CDR"] > 0).astype(int)  # Example: Classifying cognitive impairment

# Feature Scaling
scaler = StandardScaler()
numeric_cols = ["Age", "EDUC", "SES", "MMSE", "CDR", "eTIV", "nWBV", "ASF"]
df[numeric_cols] = scaler.fit_transform(df[numeric_cols])

# Reshape data for CNN (adding a temporal axis for concatenation)
X = df.drop(columns=["Target"]).values
y = df["Target"].values

# Convert to 3D tensor format for CNN input (batch_size, channels, sequence_length)
X = X.reshape(X.shape[0], 1, X.shape[1])  # 1 channel since data is not an image

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Define CNN Model with Concatenated Frames
class CNNWithConcatenation(nn.Module):
    def __init__(self, input_dim, num_classes):
        super(CNNWithConcatenation, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=16, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm1d(16)
        self.conv2 = nn.Conv1d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm1d(32)
        self.conv3 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm1d(64)
        self.global_pool = nn.AdaptiveAvgPool1d(1)  # Pool to single value per channel
        self.fc = nn.Linear(64, num_classes)

    def forward(self, x):
        x = torch.relu(self.bn1(self.conv1(x)))
        x = torch.relu(self.bn2(self.conv2(x)))
        x = torch.relu(self.bn3(self.conv3(x)))
        x = self.global_pool(x)  # Reduce dimensions
        x = x.view(x.size(0), -1)  # Flatten
        x = self.fc(x)
        return x

# Initialize model
input_dim = X_train.shape[2]  # Number of features
num_classes = len(np.unique(y))
model = CNNWithConcatenation(input_dim, num_classes)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
def train_model(model, train_loader, criterion, optimizer, epochs=20):
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss / len(train_loader):.4f}")

train_model(model, train_loader, criterion, optimizer)

# Evaluate the model
def evaluate_model(model, test_loader):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for batch_X, batch_y in test_loader:
            outputs = model(batch_X)
            _, predicted = torch.max(outputs, 1)
            total += batch_y.size(0)
            correct += (predicted == batch_y).sum().item()
    accuracy = correct / total
    print(f"Test Accuracy: {accuracy:.2f}")

evaluate_model(model, test_loader)

Epoch 1/20, Loss: 0.5901
Epoch 2/20, Loss: 0.4095
Epoch 3/20, Loss: 0.3431
Epoch 4/20, Loss: 0.2867
Epoch 5/20, Loss: 0.2512
Epoch 6/20, Loss: 0.2178
Epoch 7/20, Loss: 0.2080
Epoch 8/20, Loss: 0.1623
Epoch 9/20, Loss: 0.1492
Epoch 10/20, Loss: 0.1261
Epoch 11/20, Loss: 0.1054
Epoch 12/20, Loss: 0.1028
Epoch 13/20, Loss: 0.0826
Epoch 14/20, Loss: 0.0861
Epoch 15/20, Loss: 0.0719
Epoch 16/20, Loss: 0.0521
Epoch 17/20, Loss: 0.0439
Epoch 18/20, Loss: 0.0392
Epoch 19/20, Loss: 0.0363
Epoch 20/20, Loss: 0.0416
Test Accuracy: 0.98


In [26]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight

# Load dataset
file_path = "/content/oasis_cross-sectional-5708aa0a98d82080.xlsx"
df = pd.read_excel(file_path)

# Drop irrelevant columns
df.drop(columns=["ID"], inplace=True, errors='ignore')

# Handle missing values
df.fillna(df.median(numeric_only=True), inplace=True)
df.fillna(df.mode().iloc[0], inplace=True)

# Encode categorical variables
label_enc = LabelEncoder()
for col in ["M/F", "Hand"]:
    if col in df.columns:
        df[col] = label_enc.fit_transform(df[col].astype(str))

# Select target variable
target_col = "CDR" if "CDR" in df.columns else None
if not target_col:
    raise ValueError("CDR column not found. Check dataset.")

# Inspect unique CDR values
print("Unique CDR values:", df[target_col].unique())

# Bin CDR values into categories with improved logic
cdr_bins = [0, 0.5, 1.5, 3]
cdr_labels = [0, 1, 2]

df[target_col] = pd.cut(df[target_col], bins=cdr_bins, labels=cdr_labels, include_lowest=True)
print("Binned CDR values:", df[target_col].unique())

# Drop NaNs after binning
df.dropna(subset=[target_col], inplace=True)

# Convert categories to integers
df[target_col] = df[target_col].astype(int)

# Handle class imbalance
class_weights = compute_class_weight('balanced', classes=np.unique(df[target_col]), y=df[target_col])
class_weights_tensor = torch.tensor(class_weights, dtype=torch.float32)

# Split features into categories
numeric_cols = ["Age", "Educ", "SES", "MMSE", "eTIV", "nWBV", "ASF"]
categorical_cols = [col for col in df.columns if col not in numeric_cols + [target_col]]

# Scale numerical features
df[numeric_cols] = StandardScaler().fit_transform(df[numeric_cols])

# Split data into train and test sets
X_num = df[numeric_cols]
X_cat = df[categorical_cols]
y = df[target_col]

X_num_train, X_num_test, y_train, y_test = train_test_split(X_num, y, test_size=0.2, random_state=42)
X_cat_train, X_cat_test, _, _ = train_test_split(X_cat, y, test_size=0.2, random_state=42)

# Convert to tensors
X_num_train_tensor = torch.tensor(X_num_train.values, dtype=torch.float32)
X_num_test_tensor = torch.tensor(X_num_test.values, dtype=torch.float32)
X_cat_train_tensor = torch.tensor(X_cat_train.values, dtype=torch.float32)
X_cat_test_tensor = torch.tensor(X_cat_test.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values.astype(int), dtype=torch.long)
y_test_tensor = torch.tensor(y_test.values.astype(int), dtype=torch.long)

# Validate class indices
num_classes = len(np.unique(y_train))
if y_train_tensor.max().item() >= num_classes:
    raise ValueError(f"Invalid target value. Expected classes in range [0, {num_classes - 1}], but found {y_train_tensor.max().item()}.")

# Create PyTorch datasets
num_dataset = TensorDataset(X_num_train_tensor, y_train_tensor)
num_loader = DataLoader(num_dataset, batch_size=32, shuffle=True)

# Define models
class SimpleNN(nn.Module):
    def __init__(self, input_dim, num_classes):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(input_dim, 64)
        self.fc2 = nn.Linear(64, num_classes)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Initialize models
num_model = SimpleNN(X_num_train.shape[1], num_classes)
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_cat_train, y_train)

# Train the neural network with early stopping
criterion = nn.CrossEntropyLoss(weight=class_weights_tensor)
optimizer = optim.Adam(num_model.parameters(), lr=0.001)

best_loss = float('inf')
patience = 3
patience_counter = 0

for epoch in range(50):
    num_model.train()
    total_loss = 0
    for batch_X, batch_y in num_loader:
        optimizer.zero_grad()
        outputs = num_model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    avg_loss = total_loss / len(num_loader)
    print(f"Epoch {epoch + 1}, Loss: {avg_loss:.4f}")

    # Early stopping
    if avg_loss < best_loss:
        best_loss = avg_loss
        patience_counter = 0
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print("Early stopping triggered")
            break

# Ensemble evaluation
num_model.eval()
with torch.no_grad():
    nn_preds = num_model(X_num_test_tensor).numpy()
    rf_preds = rf_classifier.predict_proba(X_cat_test)

    combined_preds = 0.6 * nn_preds + 0.4 * rf_preds
    final_preds = np.argmax(combined_preds, axis=1)
    final_accuracy = np.mean(final_preds == y_test.values)

    print(f"Ensemble Model Accuracy: {final_accuracy:.2f}")

# The model now properly aligns tensor sizes and ensures data integrity before training! 🚀


Unique CDR values: [0.  0.5 1.  2. ]
Binned CDR values: [0, 1, 2]
Categories (3, int64): [0 < 1 < 2]
Epoch 1, Loss: 1.0824
Epoch 2, Loss: 0.9516
Epoch 3, Loss: 0.8569
Epoch 4, Loss: 0.7770
Epoch 5, Loss: 0.7343
Epoch 6, Loss: 0.6633
Epoch 7, Loss: 0.6306
Epoch 8, Loss: 0.5692
Epoch 9, Loss: 0.5782
Epoch 10, Loss: 0.5493
Epoch 11, Loss: 0.5147
Epoch 12, Loss: 0.5126
Epoch 13, Loss: 0.4730
Epoch 14, Loss: 0.4443
Epoch 15, Loss: 0.4299
Epoch 16, Loss: 0.4334
Epoch 17, Loss: 0.3981
Epoch 18, Loss: 0.4128
Epoch 19, Loss: 0.4018
Epoch 20, Loss: 0.3622
Epoch 21, Loss: 0.3590
Epoch 22, Loss: 0.3731
Epoch 23, Loss: 0.3461
Epoch 24, Loss: 0.3327
Epoch 25, Loss: 0.3537
Epoch 26, Loss: 0.3482
Epoch 27, Loss: 0.3199
Epoch 28, Loss: 0.3322
Epoch 29, Loss: 0.3222
Epoch 30, Loss: 0.2950
Epoch 31, Loss: 0.2979
Epoch 32, Loss: 0.2900
Epoch 33, Loss: 0.2844
Epoch 34, Loss: 0.2959
Epoch 35, Loss: 0.2869
Epoch 36, Loss: 0.2902
Early stopping triggered
Ensemble Model Accuracy: 0.92


In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

# Load the dataset
file_path = "/content/oasis_cross-sectional-5708aa0a98d82080.xlsx"
df = pd.read_excel(file_path)

# Drop unnecessary columns
df.drop(columns=["ID", "Hand"], inplace=True, errors='ignore')

# Handle missing values
df.fillna(df.median(numeric_only=True), inplace=True)

# Encode categorical variable
df["M/F"] = LabelEncoder().fit_transform(df["M/F"])

# Select features and target variable (CDR)
numeric_cols = ["Age", "Educ", "SES", "MMSE", "eTIV", "nWBV", "ASF"]
target_col = "CDR"

# Normalize features
scaler = StandardScaler()
df[numeric_cols] = scaler.fit_transform(df[numeric_cols])

# Reshape data for LSTM
sequence_length = 3  # Using previous 3 records to predict the next state
def create_sequences(data, target, seq_length):
    sequences, labels = [], []
    for i in range(len(data) - seq_length):
        sequences.append(data[i:i + seq_length].values)
        labels.append(target[i + seq_length])
    return np.array(sequences), np.array(labels)

X, y = create_sequences(df[numeric_cols], df[target_col], sequence_length)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Define LSTM Model
class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, num_classes):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, num_classes)

    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        final_out = lstm_out[:, -1, :]
        return self.fc(final_out)

# Initialize model
input_dim = X_train.shape[2]
hidden_dim = 64
num_layers = 2
num_classes = len(np.unique(y))
model = LSTMModel(input_dim, hidden_dim, num_layers, num_classes)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
def train_model(model, train_loader, criterion, optimizer, epochs=20):
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss / len(train_loader):.4f}")

train_model(model, train_loader, criterion, optimizer)

# Evaluate the model
def evaluate_model(model, test_loader):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for batch_X, batch_y in test_loader:
            outputs = model(batch_X)
            _, predicted = torch.max(outputs, 1)
            total += batch_y.size(0)
            correct += (predicted == batch_y).sum().item()
    accuracy = correct / total
    print(f"Test Accuracy: {accuracy:.2f}")

evaluate_model(model, test_loader)


Epoch 1/20, Loss: 1.3764
Epoch 2/20, Loss: 1.1893
Epoch 3/20, Loss: 0.8211
Epoch 4/20, Loss: 0.3621
Epoch 5/20, Loss: 0.3101
Epoch 6/20, Loss: 0.2937
Epoch 7/20, Loss: 0.2783
Epoch 8/20, Loss: 0.2641
Epoch 9/20, Loss: 0.2661
Epoch 10/20, Loss: 0.2631
Epoch 11/20, Loss: 0.2599
Epoch 12/20, Loss: 0.2627
Epoch 13/20, Loss: 0.2587
Epoch 14/20, Loss: 0.2604
Epoch 15/20, Loss: 0.2610
Epoch 16/20, Loss: 0.2618
Epoch 17/20, Loss: 0.2588
Epoch 18/20, Loss: 0.2614
Epoch 19/20, Loss: 0.2577
Epoch 20/20, Loss: 0.2612
Test Accuracy: 0.92
