# Depression Detection using KANs
To run the following code, ensure you have installed the packages mentioned in the README.MD

In [1]:
pip install pykan

Collecting pykan
  Downloading pykan-0.2.1-py3-none-any.whl (74 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m74.2/74.2 kB[0m [31m898.1 kB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pykan
Successfully installed pykan-0.2.1


Import all the python libraries required for the classification task.

In [2]:
import numpy as np
import pandas as pd
import torch
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from kan import KAN
from kan import *
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

This Python script demonstrates how to use Kolmogorov-Arnold Networks (KANs) for binary classification task on a text dataset. It involves loading and preprocessing text data,  transforming it using TF-IDF, converting the training and testing sets into PyTorch tensors, training a KAN model, and evaluating its performance using various metrics.

# Single-Layer KAN

In [3]:
# Load dataset
df = pd.read_csv('/depression_dataset_reddit_cleaned.csv')

# Text preprocessing using TF-IDF
vectorizer = TfidfVectorizer(max_features=1000)
X = vectorizer.fit_transform(df['clean_text']).toarray()
y = df['is_depression'].values

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert to PyTorch tensors
dataset = {
    'train_input': torch.from_numpy(X_train).float(),
    'test_input': torch.from_numpy(X_test).float(),
    'train_label': torch.from_numpy(y_train).long(),
    'test_label': torch.from_numpy(y_test).long()
}

# Model parameters
input_layer_dim = X_train.shape[1]  # Number of features after TF-IDF
first_hidden_layer_dim = 100
num_class = 2  # Binary classification (0 or 1)

# Initialize the KAN model
model = KAN(width=[input_layer_dim, first_hidden_layer_dim, num_class], grid=10, k=2)
model = model.speed()

# Metrics storage
acc_train, f1_train, prec_train, recall_train = [], [], [], []
acc_test, f1_test, prec_test, recall_test = [], [], [], []

def train_acc():
    y_pred_true = torch.argmax(model(dataset['train_input']), dim=1)
    y_true = dataset['train_label']
    acc_train.append(accuracy_score(y_true, y_pred_true))
    f1_train.append(f1_score(y_true, y_pred_true, average='weighted'))
    prec_train.append(precision_score(y_true, y_pred_true, average='weighted'))
    recall_train.append(recall_score(y_true, y_pred_true, average='weighted'))
    return np.mean(acc_train)

def test_acc():
    y_pred_test = torch.argmax(model(dataset['test_input']), dim=1)
    y_test = dataset['test_label']
    acc_test.append(accuracy_score(y_test, y_pred_test))
    f1_test.append(f1_score(y_test, y_pred_test, average='weighted'))
    prec_test.append(precision_score(y_test, y_pred_test, average='weighted'))
    recall_test.append(recall_score(y_test, y_pred_test, average='weighted'))
    return np.mean(acc_test)

# Model training
results = model.fit(dataset, opt="LBFGS", steps=20, metrics=(train_acc, test_acc), loss_fn=torch.nn.CrossEntropyLoss())

# Display results obtained on test data
print(f"Test Accuracy: {np.mean(acc_test)}")
print(f"Test F1 Score: {np.mean(f1_test)}")
print(f"Test Precision: {np.mean(prec_test)}")
print(f"Test Recall: {np.mean(recall_test)}")


train loss: 4.67e-02 | test loss: 4.79e-01 | reg: 0.00e+00 : 100%|█| 20/20 [42:25<00:00, 127.26s/it]

Test Accuracy: 0.9582740788623141
Test F1 Score: 0.9582332778022409
Test Precision: 0.9594176520477313
Test Recall: 0.9582740788623141





# Two-Layer KAN

In [5]:
import numpy as np
import pandas as pd
import torch
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from kan import KAN
from kan import *
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

# Load dataset
df = pd.read_csv('/depression_dataset_reddit_cleaned.csv')

# Text preprocessing using TF-IDF
vectorizer = TfidfVectorizer(max_features=1000)
X = vectorizer.fit_transform(df['clean_text']).toarray()
y = df['is_depression'].values

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert to PyTorch tensors
dataset = {
    'train_input': torch.from_numpy(X_train).float(),
    'test_input': torch.from_numpy(X_test).float(),
    'train_label': torch.from_numpy(y_train).long(),
    'test_label': torch.from_numpy(y_test).long()
}

# Model parameters
input_layer_dim = X_train.shape[1]  # Number of features after TF-IDF
first_hidden_layer_dim = 100
second_hidden_layer_dim = 75
num_class = 2  # Binary classification (0 or 1)

# Initialize the KAN model
model = KAN(width=[input_layer_dim,first_hidden_layer_dim,second_hidden_layer_dim,num_class], grid=4, k=2)
model = model.speed()
# Metrics storage
acc_train, f1_train, prec_train, recall_train = [], [], [], []
acc_test, f1_test, prec_test, recall_test = [], [], [], []

def train_acc():
    y_pred_true = torch.argmax(model(dataset['train_input']), dim=1)
    y_true = dataset['train_label']
    acc_train.append(accuracy_score(y_true, y_pred_true))
    f1_train.append(f1_score(y_true, y_pred_true, average='weighted'))
    prec_train.append(precision_score(y_true, y_pred_true, average='weighted'))
    recall_train.append(recall_score(y_true, y_pred_true, average='weighted'))
    return np.mean(acc_train)

def test_acc():
    y_pred_test = torch.argmax(model(dataset['test_input']), dim=1)
    y_test = dataset['test_label']
    acc_test.append(accuracy_score(y_test, y_pred_test))
    f1_test.append(f1_score(y_test, y_pred_test, average='weighted'))
    prec_test.append(precision_score(y_test, y_pred_test, average='weighted'))
    recall_test.append(recall_score(y_test, y_pred_test, average='weighted'))
    return np.mean(acc_test)

# Model training
results = model.fit(dataset, opt="LBFGS", steps=30, metrics=(train_acc, test_acc), loss_fn=torch.nn.CrossEntropyLoss())

# Display results obtained on test data
print(f"Test Accuracy: {np.mean(acc_test)}")
print(f"Test F1 Score: {np.mean(f1_test)}")
print(f"Test Precision: {np.mean(prec_test)}")
print(f"Test Recall: {np.mean(recall_test)}")


train loss: 4.82e-02 | test loss: 6.99e-01 | reg: 0.00e+00 : 100%|█| 30/30 [1:01:52<00:00, 123.76s/i

Test Accuracy: 0.9539754363283776
Test F1 Score: 0.9539222937909848
Test Precision: 0.9546275821175949
Test Recall: 0.9539754363283776





# Three-Layer KAN

In [6]:
import numpy as np
import pandas as pd
import torch
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from kan import KAN
from kan import *
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

# Load dataset
df = pd.read_csv('/depression_dataset_reddit_cleaned.csv')

# Text preprocessing using TF-IDF
vectorizer = TfidfVectorizer(max_features=1000)
X = vectorizer.fit_transform(df['clean_text']).toarray()
y = df['is_depression'].values

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert to PyTorch tensors
dataset = {
    'train_input': torch.from_numpy(X_train).float(),
    'test_input': torch.from_numpy(X_test).float(),
    'train_label': torch.from_numpy(y_train).long(),
    'test_label': torch.from_numpy(y_test).long()
}

# Model parameters
input_layer_dim = X_train.shape[1]  # Number of features after TF-IDF
first_hidden_layer_dim = 100
second_hidden_layer_dim = 75
third_hidden_layer_dim = 50
num_class = 2  # Binary classification (0 or 1)

# Initialize the KAN model
model = KAN(width=[input_layer_dim,first_hidden_layer_dim,second_hidden_layer_dim,third_hidden_layer_dim, num_class], grid=4, k=2)
model = model.speed()
# Metrics storage
acc_train, f1_train, prec_train, recall_train = [], [], [], []
acc_test, f1_test, prec_test, recall_test = [], [], [], []

def train_acc():
    y_pred_true = torch.argmax(model(dataset['train_input']), dim=1)
    y_true = dataset['train_label']
    acc_train.append(accuracy_score(y_true, y_pred_true))
    f1_train.append(f1_score(y_true, y_pred_true, average='weighted'))
    prec_train.append(precision_score(y_true, y_pred_true, average='weighted'))
    recall_train.append(recall_score(y_true, y_pred_true, average='weighted'))
    return np.mean(acc_train)

def test_acc():
    y_pred_test = torch.argmax(model(dataset['test_input']), dim=1)
    y_test = dataset['test_label']
    acc_test.append(accuracy_score(y_test, y_pred_test))
    f1_test.append(f1_score(y_test, y_pred_test, average='weighted'))
    prec_test.append(precision_score(y_test, y_pred_test, average='weighted'))
    recall_test.append(recall_score(y_test, y_pred_test, average='weighted'))
    return np.mean(acc_test)

# Model training
results = model.fit(dataset, opt="LBFGS", steps=30, metrics=(train_acc, test_acc), loss_fn=torch.nn.CrossEntropyLoss())

# Display results obtained on test data
print(f"Test Accuracy: {np.mean(acc_test)}")
print(f"Test F1 Score: {np.mean(f1_test)}")
print(f"Test Precision: {np.mean(prec_test)}")
print(f"Test Recall: {np.mean(recall_test)}")


train loss: 4.51e-02 | test loss: 6.94e-01 | reg: 0.00e+00 : 100%|█| 30/30 [1:07:15<00:00, 134.52s/i

Test Accuracy: 0.9473389355742298
Test F1 Score: 0.9473090190838374
Test Precision: 0.9478148609809689
Test Recall: 0.9473389355742298



