# Deep Learning Approach

## Imports

In [1]:
from sklearn.model_selection import train_test_split

from src.utils import load_config
from main import run_preprocessing_pipeline
from src.models import BinaryNeuralNetworkModel, MultilabelNeuralNetworkModel
from src.evaluation import evaluate_model

[nltk_data] Error loading stopwords: <urlopen error [SSL:
[nltk_data]     CERTIFICATE_VERIFY_FAILED] certificate verify failed:
[nltk_data]     unable to get local issuer certificate (_ssl.c:1000)>


## Binary Classification

In [None]:
config = load_config("deep-learning/dl-binary")

X, y = run_preprocessing_pipeline(config)
y = y["label"].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
model = BinaryNeuralNetworkModel(X_train.shape[1])

In [None]:
model.fit(X_train, y_train, 0.001, 5, 16)

In [None]:
predictions = model.predict(X_test)

In [None]:
evaluate_model(model, X_test, y_test)

## Multilabel Classification

In [None]:
config = load_config("deep-learning/dl-multilabel")

X, y = run_preprocessing_pipeline(config)
columns = ["advert", "coi", "fanpov", "pr", "resume"]
y = y[columns].values.tolist()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
model = MultilabelNeuralNetworkModel(X_train.shape[1])

In [None]:
model.fit(X_train, y_train, learning_rate=0.001, num_epochs=5, batch_size=16)

In [None]:
predictions = model.predict(X_test)

In [None]:
evaluate_model(model, X_test, y_test)

## Augmented Classification

In [None]:
config = load_config("deep-learning/dl-augmented")

X, y = run_preprocessing_pipeline(config)

columns = ["advert", "coi", "fanpov", "pr", "resume"]
y = y[columns].values.tolist()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
model = MultilabelNeuralNetworkModel(X_train.shape[1])

In [None]:
model.fit(X_train, y_train, learning_rate=0.001, num_epochs=5, batch_size=16)

In [None]:
predictions = model.predict(X_test)

In [None]:
evaluate_model(model, X_test, y_test)

## Wikipedia-Dump

In [2]:
config = load_config("deep-learning/dl-wp-binary")

X, y = run_preprocessing_pipeline(config)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

2025-03-13 23:52:22,256 - src.utils - INFO - Loading config from configs/deep-learning/dl-wp-binary.yaml.
2025-03-13 23:52:22,262 - src.data_loader - INFO - Loading data with {'good_file': 'data/wp/good_sample.csv', 'promo_file': 'data/wp/promotional_sample.csv', 'neutral_file': 'data/wp/neutral_sample.csv', 'shuffle': False, 'nrows': 100, 'save': 'loaded_data_wp_binary.csv'}
2025-03-13 23:52:22,263 - src.data_loader - INFO - Loading non-promotional and promotional data for binary classification.
2025-03-13 23:52:22,892 - src.utils - INFO - Data saved to data/intermediary/loaded_data_wp_binary.csv.
2025-03-13 23:52:22,892 - src.preprocessing - INFO - Preprocessing data with {'remove_non_word': True, 'convert_lowercase': True, 'remove_stopwords': True, 'apply_stemming': True, 'remove_numbers': False, 'remove_whitespace': True, 'save': 'preprocessed_data_wp_binary.csv'}
2025-03-13 23:52:22,893 - src.preprocessing - INFO - Removing non-word characters.
2025-03-13 23:52:23,866 - src.prepro

In [3]:
model = BinaryNeuralNetworkModel(X_train.shape[1], num_classes=3)

In [4]:
model.fit(X_train, y_train, 0.001, 5, 16)

KeyError: 21

In [None]:
predictions = model.predict(X_test)

In [None]:
evaluate_model(model, X_test, y_test)