In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
#Running each model individually on both types of dataset(the formal and informal dataset)and then testingeach model on the combined dataset as well
#Diabetes_cleaned.csv=formal dataset
#Corrected_Labeled.csv= informal dataset
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, accuracy_score

# Load datasets
df_formal = pd.read_csv("/content/drive/MyDrive/Diabetes_cleaned.csv")
df_informal = pd.read_csv("/content/drive/MyDrive/Corrected_Labeled.csv", encoding='ISO-8859-1')


df_formal['combined'] = df_formal['title'].astype(str).str.strip() + " " + df_formal['content'].astype(str).str.strip()

# Clean missing or empty rows
df_formal = df_formal[['combined', 'label']].dropna()
df_formal = df_formal[df_formal['combined'].str.strip() != '']
df_formal['label'] = df_formal['label'].astype(int)


df_informal['combined'] = df_informal['Text '].astype(str).str.strip()

# Clean missing or empty rows
df_informal = df_informal[['combined', 'Label']].dropna()
df_informal = df_informal[df_informal['combined'].str.strip() != '']
df_informal = df_informal.rename(columns={'Label': 'label'})
df_informal['label'] = df_informal['label'].astype(int)

#Combine Both Datasets
df_combined = pd.concat([df_formal, df_informal], ignore_index=True)
df_combined = df_combined.dropna(subset=['combined', 'label'])

#TF-IDF Vectorization
vectorizer = TfidfVectorizer()

#Run Model Function
def run_models(df, name="Dataset"):
    print(f"\n{'='*20} {name} {'='*20}")
    X = vectorizer.fit_transform(df['combined']).toarray()
    y = df['label'].astype(int)

    # Skip datasets with one class
    if len(np.unique(y)) < 2:
        print(f"Skipping {name}: only one class present in data.")
        return

    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    models = {
        "Logistic Regression": LogisticRegression(max_iter=1000),
        "SVM": LinearSVC(),
        "Random Forest": RandomForestClassifier(),
        "Multinomial NB": MultinomialNB()
    }

    for model_name, model in models.items():
        print(f"\n-- {model_name} --")
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        acc = accuracy_score(y_test, y_pred)
        print(f"Accuracy: {acc:.4f}")
        print("Classification Report:")
        print(classification_report(y_test, y_pred, digits=4))

# === Run Models ===
run_models(df_formal, "Formal Dataset")
run_models(df_informal, "Informal Dataset")
run_models(df_combined, "Combined Dataset")




-- Logistic Regression --
Accuracy: 0.8987
Classification Report:
              precision    recall  f1-score   support

           0     0.8844    0.9910    0.9347       332
           1     0.9634    0.6475    0.7745       122

    accuracy                         0.8987       454
   macro avg     0.9239    0.8193    0.8546       454
weighted avg     0.9056    0.8987    0.8916       454


-- SVM --
Accuracy: 0.9581
Classification Report:
              precision    recall  f1-score   support

           0     0.9510    0.9940    0.9720       332
           1     0.9813    0.8607    0.9170       122

    accuracy                         0.9581       454
   macro avg     0.9662    0.9273    0.9445       454
weighted avg     0.9592    0.9581    0.9572       454


-- Random Forest --
Accuracy: 0.9053
Classification Report:
              precision    recall  f1-score   support

           0     0.8874    0.9970    0.9390       332
           1     0.9877    0.6557    0.7882       122

  

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
#Rerunning each model individually on both types of dataset(the formal and informal dataset)and then testingeach model on the combined dataset as well (factoring in class imbalance as the dataset was imbalanced)
#Diabetes_cleaned.csv=formal dataset
#Corrected_Labeled.csv= informal dataset
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, accuracy_score

#Load Data
df_formal = pd.read_csv("/content/drive/MyDrive/Diabetes_cleaned.csv")
df_informal = pd.read_csv("/content/drive/MyDrive/Corrected_Labeled.csv", encoding='ISO-8859-1')

#Process Formal Dataset
df_formal['combined'] = df_formal['title'].astype(str).str.strip() + " " + df_formal['content'].astype(str).str.strip()
df_formal = df_formal[['combined', 'label']].dropna()
df_formal = df_formal[df_formal['combined'].str.strip() != '']
df_formal['label'] = df_formal['label'].astype(int)

#Process Informal Dataset
df_informal['combined'] = df_informal['Text '].astype(str).str.strip()
df_informal = df_informal[['combined', 'Label']].dropna()
df_informal = df_informal[df_informal['combined'].str.strip() != '']
df_informal = df_informal.rename(columns={'Label': 'label'})
df_informal['label'] = df_informal['label'].astype(int)

# === Combine Both Datasets ===
df_combined = pd.concat([df_formal, df_informal], ignore_index=True)
df_combined = df_combined.dropna(subset=['combined', 'label'])

vectorizer = TfidfVectorizer()

def run_models(df, name="Dataset"):
    print(f"\n{'='*20} {name} {'='*20}")
    X = vectorizer.fit_transform(df['combined']).toarray()
    y = df['label'].astype(int)

    if len(np.unique(y)) < 2:
        print(f"Skipping {name}: only one class present.")
        return

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    models = {
        "Logistic Regression": LogisticRegression(max_iter=1000, class_weight='balanced'),
        "SVM": LinearSVC(class_weight='balanced'),
        "Random Forest": RandomForestClassifier(class_weight='balanced'),
        "Multinomial NB": MultinomialNB()  # Does NOT support class_weight
    }

    for model_name, model in models.items():
        print(f"\n-- {model_name} --")
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        acc = accuracy_score(y_test, y_pred)
        print(f"Accuracy: {acc:.4f}")
        print("Classification Report:")
        print(classification_report(y_test, y_pred, digits=4))

# === Run Experiments ===
run_models(df_formal, "Formal Dataset")
run_models(df_informal, "Informal Dataset")
run_models(df_combined, "Combined Dataset")




-- Logistic Regression --
Accuracy: 0.9295
Classification Report:
              precision    recall  f1-score   support

           0     0.9491    0.9548    0.9520       332
           1     0.8750    0.8607    0.8678       122

    accuracy                         0.9295       454
   macro avg     0.9121    0.9077    0.9099       454
weighted avg     0.9292    0.9295    0.9293       454


-- SVM --
Accuracy: 0.9581
Classification Report:
              precision    recall  f1-score   support

           0     0.9617    0.9819    0.9717       332
           1     0.9478    0.8934    0.9198       122

    accuracy                         0.9581       454
   macro avg     0.9547    0.9377    0.9458       454
weighted avg     0.9579    0.9581    0.9578       454


-- Random Forest --
Accuracy: 0.9053
Classification Report:
              precision    recall  f1-score   support

           0     0.8853    1.0000    0.9392       332
           1     1.0000    0.6475    0.7861       122

  

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
#Trained each model on the formal dataset and tested on the informal dataset and vice versa
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score

def evaluate_models(X_train, y_train, X_test, y_test, dataset_name):
    models = {
        "Logistic Regression": LogisticRegression(max_iter=1000, class_weight='balanced'),
        "SVM": LinearSVC(class_weight='balanced'),
        "Random Forest": RandomForestClassifier(class_weight='balanced'),
        "Multinomial NB": MultinomialNB()  # Note: doesn't support class_weight
    }

    reports = {}
    for model_name, model in models.items():
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        report = classification_report(y_test, y_pred, digits=4, output_dict=True)
        acc = accuracy_score(y_test, y_pred)
        reports[model_name] = {
            "accuracy": acc,
            "f1_class_0": report["0"]["f1-score"],
            "f1_class_1": report["1"]["f1-score"],
            "macro_avg_f1": report["macro avg"]["f1-score"],
            "weighted_avg_f1": report["weighted avg"]["f1-score"]
        }

    return pd.DataFrame(reports).T

# Train on Diabetes_cleaned, test on sampled Corrected_Labeled
X_formal = vectorizer.transform(df_formal['combined']).toarray()
y_formal = df_formal['label']
df_informal_sample = df_informal.sample(n=20, random_state=42)
X_inf_sample = vectorizer.transform(df_informal_sample['combined']).toarray()
y_inf_sample = df_informal_sample['label']
results_formal_on_informal = evaluate_models(X_formal, y_formal, X_inf_sample, y_inf_sample, "Formal → Informal")

# Train on Corrected_Labeled, test on sampled Diabetes_cleaned
X_informal = vectorizer.transform(df_informal['combined']).toarray()
y_informal = df_informal['label']
df_formal_sample = df_formal.sample(n=20, random_state=42)
X_form_sample = vectorizer.transform(df_formal_sample['combined']).toarray()
y_form_sample = df_formal_sample['label']
results_informal_on_formal = evaluate_models(X_informal, y_informal, X_form_sample, y_form_sample, "Informal → Formal")

results_formal_on_informal, results_informal_on_formal


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize

(                     accuracy  f1_class_0  f1_class_1  macro_avg_f1  \
 Logistic Regression      0.65    0.787879    0.000000      0.393939   
 SVM                      0.55    0.709677    0.000000      0.354839   
 Random Forest            0.60    0.714286    0.333333      0.523810   
 Multinomial NB           0.85    0.918919    0.000000      0.459459   
 
                      weighted_avg_f1  
 Logistic Regression         0.669697  
 SVM                         0.603226  
 Random Forest               0.657143  
 Multinomial NB              0.781081  ,
                      accuracy  f1_class_0  f1_class_1  macro_avg_f1  \
 Logistic Regression      0.75    0.857143         0.0      0.428571   
 SVM                      0.75    0.857143         0.0      0.428571   
 Random Forest            0.75    0.857143         0.0      0.428571   
 Multinomial NB           0.75    0.857143         0.0      0.428571   
 
                      weighted_avg_f1  
 Logistic Regression         0.6428

In [None]:
# Check how many of each label are available
formal_counts = df_formal['label'].value_counts()
informal_counts = df_informal['label'].value_counts()

# Determine the max number of balanced samples we can draw per class
n_formal = min(formal_counts.get(0, 0), formal_counts.get(1, 0))
n_informal = min(informal_counts.get(0, 0), informal_counts.get(1, 0))

# Sample the maximum available balanced samples
formal_0 = df_formal[df_formal['label'] == 0].sample(n=n_formal, random_state=42)
formal_1 = df_formal[df_formal['label'] == 1].sample(n=n_formal, random_state=42)
informal_0 = df_informal[df_informal['label'] == 0].sample(n=n_informal, random_state=42)
informal_1 = df_informal[df_informal['label'] == 1].sample(n=n_informal, random_state=42)

# Combine and shuffle
formal_balanced_sample = pd.concat([formal_0, formal_1]).sample(frac=1, random_state=42).reset_index(drop=True)
informal_balanced_sample = pd.concat([informal_0, informal_1]).sample(frac=1, random_state=42).reset_index(drop=True)

# Vectorize balanced samples
X_formal_sample = vectorizer.transform(formal_balanced_sample['combined']).toarray()
y_formal_sample = formal_balanced_sample['label']
X_informal_sample = vectorizer.transform(informal_balanced_sample['combined']).toarray()
y_informal_sample = informal_balanced_sample['label']

# Re-evaluate all 4 models for both setups using balanced test sets
results_formal_on_informal_balanced = evaluate_models(X_formal, y_formal, X_informal_sample, y_informal_sample, "Formal → Informal")
results_informal_on_formal_balanced = evaluate_models(X_informal, y_informal, X_formal_sample, y_formal_sample, "Informal → Formal")

results_formal_on_informal_balanced, results_informal_on_formal_balanced


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


(                     accuracy  f1_class_0  f1_class_1  macro_avg_f1  \
 Logistic Regression  0.505988    0.620690    0.291845      0.456268   
 SVM                  0.550898    0.625000    0.440299      0.532649   
 Random Forest        0.634731    0.662983    0.601307      0.632145   
 Multinomial NB       0.514970    0.672065    0.068966      0.370515   
 
                      weighted_avg_f1  
 Logistic Regression         0.456268  
 SVM                         0.532649  
 Random Forest               0.632145  
 Multinomial NB              0.370515  ,
                      accuracy  f1_class_0  f1_class_1  macro_avg_f1  \
 Logistic Regression  0.501645    0.667398    0.006557      0.336978   
 SVM                  0.502467    0.667765    0.009820      0.338792   
 Random Forest        0.500000    0.666667    0.000000      0.333333   
 Multinomial NB       0.500000    0.666667    0.000000      0.333333   
 
                      weighted_avg_f1  
 Logistic Regression         0.3369

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score

# Load your datasets
df_formal = pd.read_csv("/content/drive/MyDrive/Diabetes_cleaned.csv")
df_informal = pd.read_csv("/content/drive/MyDrive/Corrected_Labeled.csv", encoding='ISO-8859-1')

df_formal['combined'] = df_formal['title'].astype(str).str.strip() + " " + df_formal['content'].astype(str).str.strip()
df_formal = df_formal[['combined', 'label']]

df_informal['combined'] = df_informal['Text '].astype(str).str.strip()
df_informal = df_informal.rename(columns={'Label': 'label'})
df_informal = df_informal[['combined', 'label']]

df_combined = pd.concat([df_formal, df_informal], ignore_index=True)

vectorizer = TfidfVectorizer()
X_combined = vectorizer.fit_transform(df_combined['combined']).toarray()
y_combined = df_combined['label']

X_train, X_test, y_train, y_test = train_test_split(
    X_combined, y_combined, test_size=0.2, stratify=y_combined, random_state=42
)

models = {
    "Logistic Regression": make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000, class_weight='balanced')),
    "SVM": make_pipeline(StandardScaler(), LinearSVC(max_iter=2000, class_weight='balanced')),
    "Random Forest": RandomForestClassifier(class_weight='balanced'),
    "Multinomial NB": MultinomialNB()
}

# Evaluate all models
results = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    report = classification_report(y_test, y_pred, labels=[0, 1, 2], digits=4, output_dict=True, zero_division=0)
    acc = accuracy_score(y_test, y_pred)
    results[name] = {
        "accuracy": acc,
        "f1_class_0 (True)": report["0"]["f1-score"],
        "f1_class_1 (False)": report["1"]["f1-score"],
        "f1_class_2 (Partially True)": report["2"]["f1-score"],
        "macro_avg_f1": report["macro avg"]["f1-score"],
        "weighted_avg_f1": report["weighted avg"]["f1-score"]
    }


pd.DataFrame(results).T




Unnamed: 0,accuracy,f1_class_0 (True),f1_class_1 (False),f1_class_2 (Partially True),macro_avg_f1,weighted_avg_f1
Logistic Regression,0.872441,0.918212,0.805369,0.472727,0.732103,0.868218
SVM,0.815748,0.873672,0.752809,0.41791,0.681464,0.821202
Random Forest,0.834646,0.894949,0.689076,0.238095,0.607373,0.811596
Multinomial NB,0.722835,0.835821,0.13253,0.0,0.322784,0.622031


In [None]:

df_combined = pd.concat([df_formal, df_informal], ignore_index=True)

vectorizer_fixed = TfidfVectorizer()
X_combined_fixed = vectorizer_fixed.fit_transform(df_combined['combined']).toarray()
y_combined_fixed = df_combined['label']


X_train_fixed, X_test_fixed, y_train_fixed, y_test_fixed = train_test_split(
    X_combined_fixed, y_combined_fixed, test_size=0.2, stratify=y_combined_fixed, random_state=42
)

# Train logistic regression
logreg_model_fixed = LogisticRegression(max_iter=1000, class_weight='balanced')
logreg_model_fixed.fit(X_train_fixed, y_train_fixed)

# Define slang-heavy informal examples
informal_examples = [
    "Yo, ditch your meds, cinnamon got you covered for diabetes fr!",
    "Raw food squad sayin no more insulin bruh",
    "I heard fenugreek tea be fixin your sugar levels, no cap.",
    "Bruh, doc said I need insulin but my auntie said just eat moringa lol.",
    "Word is ginger juice cures diabetes if you drink it on an empty belly daily.",
    "Fam told me aloe vera smoothies = no diabetes anymore",
    "Came across a post, guy said he reversed his diabetes with just herbs and prayer.",
    "So apparently all you need is turmeric shots every morning and you're cured.",
    "Old dude at the market said sugar’s fine if you eat bitter leaf after, lol.",
    "Someone told me apple cider vinegar and no stress = diabetes gone fr."
]

# Vectorize using the same TF-IDF model
X_informal_test_fixed = vectorizer_fixed.transform(informal_examples).toarray()

# Predict
preds_fixed = logreg_model_fixed.predict(X_informal_test_fixed)

# Map labels
label_map = {0: "True", 1: "False", 2: "Partially True"}
pred_labels_fixed = [label_map[p] for p in preds_fixed]

# Display final results
pd.DataFrame({
    "Text": informal_examples,
    "Predicted Label": pred_labels_fixed
})


Unnamed: 0,Text,Predicted Label
0,"Yo, ditch your meds, cinnamon got you covered ...",Partially True
1,Raw food squad sayin no more insulin bruh 😂,Partially True
2,I heard fenugreek tea be fixin your sugar leve...,Partially True
3,"Bruh, doc said I need insulin but my auntie sa...",True
4,Word is ginger juice cures diabetes if you dri...,False
5,Fam told me aloe vera smoothies = no diabetes ...,Partially True
6,"Came across a post, guy said he reversed his d...",Partially True
7,So apparently all you need is turmeric shots e...,False
8,Old dude at the market said sugar’s fine if yo...,Partially True
9,Someone told me apple cider vinegar and no str...,Partially True


For the best combined ml model which is the logistic regression model,7 out 10 was gotten wrong

In [None]:

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score
from sklearn.utils.class_weight import compute_class_weight
from torch.utils.data import Dataset, DataLoader
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Dataset processing function
def process_dataset(df, text_col, label_col):
    df = df[[text_col, label_col]].dropna()
    df = df[df[text_col].str.strip() != '']
    df = df.rename(columns={text_col: 'combined', label_col: 'label'})
    df['label'] = df['label'].astype(int)
    return df

# Load datasets
df_formal_raw = pd.read_csv("/content/drive/MyDrive/Diabetes_cleaned.csv")
df_informal_raw = pd.read_csv("/content/drive/MyDrive/Corrected_Labeled.csv", encoding='ISO-8859-1')

# Prepare datasets
df_formal_raw['combined'] = df_formal_raw['title'].astype(str).str.strip() + " " + df_formal_raw['content'].astype(str).str.strip()
df_formal = process_dataset(df_formal_raw, 'combined', 'label')
df_informal = process_dataset(df_informal_raw, 'Text ', 'Label')
df_combined = pd.concat([df_formal, df_informal], ignore_index=True)

# Model training and evaluation function
def run_cnn_model(df, name="Dataset"):
    print(f"\n================== {name} ==================")
    le = LabelEncoder()
    df['label_enc'] = le.fit_transform(df['label'])

    X_train, X_test, y_train, y_test = train_test_split(
        df['combined'], df['label_enc'], test_size=0.2, stratify=df['label_enc'], random_state=42
    )

    tokenizer = Tokenizer(num_words=10000, oov_token="<OOV>")
    tokenizer.fit_on_texts(X_train)
    X_train_seq = pad_sequences(tokenizer.texts_to_sequences(X_train), padding='post', maxlen=100)
    X_test_seq = pad_sequences(tokenizer.texts_to_sequences(X_test), padding='post', maxlen=100)

    class TextDataset(Dataset):
        def __init__(self, sequences, labels):
            self.sequences = torch.tensor(sequences, dtype=torch.long)
            self.labels = torch.tensor(labels, dtype=torch.long)
        def __len__(self): return len(self.sequences)
        def __getitem__(self, idx): return self.sequences[idx], self.labels[idx]

    train_dataset = TextDataset(X_train_seq, y_train.to_numpy())
    test_dataset = TextDataset(X_test_seq, y_test.to_numpy())
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=32)

    class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(y_train), y=y_train)
    class_weights_tensor = torch.tensor(class_weights, dtype=torch.float)

    class CNNClassifier(nn.Module):
        def __init__(self, vocab_size, embedding_dim, output_dim):
            super(CNNClassifier, self).__init__()
            self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
            self.conv = nn.Conv1d(embedding_dim, 128, kernel_size=5, padding=2)
            self.relu = nn.ReLU()
            self.pool = nn.AdaptiveMaxPool1d(1)
            self.fc = nn.Linear(128, output_dim)

        def forward(self, x):
            x = self.embedding(x)
            x = x.permute(0, 2, 1)
            x = self.conv(x)
            x = self.relu(x)
            x = self.pool(x).squeeze(2)
            x = self.fc(x)
            return x

    vocab_size = 10000
    embedding_dim = 100
    output_dim = len(le.classes_)
    model = CNNClassifier(vocab_size, embedding_dim, output_dim)
    criterion = nn.CrossEntropyLoss(weight=class_weights_tensor)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    best_loss = float('inf')
    patience = 2
    patience_counter = 0

    for epoch in range(15):
        model.train()
        total_loss = 0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            total_loss += loss.item()
            loss.backward()
            optimizer.step()

        avg_loss = total_loss / len(train_loader)
        print(f"Epoch {epoch+1} | Training Loss: {avg_loss:.4f}")

        if avg_loss < best_loss:
            best_loss = avg_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print("Early stopping triggered.")
                break

    model.eval()
    all_preds, all_labels = [], []
    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            preds = torch.argmax(outputs, dim=1)
            all_preds.extend(preds.tolist())
            all_labels.extend(labels.tolist())

    target_names = [str(c) for c in le.classes_]
    print("Accuracy:", accuracy_score(all_labels, all_preds))
    print("Classification Report:\n", classification_report(all_labels, all_preds, target_names=target_names))

# Run for each dataset
run_cnn_model(df_formal, "Formal Dataset")
run_cnn_model(df_informal, "Informal Dataset")
run_cnn_model(df_combined, "Combined Dataset")



Epoch 1 | Training Loss: 0.5662
Epoch 2 | Training Loss: 0.2433
Epoch 3 | Training Loss: 0.0925
Epoch 4 | Training Loss: 0.0404
Epoch 5 | Training Loss: 0.0198
Epoch 6 | Training Loss: 0.0121
Epoch 7 | Training Loss: 0.0080
Epoch 8 | Training Loss: 0.0059
Epoch 9 | Training Loss: 0.0044
Epoch 10 | Training Loss: 0.0035
Epoch 11 | Training Loss: 0.0029
Epoch 12 | Training Loss: 0.0024
Epoch 13 | Training Loss: 0.0020
Epoch 14 | Training Loss: 0.0017
Epoch 15 | Training Loss: 0.0015
Accuracy: 0.9185022026431718
Classification Report:
               precision    recall  f1-score   support

           0       0.93      0.96      0.95       332
           1       0.89      0.80      0.84       122

    accuracy                           0.92       454
   macro avg       0.91      0.88      0.89       454
weighted avg       0.92      0.92      0.92       454


Epoch 1 | Training Loss: 1.0756
Epoch 2 | Training Loss: 0.6671
Epoch 3 | Training Loss: 0.4502
Epoch 4 | Training Loss: 0.2964
Epoc

In [None]:

# CNN Cross-Domain Evaluation: Formal  to Informal (with Early Stopping)

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score
from sklearn.utils.class_weight import compute_class_weight
from torch.utils.data import Dataset, DataLoader
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Load datasets
df_formal_raw = pd.read_csv("/content/drive/MyDrive/Diabetes_cleaned.csv")
df_informal_raw = pd.read_csv("/content/drive/MyDrive/Corrected_Labeled.csv", encoding='ISO-8859-1')

# Preprocess datasets (keep only label 0 and 1)
df_formal_raw['combined'] = df_formal_raw['title'].astype(str).str.strip() + " " + df_formal_raw['content'].astype(str).str.strip()
df_formal = df_formal_raw[['combined', 'label']].dropna()
df_formal = df_formal[df_formal['label'].isin([0, 1])].copy()
df_formal['label'] = df_formal['label'].astype(int)

df_informal = df_informal_raw[['Text ', 'Label']].dropna()
df_informal = df_informal.rename(columns={'Text ': 'combined', 'Label': 'label'})
df_informal = df_informal[df_informal['label'].isin([0, 1])].copy()
df_informal['label'] = df_informal['label'].astype(int)

# Tokenizer
MAX_LEN = 100
VOCAB_SIZE = 10000

def prepare_data(df, tokenizer):
    sequences = tokenizer.texts_to_sequences(df['combined'])
    padded = pad_sequences(sequences, padding='post', maxlen=MAX_LEN)
    return torch.tensor(padded, dtype=torch.long), torch.tensor(df['label'].values, dtype=torch.long)

# Dataset class
class TextDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y
    def __len__(self): return len(self.X)
    def __getitem__(self, idx): return self.X[idx], self.y[idx]

# CNN model
class CNNClassifier(nn.Module):
    def __init__(self, vocab_size, embedding_dim, output_dim):
        super(CNNClassifier, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        self.conv = nn.Conv1d(embedding_dim, 128, kernel_size=5, padding=2)
        self.relu = nn.ReLU()
        self.pool = nn.AdaptiveMaxPool1d(1)
        self.fc = nn.Linear(128, output_dim)
    def forward(self, x):
        x = self.embedding(x)
        x = x.permute(0, 2, 1)
        x = self.conv(x)
        x = self.relu(x)
        x = self.pool(x).squeeze(2)
        x = self.fc(x)
        return x

# Train CNN with early stopping
def train_cnn_model(df):
    le = LabelEncoder()
    df['label'] = le.fit_transform(df['label'])
    X_train, X_test, y_train, y_test = train_test_split(df['combined'], df['label'], test_size=0.2, stratify=df['label'], random_state=42)

    tokenizer = Tokenizer(num_words=VOCAB_SIZE, oov_token="<OOV>")
    tokenizer.fit_on_texts(X_train)
    X_train_tensor, y_train_tensor = prepare_data(pd.DataFrame({'combined': X_train, 'label': y_train}), tokenizer)

    class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(y_train), y=y_train)
    class_weights_tensor = torch.tensor(class_weights, dtype=torch.float)

    train_loader = DataLoader(TextDataset(X_train_tensor, y_train_tensor), batch_size=32, shuffle=True)

    model = CNNClassifier(VOCAB_SIZE, 100, 2)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.CrossEntropyLoss(weight=class_weights_tensor)

    best_loss = float('inf')
    patience = 2
    patience_counter = 0

    for epoch in range(15):
        model.train()
        total_loss = 0
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        avg_loss = total_loss / len(train_loader)
        print(f"Epoch {epoch+1} | Loss: {avg_loss:.4f}")

        # Early stopping
        if avg_loss < best_loss:
            best_loss = avg_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print("Early stopping triggered.")
                break

    return model, tokenizer

# Evaluate
def evaluate_model(model, tokenizer, df, name):
    df['label'] = df['label'].astype(int)
    X_tensor, y_tensor = prepare_data(df, tokenizer)
    model.eval()
    with torch.no_grad():
        outputs = model(X_tensor)
        preds = torch.argmax(outputs, dim=1)
    print(f"\n=== {name} ===")
    print("Accuracy:", accuracy_score(y_tensor, preds))
    print(classification_report(y_tensor, preds, target_names=["True", "False"]))

# Formal → Informal
formal_model, formal_tokenizer = train_cnn_model(df_formal)
evaluate_model(formal_model, formal_tokenizer, df_informal, "Formal model tested on Informal")

# Informal → Formal
informal_model, informal_tokenizer = train_cnn_model(df_informal)
evaluate_model(informal_model, informal_tokenizer, df_formal, "Informal model tested on Formal")


Epoch 1 | Loss: 0.5488
Epoch 2 | Loss: 0.2269
Epoch 3 | Loss: 0.1031
Epoch 4 | Loss: 0.0483
Epoch 5 | Loss: 0.0252
Epoch 6 | Loss: 0.0150
Epoch 7 | Loss: 0.0101
Epoch 8 | Loss: 0.0075
Epoch 9 | Loss: 0.0057
Epoch 10 | Loss: 0.0045
Epoch 11 | Loss: 0.0037
Epoch 12 | Loss: 0.0030
Epoch 13 | Loss: 0.0025
Epoch 14 | Loss: 0.0022
Epoch 15 | Loss: 0.0019

=== Formal model tested on Informal ===
Accuracy: 0.5296495956873315
              precision    recall  f1-score   support

        True       0.79      0.53      0.64       575
       False       0.24      0.51      0.33       167

    accuracy                           0.53       742
   macro avg       0.52      0.52      0.48       742
weighted avg       0.67      0.53      0.57       742

Epoch 1 | Loss: 0.6727
Epoch 2 | Loss: 0.3636
Epoch 3 | Loss: 0.2196
Epoch 4 | Loss: 0.1321
Epoch 5 | Loss: 0.0771
Epoch 6 | Loss: 0.0485
Epoch 7 | Loss: 0.0331
Epoch 8 | Loss: 0.0228
Epoch 9 | Loss: 0.0165
Epoch 10 | Loss: 0.0128
Epoch 11 | Loss: 0.01

In [None]:
# ===========================================
# CNN Cross-Domain Evaluation: Formal ↔ Informal (with Early Stopping)
# ===========================================

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score
from sklearn.utils.class_weight import compute_class_weight
from torch.utils.data import Dataset, DataLoader
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Load datasets
df_formal_raw = pd.read_csv("/content/drive/MyDrive/Diabetes_cleaned.csv")
df_informal_raw = pd.read_csv("/content/drive/MyDrive/Corrected_Labeled.csv", encoding='ISO-8859-1')

# Preprocess datasets (keep only label 0 and 1)
df_formal_raw['combined'] = df_formal_raw['title'].astype(str).str.strip() + " " + df_formal_raw['content'].astype(str).str.strip()
df_formal = df_formal_raw[['combined', 'label']].dropna()
df_formal = df_formal[df_formal['label'].isin([0, 1])].copy()
df_formal['label'] = df_formal['label'].astype(int)

df_informal = df_informal_raw[['Text ', 'Label']].dropna()
df_informal = df_informal.rename(columns={'Text ': 'combined', 'Label': 'label'})
df_informal = df_informal[df_informal['label'].isin([0, 1])].copy()
df_informal['label'] = df_informal['label'].astype(int)

# Tokenizer
MAX_LEN = 100
VOCAB_SIZE = 10000

def prepare_data(df, tokenizer):
    sequences = tokenizer.texts_to_sequences(df['combined'])
    padded = pad_sequences(sequences, padding='post', maxlen=MAX_LEN)
    return torch.tensor(padded, dtype=torch.long), torch.tensor(df['label'].values, dtype=torch.long)

# Dataset class
class TextDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y
    def __len__(self): return len(self.X)
    def __getitem__(self, idx): return self.X[idx], self.y[idx]

# CNN model
class CNNClassifier(nn.Module):
    def __init__(self, vocab_size, embedding_dim, output_dim):
        super(CNNClassifier, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        self.conv = nn.Conv1d(embedding_dim, 128, kernel_size=5, padding=2)
        self.relu = nn.ReLU()
        self.pool = nn.AdaptiveMaxPool1d(1)
        self.fc = nn.Linear(128, output_dim)
    def forward(self, x):
        x = self.embedding(x)
        x = x.permute(0, 2, 1)
        x = self.conv(x)
        x = self.relu(x)
        x = self.pool(x).squeeze(2)
        x = self.fc(x)
        return x

# Train CNN with early stopping
def train_cnn_model(df):
    le = LabelEncoder()
    df['label'] = le.fit_transform(df['label'])
    X_train, X_test, y_train, y_test = train_test_split(df['combined'], df['label'], test_size=0.2, stratify=df['label'], random_state=42)

    tokenizer = Tokenizer(num_words=VOCAB_SIZE, oov_token="<OOV>")
    tokenizer.fit_on_texts(X_train)
    X_train_tensor, y_train_tensor = prepare_data(pd.DataFrame({'combined': X_train, 'label': y_train}), tokenizer)

    class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(y_train), y=y_train)
    class_weights_tensor = torch.tensor(class_weights, dtype=torch.float)

    train_loader = DataLoader(TextDataset(X_train_tensor, y_train_tensor), batch_size=32, shuffle=True)

    model = CNNClassifier(VOCAB_SIZE, 100, 2)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.CrossEntropyLoss(weight=class_weights_tensor)

    best_loss = float('inf')
    patience = 2
    patience_counter = 0

    for epoch in range(15):
        model.train()
        total_loss = 0
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        avg_loss = total_loss / len(train_loader)
        print(f"Epoch {epoch+1} | Loss: {avg_loss:.4f}")

        # Early stopping
        if avg_loss < best_loss:
            best_loss = avg_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print("Early stopping triggered.")
                break

    return model, tokenizer

# Evaluate
def evaluate_model(model, tokenizer, df, name):
    df['label'] = df['label'].astype(int)
    X_tensor, y_tensor = prepare_data(df, tokenizer)
    model.eval()
    with torch.no_grad():
        outputs = model(X_tensor)
        preds = torch.argmax(outputs, dim=1)
    print(f"\n=== {name} ===")
    print("Accuracy:", accuracy_score(y_tensor, preds))
    print(classification_report(y_tensor, preds, target_names=["True", "False"]))

# Formal to Informal
formal_model, formal_tokenizer = train_cnn_model(df_formal)
evaluate_model(formal_model, formal_tokenizer, df_informal, "Formal model tested on Informal")

# Informal to Formal
informal_model, informal_tokenizer = train_cnn_model(df_informal)
evaluate_model(informal_model, informal_tokenizer, df_formal, "Informal model tested on Formal")


Epoch 1 | Loss: 0.5559
Epoch 2 | Loss: 0.2393
Epoch 3 | Loss: 0.1106
Epoch 4 | Loss: 0.0491
Epoch 5 | Loss: 0.0249
Epoch 6 | Loss: 0.0151
Epoch 7 | Loss: 0.0102
Epoch 8 | Loss: 0.0075
Epoch 9 | Loss: 0.0057
Epoch 10 | Loss: 0.0045
Epoch 11 | Loss: 0.0037
Epoch 12 | Loss: 0.0031
Epoch 13 | Loss: 0.0025
Epoch 14 | Loss: 0.0022
Epoch 15 | Loss: 0.0019

=== Formal model tested on Informal ===
Accuracy: 0.4272237196765499
              precision    recall  f1-score   support

        True       0.76      0.39      0.51       575
       False       0.21      0.57      0.31       167

    accuracy                           0.43       742
   macro avg       0.48      0.48      0.41       742
weighted avg       0.63      0.43      0.47       742

Epoch 1 | Loss: 0.6451
Epoch 2 | Loss: 0.3269
Epoch 3 | Loss: 0.1947
Epoch 4 | Loss: 0.1018
Epoch 5 | Loss: 0.0569
Epoch 6 | Loss: 0.0354
Epoch 7 | Loss: 0.0233
Epoch 8 | Loss: 0.0166
Epoch 9 | Loss: 0.0125
Epoch 10 | Loss: 0.0099
Epoch 11 | Loss: 0.00

In [None]:

# CNN Text Classification (3-Class) with CORAL Domain Adaptation
# Dataset: Combined (Formal + Informal)

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score
from sklearn.utils.class_weight import compute_class_weight
from torch.utils.data import Dataset, DataLoader
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Load datasets
df_formal = pd.read_csv("/content/drive/MyDrive/Diabetes_cleaned.csv")
df_informal = pd.read_csv("/content/drive/MyDrive/Corrected_Labeled.csv", encoding='ISO-8859-1')

# Preprocess formal
df_formal['combined'] = df_formal['title'].astype(str).str.strip() + " " + df_formal['content'].astype(str).str.strip()
df_formal = df_formal[['combined', 'label']].dropna()
df_formal['label'] = df_formal['label'].astype(int)

# Preprocess informal
df_informal = df_informal.rename(columns={'Text ': 'combined', 'Label': 'label'})
df_informal = df_informal[['combined', 'label']].dropna()
df_informal['label'] = df_informal['label'].astype(int)

# Combine datasets
source_df = df_formal.copy()
target_df = df_informal.copy()

# Tokenizer
MAX_LEN = 100
VOCAB_SIZE = 10000

tokenizer = Tokenizer(num_words=VOCAB_SIZE, oov_token="<OOV>")
tokenizer.fit_on_texts(source_df['combined'])

def prepare_sequences(df):
    sequences = tokenizer.texts_to_sequences(df['combined'])
    padded = pad_sequences(sequences, padding='post', maxlen=MAX_LEN)
    return torch.tensor(padded, dtype=torch.long), torch.tensor(df['label'].values, dtype=torch.long)

X_source, y_source = prepare_sequences(source_df)
X_target, y_target = prepare_sequences(target_df)

# Dataset class
class TextDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y
    def __len__(self): return len(self.X)
    def __getitem__(self, idx): return self.X[idx], self.y[idx]

source_loader = DataLoader(TextDataset(X_source, y_source), batch_size=32, shuffle=True)
target_loader = DataLoader(TextDataset(X_target, y_target), batch_size=32, shuffle=True)

# CNN model
class CNNFeatureExtractor(nn.Module):
    def __init__(self, vocab_size, embedding_dim=100):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        self.conv = nn.Conv1d(embedding_dim, 128, kernel_size=5, padding=2)
        self.relu = nn.ReLU()
        self.pool = nn.AdaptiveMaxPool1d(1)
    def forward(self, x):
        x = self.embedding(x)
        x = x.permute(0, 2, 1)
        x = self.conv(x)
        x = self.relu(x)
        x = self.pool(x).squeeze(2)
        return x

class CNNClassifier(nn.Module):
    def __init__(self, feature_extractor, output_dim):
        super().__init__()
        self.feature_extractor = feature_extractor
        self.classifier = nn.Linear(128, output_dim)
    def forward(self, x):
        features = self.feature_extractor(x)
        return self.classifier(features)

# CORAL Loss
class CORAL(nn.Module):
    def forward(self, source, target):
        d = source.size(1)
        source_c = self.covariance(source)
        target_c = self.covariance(target)
        loss = torch.mean((source_c - target_c) ** 2)
        return loss / (4 * d * d)

    def covariance(self, x):
        n = x.size(0)
        mean = torch.mean(x, dim=0, keepdim=True)
        xc = x - mean
        cov = (xc.t() @ xc) / (n - 1)
        return cov

# Train CORAL CNN
label_encoder = LabelEncoder()
label_encoder.fit(np.concatenate([y_source.numpy(), y_target.numpy()]))
y_source = label_encoder.transform(y_source.numpy())
y_target = label_encoder.transform(y_target.numpy())

y_source = torch.tensor(y_source, dtype=torch.long)
y_target = torch.tensor(y_target, dtype=torch.long)
y_all = np.concatenate([y_source.numpy(), y_target.numpy()])

# Fix: use all classes for weight computation, fallback to uniform weights if mismatch
all_classes = np.unique(y_all)
present_classes = np.unique(y_source.numpy())
if len(all_classes) == len(present_classes):
    class_weights = compute_class_weight(class_weight='balanced', classes=all_classes, y=y_source.numpy())
else:
    print("Mismatch in classes for weight computation. Falling back to uniform weights.")
    class_weights = np.ones(len(all_classes))
class_weights_tensor = torch.tensor(class_weights, dtype=torch.float)

feature_extractor = CNNFeatureExtractor(VOCAB_SIZE)
model = CNNClassifier(feature_extractor, output_dim=len(label_encoder.classes_))
coral = CORAL()
criterion = nn.CrossEntropyLoss(weight=class_weights_tensor)
optimizer = optim.Adam(model.parameters(), lr=1e-3)

patience = 3
best_loss = float('inf')
patience_counter = 0

for epoch in range(20):
    model.train()
    total_loss = 0
    total_classification = 0
    for (xs, ys), (xt, _) in zip(source_loader, target_loader):
        optimizer.zero_grad()
        source_feat = feature_extractor(xs)
        target_feat = feature_extractor(xt)
        cls_output = model(xs)
        cls_loss = criterion(cls_output, ys)
        coral_loss = coral(source_feat, target_feat)
        loss = cls_loss + coral_loss
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        total_classification += cls_loss.item()

    avg_loss = total_loss / len(source_loader)
    print(f"Epoch {epoch+1} | Loss: {avg_loss:.4f} | Classification Loss: {total_classification / len(source_loader):.4f}")
    if avg_loss < best_loss:
        best_loss = avg_loss
        patience_counter = 0
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print("Early stopping triggered.")
            break

# Evaluate on full combined test set
model.eval()
X_combined = torch.cat([X_source, X_target])
y_combined = torch.cat([y_source, y_target])
with torch.no_grad():
    outputs = model(X_combined)
    preds = torch.argmax(outputs, dim=1)

print("\n=== Evaluation on Combined Dataset ===")
print("Accuracy:", accuracy_score(y_combined, preds))
print(classification_report(y_combined, preds, target_names=label_encoder.classes_.astype(str)))


Mismatch in classes for weight computation. Falling back to uniform weights.
Epoch 1 | Loss: 0.2718 | Classification Loss: 0.2718
Epoch 2 | Loss: 0.1483 | Classification Loss: 0.1483
Epoch 3 | Loss: 0.1128 | Classification Loss: 0.1128
Epoch 4 | Loss: 0.0874 | Classification Loss: 0.0874
Epoch 5 | Loss: 0.0621 | Classification Loss: 0.0621
Epoch 6 | Loss: 0.0483 | Classification Loss: 0.0483
Epoch 7 | Loss: 0.0358 | Classification Loss: 0.0358
Epoch 8 | Loss: 0.0269 | Classification Loss: 0.0269
Epoch 9 | Loss: 0.0204 | Classification Loss: 0.0204
Epoch 10 | Loss: 0.0161 | Classification Loss: 0.0161
Epoch 11 | Loss: 0.0107 | Classification Loss: 0.0107
Epoch 12 | Loss: 0.0088 | Classification Loss: 0.0088
Epoch 13 | Loss: 0.0075 | Classification Loss: 0.0075
Epoch 14 | Loss: 0.0052 | Classification Loss: 0.0052
Epoch 15 | Loss: 0.0052 | Classification Loss: 0.0052
Epoch 16 | Loss: 0.0039 | Classification Loss: 0.0039
Epoch 17 | Loss: 0.0033 | Classification Loss: 0.0033
Epoch 18 | Los

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
