<a href="https://colab.research.google.com/github/Chaubeyji965/Reply-Classification-Pipeline/blob/main/SvaraAi.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [18]:

import pandas as pd
import numpy as np
import re
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, f1_score, classification_report
import pickle
import warnings
warnings.filterwarnings('ignore')


df = pd.read_csv('reply_classification_dataset.csv')


df['label'] = df['label'].str.lower()
df['label'] = df['label'].replace({
    'positive': 'positive', 'pos': 'positive',
    'negative': 'negative', 'neg': 'negative',
    'neutral': 'neutral'
})
df_clean = df.drop_duplicates().reset_index(drop=True)


def preprocess_text(text):
    text = text.lower().strip()
    text = re.sub(r'[?!]{2,}', '', text)
    text = re.sub(r',+', ',', text)
    text = text.replace(' u ', ' you ').replace(' plz ', ' please ').replace(' w/ ', ' with ')
    text = text.replace('schdule', 'schedule').replace('intrsted', 'interested')
    text = text.replace('alredy', 'already').replace('oppurtunity', 'opportunity')
    text = text.replace('intrest', 'interest').replace('commited', 'committed')
    text = text.replace('lets', 'let us')
    return text

df_clean['cleaned_text'] = df_clean['reply'].apply(preprocess_text)


X = df_clean['cleaned_text']
y = df_clean['label']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


vectorizer = TfidfVectorizer(max_features=5000, ngram_range=(1,3), stop_words='english')
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)


le = LabelEncoder()
y_train_enc = le.fit_transform(y_train)
y_test_enc = le.transform(y_test)


X_train_tensor = torch.tensor(X_train_tfidf.toarray(), dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_tfidf.toarray(), dtype=torch.float32)
y_train_tensor = torch.tensor(y_train_enc, dtype=torch.long)
y_test_tensor = torch.tensor(y_test_enc, dtype=torch.long)


train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16)

class TextNN(nn.Module):
    def __init__(self, input_dim, num_classes):
        super(TextNN, self).__init__()
        self.fc1 = nn.Linear(input_dim, 512)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(512, 256)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(256, num_classes)

    def forward(self, x):
        x = self.relu1(self.fc1(x))
        x = self.relu2(self.fc2(x))
        x = self.fc3(x)
        return x

model = TextNN(X_train_tensor.shape[1], len(le.classes_))

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


epochs = 20
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for xb, yb in train_loader:
        optimizer.zero_grad()
        outputs = model(xb)
        loss = criterion(outputs, yb)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss/len(train_loader):.4f}")


model.eval()
all_preds = []
with torch.no_grad():
    for xb, _ in test_loader:
        outputs = model(xb)
        preds = torch.argmax(outputs, dim=1)
        all_preds.extend(preds.numpy())

print("Neural Network Results:")
print(f"Accuracy: {accuracy_score(y_test_tensor, all_preds):.4f}")
print(f"F1 Score: {f1_score(y_test_tensor, all_preds, average='weighted'):.4f}")
print(classification_report(y_test_tensor, all_preds, target_names=le.classes_))


with open('nn_model.pkl', 'wb') as f:
    pickle.dump(model, f)

with open('vectorizer.pkl', 'wb') as f:
    pickle.dump(vectorizer, f)

with open('label_encoder.pkl', 'wb') as f:
    pickle.dump(le, f)

print("Model, vectorizer, and label encoder saved successfully!")


def predict_reply(text, model, vectorizer, label_encoder):
    processed_text = preprocess_text(text)
    text_tfidf = vectorizer.transform([processed_text])
    text_tensor = torch.tensor(text_tfidf.toarray(), dtype=torch.float32)
    model.eval()
    with torch.no_grad():
        outputs = model(text_tensor)
        probs = torch.softmax(outputs, dim=1).numpy()[0]
        pred_idx = probs.argmax()
        prediction = label_encoder.inverse_transform([pred_idx])[0]
        confidence = probs[pred_idx]
    return prediction, confidence

test_texts = [
    "I'm excited to see the demo!",
    "Not interested, please remove me",
    "Can you send pricing details?"
]

for text in test_texts:
    pred, conf = predict_reply(text, model, vectorizer, le)
    print(f"Text: '{text}'")
    print(f"Prediction: {pred} (Confidence: {conf:.3f})\n")


Epoch 1/20, Loss: 1.0832
Epoch 2/20, Loss: 0.9237
Epoch 3/20, Loss: 0.4558
Epoch 4/20, Loss: 0.0986
Epoch 5/20, Loss: 0.0116
Epoch 6/20, Loss: 0.0025
Epoch 7/20, Loss: 0.0012
Epoch 8/20, Loss: 0.0008
Epoch 9/20, Loss: 0.0006
Epoch 10/20, Loss: 0.0005
Epoch 11/20, Loss: 0.0004
Epoch 12/20, Loss: 0.0003
Epoch 13/20, Loss: 0.0003
Epoch 14/20, Loss: 0.0002
Epoch 15/20, Loss: 0.0002
Epoch 16/20, Loss: 0.0001
Epoch 17/20, Loss: 0.0001
Epoch 18/20, Loss: 0.0001
Epoch 19/20, Loss: 0.0001
Epoch 20/20, Loss: 0.0001
Neural Network Results:
Accuracy: 0.9231
F1 Score: 0.9240
              precision    recall  f1-score   support

    negative       0.94      0.94      0.94        17
     neutral       1.00      0.88      0.94        25
    positive       0.85      0.96      0.90        23

    accuracy                           0.92        65
   macro avg       0.93      0.93      0.93        65
weighted avg       0.93      0.92      0.92        65

Model, vectorizer, and label encoder saved success