# Reply Classification ML Pipeline

This notebook implements a complete ML pipeline for classifying email replies into positive, negative, and neutral categories.

In [None]:
import pandas as pd
import numpy as np
import re
from sklearn.svm import SVC
import warnings
warnings.filterwarnings('ignore')

## Data Loading and Exploration

In [3]:
df = pd.read_csv('reply_classification_dataset.csv')
print(f"Dataset shape: {df.shape}")
print(f"Columns: {df.columns.tolist()}")
print(f"\nLabel distribution:\n{df['label'].value_counts()}")

Dataset shape: (2129, 2)
Columns: ['reply', 'label']

Label distribution:
label
neutral     704
positive    446
NEGATIVE    267
POSITIVE    263
Negative    254
negative    189
Neutral       3
NEUTRAL       2
Positive      1
Name: count, dtype: int64


## Data Preprocessing

In [4]:
df['label'] = df['label'].str.lower()
df_clean = df.drop_duplicates().reset_index(drop=True)

def preprocess_text(text):
    text = text.lower()
    text = text.strip()
    text = re.sub(r'[?!]{2,}', '', text)
    text = re.sub(r',+', ',', text)
    text = text.replace(' u ', ' you ')
    text = text.replace(' plz ', ' please ')
    text = text.replace(' w/ ', ' with ')
    text = text.replace('schdule', 'schedule')
    text = text.replace('intrsted', 'interested')
    text = text.replace('alredy', 'already')
    text = text.replace('oppurtunity', 'opportunity')
    text = text.replace('intrest', 'interest')
    text = text.replace('commited', 'committed')
    text = text.replace('lets', 'let us')
    return text

df_clean['cleaned_text'] = df_clean['reply'].apply(preprocess_text)
print(f"After cleaning: {df_clean.shape}")
print(f"Label distribution: {df_clean['label'].value_counts()}")


After cleaning: (321, 3)
Label distribution: label
neutral     121
positive    114
negative     86
Name: count, dtype: int64


## Model Training and Evaluation

In [11]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer

In [5]:
X = df_clean['cleaned_text']
y = df_clean['label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

vectorizer = TfidfVectorizer(max_features=1000, ngram_range=(1, 2), stop_words='english')
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

print(f"Training set: {len(X_train)} samples")
print(f"Test set: {len(X_test)} samples")
print(f"TF-IDF features: {X_train_tfidf.shape[1]}")

Training set: 256 samples
Test set: 65 samples
TF-IDF features: 458


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, f1_score

In [6]:
lr_model = LogisticRegression(random_state=42, max_iter=1000)
lr_model.fit(X_train_tfidf, y_train)
lr_pred = lr_model.predict(X_test_tfidf)

print("Logistic Regression Results:")
print(f"Accuracy: {accuracy_score(y_test, lr_pred):.4f}")
print(f"F1 Score: {f1_score(y_test, lr_pred, average='weighted'):.4f}")
print(f"\n{classification_report(y_test, lr_pred)}")

Logistic Regression Results:
Accuracy: 0.9538
F1 Score: 0.9544

              precision    recall  f1-score   support

    negative       1.00      0.94      0.97        17
     neutral       1.00      0.92      0.96        25
    positive       0.88      1.00      0.94        23

    accuracy                           0.95        65
   macro avg       0.96      0.95      0.96        65
weighted avg       0.96      0.95      0.95        65



In [None]:
from sklearn.ensemble import RandomForestClassifier, VotingClassifier

In [7]:
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train_tfidf, y_train)
rf_pred = rf_model.predict(X_test_tfidf)

print("Random Forest Results:")
print(f"Accuracy: {accuracy_score(y_test, rf_pred):.4f}")
print(f"F1 Score: {f1_score(y_test, rf_pred, average='weighted'):.4f}")
print(f"\n{classification_report(y_test, rf_pred)}")

Random Forest Results:
Accuracy: 0.8923
F1 Score: 0.8929

              precision    recall  f1-score   support

    negative       0.74      1.00      0.85        17
     neutral       1.00      0.76      0.86        25
    positive       0.96      0.96      0.96        23

    accuracy                           0.89        65
   macro avg       0.90      0.91      0.89        65
weighted avg       0.92      0.89      0.89        65



In [13]:
from sklearn.naive_bayes import MultinomialNB

In [8]:
svm_model = SVC(probability=True, random_state=42)
nb_model = MultinomialNB()

ensemble_model = VotingClassifier(
    estimators=[
        ('lr', lr_model),
        ('svm', svm_model),
        ('nb', nb_model)
    ],
    voting='soft'
)

ensemble_model.fit(X_train_tfidf, y_train)
ensemble_pred = ensemble_model.predict(X_test_tfidf)

print("Ensemble Model Results:")
print(f"Accuracy: {accuracy_score(y_test, ensemble_pred):.4f}")
print(f"F1 Score: {f1_score(y_test, ensemble_pred, average='weighted'):.4f}")
print(f"\n{classification_report(y_test, ensemble_pred)}")

Ensemble Model Results:
Accuracy: 0.9538
F1 Score: 0.9544

              precision    recall  f1-score   support

    negative       1.00      0.94      0.97        17
     neutral       1.00      0.92      0.96        25
    positive       0.88      1.00      0.94        23

    accuracy                           0.95        65
   macro avg       0.96      0.95      0.96        65
weighted avg       0.96      0.95      0.95        65



## Model Selection and Saving

In [14]:
import pickle

In [9]:
best_model = ensemble_model

with open('best_model.pkl', 'wb') as f:
    pickle.dump(best_model, f)

with open('vectorizer.pkl', 'wb') as f:
    pickle.dump(vectorizer, f)

print("Model and vectorizer saved successfully!")

Model and vectorizer saved successfully!


## Testing Predictions

In [10]:
def predict_reply(text, model, vectorizer):
    processed_text = preprocess_text(text)
    text_tfidf = vectorizer.transform([processed_text])
    prediction = model.predict(text_tfidf)[0]
    probabilities = model.predict_proba(text_tfidf)[0]
    confidence = max(probabilities)
    return prediction, confidence

test_texts = [
    "I'm excited to see the demo!",
    "Not interested, please remove me",
    "Can you send pricing details?"
]

for text in test_texts:
    pred, conf = predict_reply(text, best_model, vectorizer)
    print(f"Text: '{text}'")
    print(f"Prediction: {pred} (Confidence: {conf:.3f})\n")

Text: 'I'm excited to see the demo!'
Prediction: positive (Confidence: 0.763)

Text: 'Not interested, please remove me'
Prediction: negative (Confidence: 0.728)

Text: 'Can you send pricing details?'
Prediction: neutral (Confidence: 0.821)

