In [None]:

# ================================
# 0️⃣ Setup paths for src and models
# ================================
import sys
import os

# Make the project's src folder importable. The location is derived from the
# current working directory (one level up, then "src"), so the working
# directory decides where the module is looked up.
src_dir = os.path.join(os.getcwd(), "..", "src")
# Guard against duplicates: without this check, every re-run of the cell
# appends the same entry to sys.path again.
if src_dir not in sys.path:
    sys.path.append(src_dir)

from preprocessing import preprocess_text  # project-local text-cleaning helper
import joblib

# ================================
# 1️⃣ Load trained model and vectorizer
# ================================
# Both artifacts are read from the parent of the working directory.
model_path = os.path.join("..", "model_sgd_weighted.pkl")
vectorizer_path = os.path.join("..", "tfidf_vectorizer_weighted.pkl")

# NOTE(security): joblib.load unpickles the file, and unpickling can execute
# arbitrary code. Only load artifacts that come from a trusted source.
model = joblib.load(model_path)
vectorizer = joblib.load(vectorizer_path)

# ================================
# 2️⃣ Sample complaints for all 4 categories
# ================================
# Two hand-written examples per category, appended one category at a time.
# The resulting order is: Consumer Loan, Mortgage, Debt Collection, Credit Reporting.
sample_complaints = []

# Consumer Loan
sample_complaints += [
    "I took a personal loan from ABC bank, but they overcharged my interest.",
    "My loan application was rejected despite meeting all criteria.",
]

# Mortgage
sample_complaints += [
    "Bank increased my mortgage interest rate without notice.",
    "My mortgage payment was applied incorrectly and caused late fees.",
]

# Debt Collection
sample_complaints += [
    "Debt collection agency is harassing me with repeated calls.",
    "I received a debt collection notice for a loan I already paid.",
]

# Credit Reporting / Credit Repair
sample_complaints += [
    "My credit report shows incorrect late payments.",
    "Credit bureau did not correct errors I reported on my credit file.",
]

# ================================
# 3️⃣ Preprocess complaints
# ================================
# Run every raw complaint through the project's cleaning helper.
clean_texts = list(map(preprocess_text, sample_complaints))

# ================================
# 4️⃣ Vectorize
# ================================
# Only transform is called here: the loaded vectorizer is used as-is.
X_test = vectorizer.transform(clean_texts)

# ================================
# 5️⃣ Predict
# ================================
y_pred = model.predict(X_test)

# Numeric label -> category name; list position is the label (0, 1, 2, 3).
# NOTE(review): presumably mirrors the label encoding used when the model was
# trained — confirm against the training code.
category_map_reverse = dict(enumerate([
    "Credit reporting, credit repair services, or other personal consumer reports",
    "Debt collection",
    "Consumer Loan",
    "Mortgage",
]))

predicted_categories = [category_map_reverse[code] for code in y_pred]

# ================================
# 6️⃣ Display predictions
# ================================
# Walk complaints and their predicted categories in lockstep, numbering from 1.
for number, (complaint, category) in enumerate(
    zip(sample_complaints, predicted_categories), start=1
):
    print(f"Complaint {number}: {complaint}")
    print(f"Predicted Category: {category}\n")


Complaint 1: I took a personal loan from ABC bank, but they overcharged my interest.
Predicted Category: Consumer Loan

Complaint 2: My loan application was rejected despite meeting all criteria.
Predicted Category: Consumer Loan

Complaint 3: Bank increased my mortgage interest rate without notice.
Predicted Category: Mortgage

Complaint 4: My mortgage payment was applied incorrectly and caused late fees.
Predicted Category: Mortgage

Complaint 5: Debt collection agency is harassing me with repeated calls.
Predicted Category: Debt collection

Complaint 6: I received a debt collection notice for a loan I already paid.
Predicted Category: Debt collection

Complaint 7: My credit report shows incorrect late payments.
Predicted Category: Credit reporting, credit repair services, or other personal consumer reports

Complaint 8: Credit bureau did not correct errors I reported on my credit file.
Predicted Category: Credit reporting, credit repair services, or other personal consumer reports



# Task 5 - Text Classification
**Candidate Name:** Bathula Sai Teja  
**Date/Time:** 2025-09-27

In [12]:

import sys
import os

# Add src folder to Python path (resolved relative to the working directory).
# The membership check keeps re-runs of this cell from stacking duplicate
# entries onto sys.path.
src_dir = os.path.join(os.getcwd(), "..", "src")
if src_dir not in sys.path:
    sys.path.append(src_dir)

from preprocessing import preprocess_text
import joblib

# ================================
# 1️⃣ Load trained model and vectorizer
# ================================
model_path = os.path.join("..", "model_sgd_weighted.pkl")
vectorizer_path = os.path.join("..", "tfidf_vectorizer_weighted.pkl")

# NOTE(security): joblib.load unpickles the file, and unpickling can execute
# arbitrary code. Only load artifacts that come from a trusted source.
model = joblib.load(model_path)
vectorizer = joblib.load(vectorizer_path)

# Map numeric label back to category name
# NOTE(review): presumably mirrors the label encoding used at training time — confirm.
category_map_reverse = {
    0: "Credit reporting, credit repair services, or other personal consumer reports",
    1: "Debt collection",
    2: "Consumer Loan",
    3: "Mortgage"
}

# ================================
# 2️⃣ Take user input
# ================================
user_input = input("Enter your consumer complaint text: ")

if not user_input.strip():
    # Blank or whitespace-only input carries nothing to classify. Running it
    # through the model would still print a category, which would look like a
    # real prediction, so report the problem instead.
    predicted_category = None
    print("\nNo complaint text entered; skipping prediction.")
else:
    # ================================
    # 3️⃣ Preprocess the input
    # ================================
    clean_text = preprocess_text(user_input)

    # ================================
    # 4️⃣ Vectorize the input
    # ================================
    # transform expects a collection of documents, hence the one-element list.
    X_input = vectorizer.transform([clean_text])

    # ================================
    # 5️⃣ Predict
    # ================================
    y_pred = model.predict(X_input)

    # One document in, so the first prediction is the only one.
    predicted_category = category_map_reverse[y_pred[0]]

    print("\nPredicted Category:", predicted_category)


Predicted Category: Mortgage


In [11]:
import pandas as pd

# Pair each sample complaint with its predicted category, one column per list.
# Both lists come from earlier cells and are expected to line up one-to-one.
prediction_columns = {
    "Complaint": sample_complaints,
    "Predicted_Category": predicted_categories,
}
df_predictions = pd.DataFrame(prediction_columns)

# Write the table one level above the working directory, without the index column.
output_csv = "../predictions.csv"
df_predictions.to_csv(output_csv, index=False)
print("Predictions saved to predictions.csv")


Predictions saved to predictions.csv
