# In [None]:
# 📦 1. Imports
import pandas as pd
import numpy as np
import re
import string
import spacy
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from textblob import TextBlob
import nltk

# Download NLTK data
# word_tokenize needs the Punkt sentence-tokenizer data. Older NLTK releases
# load it from the 'punkt' package; newer releases (3.9 and later) look for
# 'punkt_tab' instead and raise LookupError if only 'punkt' is present.
# Fetch both so the tokenizer works regardless of the installed version.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')
nltk.download('wordnet')

# 📄 2. Sample Data
# Each record pairs one free-text customer complaint with its issue label.
_SAMPLE_RECORDS = [
    ("The brake system failed while driving downhill.", "BRAKE_PROBLEM"),
    ("Battery drained overnight. No power in the morning.", "ELECTRICAL_ISSUE"),
    ("There is a loud noise coming from the engine area.", "ENGINE_ISSUE"),
    ("Touchscreen display keeps flickering and restarting.", "ELECTRONICS"),
    ("Transmission shifts roughly between gears.", "TRANSMISSION_PROBLEM"),
    ("The car won’t start. Engine does not crank.", "ENGINE_ISSUE"),
]

# Transpose the records into the two parallel columns of the frame.
_verbatims, _issue_types = zip(*_SAMPLE_RECORDS)
data = {
    "Customer_Verbatim": list(_verbatims),
    "Issue_Type": list(_issue_types),
}

df = pd.DataFrame(data)

# 🧹 3. Text Cleaning
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

# Matches every character that is neither a lowercase ASCII letter nor
# whitespace. It is applied after lowercasing, so uppercase needs no handling.
_NON_LETTER_RE = re.compile(r'[^a-z\s]')


def clean_text(text):
    """Normalize one complaint string into space-separated lemmas.

    Steps: lowercase, replace every non-letter character with a space,
    tokenize, drop tokens found in ``stop_words``, lemmatize the rest.

    Args:
        text: Raw complaint text. Non-string values (for example the NaN
            that pandas uses for a missing cell) are treated as empty.

    Returns:
        The cleaned tokens joined by single spaces; ``""`` when nothing
        remains or when *text* is not a string.
    """
    # A missing cell arrives as a float NaN or None and has no .lower().
    if not isinstance(text, str):
        return ""

    # Substitute a space rather than deleting the character: deleting the
    # apostrophe in "won’t" fuses it into "wont", which is not a stop word
    # and would end up as a feature. With a space the pieces stay separate
    # tokens and remain eligible for stop-word removal.
    letters_only = _NON_LETTER_RE.sub(' ', text.lower())

    tokens = word_tokenize(letters_only)
    return " ".join(
        lemmatizer.lemmatize(token) for token in tokens if token not in stop_words
    )


df["Cleaned_Text"] = df["Customer_Verbatim"].apply(clean_text)

# 😊 4. Sentiment Analysis
def _sentiment_polarity(verbatim):
    """Return TextBlob's sentiment polarity for one complaint string."""
    return TextBlob(verbatim).sentiment.polarity


# Scored on the original wording, not on the cleaned text.
df["Sentiment_Polarity"] = df["Customer_Verbatim"].apply(_sentiment_polarity)

# 🧠 5. Named Entity Recognition
nlp = spacy.load("en_core_web_sm")


def extract_entities(text):
    """List the named entities the spaCy pipeline finds in *text*.

    Args:
        text: The string to run through ``nlp``.

    Returns:
        A list of ``(entity_text, entity_label)`` tuples, one per item in
        the parsed document's ``ents``; empty when there are none.
    """
    recognized = nlp(text).ents
    return [(span.text, span.label_) for span in recognized]


# Entities are extracted from the original wording of each complaint.
df["Entities"] = df["Customer_Verbatim"].apply(extract_entities)

# 🔢 6. Vectorization + Classification
y = df["Issue_Type"]

# Split the cleaned text BEFORE fitting TF-IDF. Fitting the vectorizer on the
# whole column first would let the held-out rows influence the vocabulary and
# IDF weights, leaking test information into the training features.
text_train, text_test, y_train, y_test = train_test_split(
    df["Cleaned_Text"], y, test_size=0.3, random_state=42
)

vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(text_train)  # learn vocabulary/IDF here only
X_test = vectorizer.transform(text_test)        # reuse the fitted vocabulary

# Full feature matrix, kept for any later code that refers to `X`; it is
# built with the training-only vocabulary and is not used for fitting.
X = vectorizer.transform(df["Cleaned_Text"])

model = LogisticRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

# 📊 Evaluation
# With so few test rows some labels are never predicted, which makes their
# precision undefined; zero_division=0 reports those as 0 without a warning.
print(classification_report(y_test, y_pred, zero_division=0))
