In [None]:
# Import libraries
import pandas as pd


In [None]:
# Read the support-ticket CSV, list its column names, and show the first rows
df = pd.read_csv(
    "customer_support_tickets.csv",
    encoding='utf-8',
)
print("Columns:", list(df.columns))
df.head()


In [None]:
# Validate that the expected source columns are present, then rename them.
# The mapping reads  standardized name -> column name as found in the CSV.
required_cols_mapping = {
    "Ticket Description": "Ticket Description",
    "Ticket Type": "Ticket Type",
    "Ticket Priority": "Ticket Priority",
    "gender": "Customer Gender",
}

# Fail fast if any source column is absent from the loaded frame
missing = [
    source_name
    for source_name in required_cols_mapping.values()
    if source_name not in df.columns
]
if missing:
    raise ValueError(f"Missing columns: {missing}")

# Invert the mapping (source -> standardized) because that is what rename expects
df = df.rename(
    columns=dict(zip(required_cols_mapping.values(), required_cols_mapping.keys()))
)
print("✅ Renamed columns:", df.columns.tolist())


In [None]:
import re
import string

# Custom text cleaning function
def clean_text(text):
    """Normalize a raw ticket description into plain lowercase text.

    The value is lowercased, URLs are removed, ASCII punctuation is
    stripped, and finally every run of whitespace is collapsed to a single
    space with leading/trailing whitespace trimmed.

    Args:
        text: The raw description. Missing values (``None`` / ``NaN``) are
            accepted, and non-string scalars are converted with ``str()``.

    Returns:
        The cleaned string, or ``""`` when ``text`` is missing.
    """
    if pd.isna(text):
        return ""
    if not isinstance(text, str):
        # Numeric or other scalar cells would otherwise fail on .lower()
        text = str(text)
    text = text.lower()
    # "http\S+" already covers https links, so no separate alternative is needed
    text = re.sub(r"http\S+|www\S+", '', text)  # remove URLs
    text = text.translate(str.maketrans('', '', string.punctuation))  # remove punctuation
    # Collapse whitespace last: deleting a stand-alone punctuation token
    # ("a - b") would otherwise leave a double space behind.
    text = re.sub(r"\s+", ' ', text)
    return text.strip()

# Apply cleaning to ticket descriptions
df["Ticket Description"] = df["Ticket Description"].apply(clean_text)

# Drop rows with empty values in key columns.
# clean_text returns "" for missing descriptions, and dropna() does not treat
# an empty string as missing, so blank descriptions are filtered explicitly.
df = df[df["Ticket Description"].ne("")]
df = df.dropna(subset=["Ticket Description", "Ticket Type", "Ticket Priority"])
print("✅ Cleaned ticket descriptions and dropped missing rows")
