# Exploratory Data Analysis (EDA)

This notebook analyzes the synthetic customer support ticket dataset.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Settings
# Use seaborn's "whitegrid" style for every figure drawn later in the notebook.
sns.set(style="whitegrid")
# IPython magic: render matplotlib figures inline in the cell output.
%matplotlib inline

## 1. Load Data

In [None]:
# Location of the raw ticket CSV, relative to the notebook's working directory.
raw_tickets_csv = '../data/raw/tickets.csv'

# Load the tickets into the frame used by every later cell.
df = pd.read_csv(raw_tickets_csv)

# Report (rows, columns), then show the first rows as the cell output.
print(f"Shape: {df.shape}")
df.head()

## 2. Data Quality Checks

In [None]:
# Print a per-column summary of the frame (column names, dtypes, non-null counts).
df.info()

In [None]:
# Count null entries per column, then print them under a single heading.
missing_per_column = df.isnull().sum()
print("Missing values:\n", missing_per_column)

## 3. Class Distribution

In [None]:
# Horizontal bar chart of ticket counts per category, most frequent first.
categories_by_frequency = df['category'].value_counts().index

fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(y='category', data=df, order=categories_by_frequency, ax=ax)
ax.set_title('Distribution of Ticket Categories')
ax.set_xlabel('Count')
ax.set_ylabel('Category')
plt.show()

## 4. Priority Distribution

In [None]:
# Bar chart of ticket counts per priority, drawn in a fixed order
# rather than alphabetically or by frequency.
priority_order = ['Low', 'Medium', 'High', 'Critical']

fig, ax = plt.subplots(figsize=(8, 5))
sns.countplot(x='priority', data=df, order=priority_order, ax=ax)
ax.set_title('Distribution of Priority Levels')
plt.show()

## 5. Text Length Analysis

In [None]:
# Character count of each ticket description.
# `.str.len()` is vectorized and yields NaN for a missing description, whereas
# `.apply(len)` raises TypeError on NaN (a float has no len()).
df['text_length'] = df['description'].str.len()

# Histogram (50 bins) with a KDE overlay of the description lengths.
# Rows with a missing description have no length, so they are dropped explicitly
# before plotting; with no missing values this is the same Series as before.
plt.figure(figsize=(12, 5))
sns.histplot(df['text_length'].dropna(), bins=50, kde=True)
plt.title('Distribution of Description Length (Characters)')
plt.show()