In [4]:
# Cell 1 — Set project paths
import os
from pathlib import Path

# Project root. Defaults to the original local folder; set the
# TICKET_DASHBOARD_DIR environment variable to run the notebook from another
# machine or checkout without editing this cell.
project = Path(os.environ.get("TICKET_DASHBOARD_DIR",
                              r"D:\SaaS Ticket Analytics Dashboard"))
cleaned = project / "cleaned"   # folder holding the cleaned CSV inputs

print("Using folder:", cleaned)


Using folder: D:\SaaS Ticket Analytics Dashboard\cleaned


In [5]:
# Cell 2 — List CSVs
# Gather the bare file name of every CSV in the cleaned folder, then order
# the names alphabetically so the listing is stable between runs.
files = [csv_path.name for csv_path in cleaned.glob("*.csv")]
files.sort()
files


['agents_clean.csv', 'customers_clean.csv', 'tickets_master_clean.csv']

In [6]:
# Cell 3 — Load tickets data
import pandas as pd

# Only `created_at` is parsed as a datetime here; every other column keeps
# the dtype pandas infers. low_memory=False is passed through unchanged.
tickets_csv = cleaned / "tickets_master_clean.csv"
read_options = {"low_memory": False, "parse_dates": ['created_at']}
tickets = pd.read_csv(tickets_csv, **read_options)

tickets.shape


(11000, 25)

In [7]:
# Cell 4 — Inspect structure
# Preview the first five rows with the rich notebook renderer.
display(tickets.head(5))

# Datatype of every column.
print("\nColumn types:\n", tickets.dtypes)

# Percentage of missing values per column, rounded to 2 decimals,
# showing the ten columns with the most gaps.
missing_pct = tickets.isna().mean() * 100
top_missing = missing_pct.round(2).sort_values(ascending=False).head(10)
print("\nMissing %:\n", top_missing)


Unnamed: 0,ticket_id,created_at,first_response_at,resolved_at,priority,status,issue_type,channel,customer_id,agent_id,...,escalated,csat,created_date,created_hour,created_weekday,agent_name,team,experience_years,customer_size,customer_segment
0,T000001,2025-02-21 10:34:51,2025-02-21 12:04:51,2025-02-22 17:58:20.092908,High,Closed,Performance Issue,Email,C0716,A007,...,False,4.2,2025-02-21,10,Friday,Agent_7,Tier 1,1.7,Large,Retail
1,T000002,2025-08-20 08:09:08,2025-08-20 11:52:08,2025-08-21 00:37:32.619889,Low,Closed,Api Failure,Chat,C0694,A010,...,False,3.8,2025-08-20,8,Wednesday,Agent_10,Tier 2,1.4,Small,Healthcare
2,T000003,2024-11-27 20:38:08,2024-11-27 22:58:08,2024-11-30 00:22:49.088104,Low,Closed,Integration Config,Phone,C0371,A017,...,False,3.3,2024-11-27,20,Wednesday,Agent_17,Tier 1,1.2,Medium,Healthcare
3,T000004,2024-11-21 21:36:48,2024-11-21 21:42:48,2024-11-21 22:36:48.000000,Urgent,Closed,Login Issue,Email,C0719,A030,...,False,3.2,2024-11-21,21,Thursday,Agent_30,Tier 1,9.7,Medium,Education
4,T000005,2025-02-21 08:59:35,2025-02-21 15:07:35,2025-02-22 09:06:43.772157,Low,Closed,Billing Query,Email,C0147,A021,...,False,2.9,2025-02-21,8,Friday,Agent_21,Tier 2,2.0,Enterprise,Education



Column types:
 ticket_id                        object
created_at               datetime64[ns]
first_response_at                object
resolved_at                      object
priority                         object
status                           object
issue_type                       object
channel                          object
customer_id                      object
agent_id                         object
first_response_mins             float64
resolution_time_hours           float64
sla_hours                         int64
sla_breached                      int64
reopened                           bool
escalated                          bool
csat                            float64
created_date                     object
created_hour                      int64
created_weekday                  object
agent_name                       object
team                             object
experience_years                float64
customer_size                    object
customer_segment        

In [8]:
# Cell 5 — Create helper columns
# Derive calendar helpers from the parsed `created_at` timestamp:
# the calendar date, a "YYYY-MM" string, and the hour of day.
created_dt = tickets['created_at'].dt
tickets['created_date'] = created_dt.date
tickets['year_month'] = created_dt.to_period('M').astype(str)
tickets['created_hour'] = created_dt.hour

helper_columns = ['created_date', 'year_month', 'created_hour']
tickets[helper_columns].head()


Unnamed: 0,created_date,year_month,created_hour
0,2025-02-21,2025-02,10
1,2025-08-20,2025-08,8
2,2024-11-27,2024-11,20
3,2024-11-21,2024-11,21
4,2025-02-21,2025-02,8


In [9]:
# Cell 6 — Simple KPIs

def _column_mean(frame, column):
    """Return the mean of ``frame[column]``, or None when the column is absent."""
    if column not in frame.columns:
        return None
    return frame[column].mean()


def _fmt_kpi(value, ndigits=2):
    """Round a KPI for printing, or return "N/A" when it could not be computed.

    round(None, 2) raises TypeError, so a KPI whose source column is missing
    must be substituted before rounding instead of crashing the cell.
    """
    return "N/A" if value is None else round(value, ndigits)


total_tickets = len(tickets)

avg_first_response = _column_mean(tickets, 'first_response_mins')
avg_resolution_hrs = _column_mean(tickets, 'resolution_time_hours')
avg_csat = _column_mean(tickets, 'csat')

# Missing breach flags are treated as "not breached" (fillna(0)) before the
# share of breached tickets is expressed as a percentage.
# NOTE(review): the saved output of this cell reports 0.0 % — TODO confirm
# that `sla_breached` is actually populated upstream.
sla_breach_pct = tickets['sla_breached'].fillna(0).astype(int).mean()*100 \
    if 'sla_breached' in tickets.columns else None

print("Total Tickets:", total_tickets)
print("Avg First Response (mins):", _fmt_kpi(avg_first_response))
print("Avg Resolution (hrs):", _fmt_kpi(avg_resolution_hrs))
print("SLA Breach %:", _fmt_kpi(sla_breach_pct))
print("Avg CSAT:", _fmt_kpi(avg_csat))


Total Tickets: 11000
Avg First Response (mins): 260.39
Avg Resolution (hrs): 20.86
SLA Breach %: 0.0
Avg CSAT: 3.56


In [10]:
# Cell 7 — Save metadata summary
# One row per column of `tickets`: its name, dtype (as text), number of
# missing values, and percentage of missing values rounded to 2 decimals.
missing_mask = tickets.isna()   # computed once, reused for count and percentage

meta = pd.DataFrame({
    'column': tickets.columns,
    'dtype': tickets.dtypes.astype(str),
    'n_missing': missing_mask.sum(),
    'pct_missing': (missing_mask.mean()*100).round(2)
})

# to_csv does not create missing parent folders, so make sure "outputs"
# exists before writing; otherwise a fresh project folder fails here.
outputs_dir = project / "outputs"
outputs_dir.mkdir(parents=True, exist_ok=True)

meta.to_csv(outputs_dir / "stage1_metadata.csv", index=False)
meta.head()


Unnamed: 0,column,dtype,n_missing,pct_missing
ticket_id,ticket_id,object,0,0.0
created_at,created_at,datetime64[ns],0,0.0
first_response_at,first_response_at,object,0,0.0
resolved_at,resolved_at,object,0,0.0
priority,priority,object,0,0.0


In [11]:
# Cell 8 — Finish
# Report where the Stage 1 metadata summary was written.
metadata_path = project / "outputs" / "stage1_metadata.csv"
print("Stage 1 completed. Metadata saved to:", metadata_path)


Stage 1 completed. Metadata saved to: D:\SaaS Ticket Analytics Dashboard\outputs\stage1_metadata.csv
