In [1]:
import streamlit as st
import pandas as pd
import numpy as np
import gspread
import plotly.express as px
import plotly.graph_objects as go

from datetime import timedelta
from google.oauth2.service_account import Credentials
from collections import defaultdict

In [None]:
# Read data from gsheet
# Authenticates with a Google service account, downloads every cell of the
# "NEW 4" worksheet as strings, and builds the raw DataFrame `df` with
# de-duplicated column names.

SERVICE_ACCOUNT_FILE = '../../key/credentials.json'
SCOPES = ['https://www.googleapis.com/auth/spreadsheets',
          'https://www.googleapis.com/auth/drive']
creds = Credentials.from_service_account_file(
    SERVICE_ACCOUNT_FILE, scopes=SCOPES)
client = gspread.authorize(creds)

sheet = client.open("[3] AI QC Inbound CRM Review 语音智能质检打标复审 ").worksheet("NEW 4")
data = sheet.get_all_values()

# The 4th sheet row (index 3) is used as the header row.
raw_headers = data[3]
# NOTE(review): `rows` holds the first four sheet rows and is not used in this
# cell — confirm whether `data[4:]` (the rows after the header) was intended.
rows = data[:4]
header_counter = defaultdict(int)
unique_header = []

# Make every column name unique: blank headers become 'Unnamed', and the
# 2nd, 3rd, ... occurrence of a name gets the suffix _1, _2, ...
for header in raw_headers:
    base = header if header.strip() != '' else 'Unnamed'
    header_counter[base] += 1
    occurrence = header_counter[base]

    if occurrence == 1:
        unique_header.append(base)
    else:
        # Fixed: the subscript and subtraction must sit inside the braces;
        # previously the whole dict repr plus the literal "[base]-1" was emitted.
        unique_header.append(f"{base}_{occurrence - 1}")

# NOTE(review): the frame starts at sheet row index 2, so it still contains the
# header row itself (index 3) — verify against the cleaning step that drops
# leading rows.
df = pd.DataFrame(data[2:], columns=unique_header)

In [29]:
# Print a summary of the raw frame: column names, non-null counts and dtypes
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 162932 entries, 0 to 162931
Data columns (total 23 columns):
 #   Column                            Non-Null Count   Dtype 
---  ------                            --------------   ----- 
 0   No                                162932 non-null  object
 1   Tanggal Pengerjaan                162932 non-null  object
 2   Checker                           162932 non-null  object
 3   ASI/AFI                           162932 non-null  object
 4   Waktu Inbound                     162932 non-null  object
 5   Nomor Inbound                     162932 non-null  object
 6   Nama Agent                        162932 non-null  object
 7   Call ID                           162932 non-null  object
 8   Durasi Bicara                     162932 non-null  object
 9   Detik                             162932 non-null  object
 10  Total Tiket Riskan                162932 non-null  object
 11  Apakah Riskan?                    162932 non-null  object
 12  Ti

In [None]:
# Data cleaning
# Work on a copy of the raw frame, skip its first two rows and renumber the index
df_clean = df.copy().iloc[2:].reset_index(drop=True)

# Trim column names; a name that is empty after trimming becomes 'Catatan Tambahan'
df_clean.columns = [name.strip() or 'Catatan Tambahan' for name in df_clean.columns]

# Parse the date/time columns that are present; unparseable values become NaT
# NOTE(review): no explicit `format`/`dayfirst` is passed, so pandas infers the
# layout — confirm this matches how dates are written in the sheet.
datetime_cols = [name for name in ('Tanggal Pengerjaan', 'Waktu Inbound')
                 if name in df_clean.columns]
for name in datetime_cols:
    df_clean[name] = pd.to_datetime(df_clean[name], errors='coerce')

# Convert the running-number column to numeric; non-numeric values become NaN
if 'No' in df_clean.columns:
    df_clean['No'] = pd.to_numeric(df_clean['No'], errors='coerce')


def _strip_text(series):
    """Cast a column to str and remove leading/trailing whitespace."""
    return series.astype(str).str.strip()


# Clean the remaining object (text) columns, then mark empty cells as 'No Data'
text_columns = df_clean.select_dtypes(include='object').columns
df_clean[text_columns] = df_clean[text_columns].apply(_strip_text)
df_clean = df_clean.replace('', 'No Data')

# Cast the known label columns to the category dtype when they are present
categorical_cols = [
    'Checker', 'ASI/AFI', 'Status',
    'Efektif', 'Kejelasan Suara', 'Suara Lain',
    'Kelengkapan Rekaman', 'Sampling user side',
    'Hasil ASR', 'Hasil Pemeriksaan Kualitas', 'Agent Sampling'
]
available_categoricals = [name for name in categorical_cols if name in df_clean.columns]
for name in available_categoricals:
    df_clean[name] = df_clean[name].astype('category')

# Remove the 'Status' column if it exists
df_clean = df_clean.drop(columns=['Status'], errors='ignore')

# Show the resulting schema
df_clean.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 162930 entries, 0 to 162929
Data columns (total 22 columns):
 #   Column                            Non-Null Count   Dtype         
---  ------                            --------------   -----         
 0   No                                162929 non-null  float64       
 1   Tanggal Pengerjaan                59796 non-null   datetime64[ns]
 2   Checker                           162930 non-null  category      
 3   ASI/AFI                           162930 non-null  category      
 4   Waktu Inbound                     0 non-null       datetime64[ns]
 5   Nomor Inbound                     162930 non-null  object        
 6   Nama Agent                        162930 non-null  object        
 7   Call ID                           162930 non-null  object        
 8   Durasi Bicara                     162930 non-null  object        
 9   Detik                             162930 non-null  object        
 10  Total Tiket Riskan              

In [31]:
# Display the cleaned frame as this cell's output
df_clean

Unnamed: 0,No,Tanggal Pengerjaan,Checker,ASI/AFI,Waktu Inbound,Nomor Inbound,Nama Agent,Call ID,Durasi Bicara,Detik,...,Tinjauan CS,Sampling user side,Hasil Pemeriksaan Kualitas (old),Hasil ASR,Hasil Pemeriksaan Kualitas,Efektif,Kejelasan Suara,Suara Lain,Kelengkapan Rekaman,Agent Sampling
0,,NaT,Checker,ASI/AFI,NaT,Nomor Inbound,Nama Agent,Call ID,Durasi Bicara,Detik,...,Tinjauan CS,Sampling user side,Hasil Pemeriksaan Kualitas (old),Hasil ASR,Hasil Pemeriksaan Kualitas,Efektif,Kejelasan Suara,Suara Lain,Kelengkapan Rekaman,Agent Sampling
1,1.0,2025-10-03,Azer,ASI,NaT,0812****7814,TCHL_Anindyka Alfisyah,09a7b53c48c7419ab54ca4b412cb6257,0:26:27,2025-03-07 13:06:54,...,No Data,Done,No Data,Entri Akurat,Percakapan Normal,Miss Target/ Not HC,"Sangat jelas, tidak bising sama sekali",0 Satu pembicara,0 Utuh,No Data
2,2.0,2025-10-03,Azer,ASI,NaT,0812****7814,TCHL_Anindyka Alfisyah,09a7b53c48c7419ab54ca4b412cb6257,0:26:27,2025-03-07 13:07:08,...,No Data,Done,No Data,Entri Akurat,Percakapan Normal,Miss Target/ Not HC,"Sangat jelas, tidak bising sama sekali",0 Satu pembicara,0 Utuh,No Data
3,3.0,2025-10-03,Azer,ASI,NaT,0812****7814,TCHL_Anindyka Alfisyah,09a7b53c48c7419ab54ca4b412cb6257,0:26:27,2025-03-07 13:07:13,...,No Data,Done,No Data,Terdapat kesalahan,Percakapan Normal,Miss Target/ Not HC,Cukup Jelas,1 Suara tumpang tindih,1 Tidak Utuh,No Data
4,4.0,2025-10-03,Azer,ASI,NaT,0812****7814,TCHL_Anindyka Alfisyah,09a7b53c48c7419ab54ca4b412cb6257,0:26:27,2025-03-07 13:07:20,...,No Data,Done,No Data,Terdapat kesalahan,Percakapan Normal,Miss Target/ Not HC,Cukup Jelas,0 Satu pembicara,0 Utuh,No Data
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
162925,162925.0,NaT,Aul,AFI,NaT,0818**6780,Damayanti,081a9aba28064343b955313be487eadf,0:05:25,2025-09-22 11:41:28,...,No Data,Done,No Data,Terdapat kesalahan,Percakapan normal & aturan lainnya,Miss Target/ Not HC,Konten Non Vokal,0 Satu pembicara,1 Tidak Utuh,No Data
162926,162926.0,NaT,Aul,AFI,NaT,0818**6780,Damayanti,081a9aba28064343b955313be487eadf,0:05:25,2025-09-22 11:41:30,...,No Data,Done,No Data,Entri Akurat,Percakapan Normal,Miss Target/ Not HC,Konten Non Vokal,0 Satu pembicara,0 Utuh,No Data
162927,162927.0,NaT,Aul,AFI,NaT,0818**6780,Damayanti,081a9aba28064343b955313be487eadf,0:05:25,2025-09-22 11:41:32,...,No Data,Done,No Data,Terdapat kesalahan,Percakapan normal & aturan lainnya,Miss Target/ Not HC,Konten Non Vokal,0 Satu pembicara,1 Tidak Utuh,No Data
162928,162928.0,NaT,Aul,AFI,NaT,0818**6780,Damayanti,081a9aba28064343b955313be487eadf,0:05:25,2025-09-22 11:41:35,...,No Data,Done,No Data,Terdapat kesalahan,Percakapan normal & aturan lainnya,Miss Target/ Not HC,Konten Non Vokal,0 Satu pembicara,1 Tidak Utuh,No Data


In [6]:
# Write the cleaned frame to CSV. No `index` argument is passed, so pandas'
# default applies and the row index is written as the first column.
df_clean.to_csv('../../dataset_qc/new_4_clean.csv')