In [2]:
import gspread
import pandas as pd
from google.oauth2.service_account import Credentials

## Pipeline A

In [3]:
# Path to the service-account key file and the OAuth scopes requested
# (Google Sheets and Google Drive endpoints).
SERVICE_ACCOUNT_FILE = '../../key/credentials.json'
SCOPES = [
    'https://www.googleapis.com/auth/spreadsheets',
    'https://www.googleapis.com/auth/drive',
]

# Build credentials from the key file and authorize a gspread client with them.
creds = Credentials.from_service_account_file(SERVICE_ACCOUNT_FILE, scopes=SCOPES)
client = gspread.authorize(creds)

# Open the spreadsheet by title, pick the worksheet by tab name, and fetch
# every cell value as a list of rows.
spreadsheet = client.open("QC Chatbot ")
sheet = spreadsheet.worksheet("Sampling Bad Survey")
data = sheet.get_all_values()

# The first row is used as the column header; all following rows become records.
df = pd.DataFrame(data[1:], columns=data[0])

In [4]:
# Summarize the raw frame: row count, column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18236 entries, 0 to 18235
Data columns (total 18 columns):
 #   Column                   Non-Null Count  Dtype 
---  ------                   --------------  ----- 
 0   Checking Date            18236 non-null  object
 1   Conversation Start Time  18236 non-null  object
 2   Week                     18236 non-null  object
 3   Month                    18236 non-null  object
 4   QC Name                  18236 non-null  object
 5   Business Type            18236 non-null  object
 6   UID                      18236 non-null  object
 7   ID Chat                  18236 non-null  object
 8   Main Category            18236 non-null  object
 9   QC Result                18236 non-null  object
 10  Sub Category             18236 non-null  object
 11  Suggestion Rate          18236 non-null  object
 12  Type                     18236 non-null  object
 13  Category                 18236 non-null  object
 14  Code_lama                18236 non-nul

In [5]:
# Work on a copy so the raw sheet data in `df` stays untouched and this cell
# can be re-run from the same starting point.
df_clean = df.copy()

# Strip surrounding whitespace from every text column FIRST, so the date
# parsing, numeric conversion and de-duplication below all operate on
# normalized values. Doing this last (as before) let rows that differ only by
# stray spaces survive drop_duplicates, and let padded date strings fail the
# strict "%m/%d/%Y" format and be coerced to NaT.
# The dtype test also accepts pandas' dedicated string dtypes, not only `object`.
df_clean = df_clean.apply(
    lambda col: col.str.strip()
    if col.dtype == "object" or pd.api.types.is_string_dtype(col)
    else col
)

# Convert the date columns; values that do not match MM/DD/YYYY become NaT.
date_cols = ['Checking Date', 'Conversation Start Time']
for col in date_cols:
    df_clean[col] = pd.to_datetime(df_clean[col], format="%m/%d/%Y", errors='coerce')

# Drop the column that is not needed downstream.
df_clean = df_clean.drop(columns=['Code_lama'])

# Clean the Code column: missing values become '' and hyphens are removed.
# With regex=True the pattern matches inside each string, so EVERY '-' is
# removed, not only cells that are exactly '-'.
# NOTE(review): confirm that removing all hyphens is the intent.
df_clean['Code'] = (
    df_clean['Code']
    .fillna('')
    .replace('-', '', regex=True)
)

# Convert the rating to numeric; unparseable values become NaN.
df_clean['Rating'] = pd.to_numeric(df_clean['Rating'], errors='coerce')

# Remove fully duplicated rows (the original index labels are kept).
df_clean = df_clean.drop_duplicates()

df_clean.info()

<class 'pandas.core.frame.DataFrame'>
Index: 18233 entries, 0 to 18235
Data columns (total 17 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   Checking Date            18232 non-null  datetime64[ns]
 1   Conversation Start Time  18233 non-null  datetime64[ns]
 2   Week                     18233 non-null  object        
 3   Month                    18233 non-null  object        
 4   QC Name                  18233 non-null  object        
 5   Business Type            18233 non-null  object        
 6   UID                      18233 non-null  object        
 7   ID Chat                  18233 non-null  object        
 8   Main Category            18233 non-null  object        
 9   QC Result                18233 non-null  object        
 10  Sub Category             18233 non-null  object        
 11  Suggestion Rate          18233 non-null  object        
 12  Type                     18233 non-nu

In [6]:
# Persist the cleaned frame as CSV.
# NOTE(review): to_csv writes the index by default, and the index here still
# carries the pre-deduplication row labels (see the df_clean.info() output);
# consider index=False if downstream readers do not rely on that column.
df_clean.to_csv('../../dataset_kula/bad_survey.csv')

In [7]:
# Show the cleaned frame as the cell output for a visual sanity check.
df_clean

Unnamed: 0,Checking Date,Conversation Start Time,Week,Month,QC Name,Business Type,UID,ID Chat,Main Category,QC Result,Sub Category,Suggestion Rate,Type,Category,Code,Remarks,Rating
0,2023-01-31,2023-01-30,Week 1,January,Nurhamni Septia,No Differentiated,2010758137,2-2010758137-3-1_1675043140,SYSTEM,Robots don't show up,Others could not be identified,Medium,Bad Survey,No Category,,Robot tidak mengirimkan jawaban seharusnya bis...,1.0
1,2023-01-31,2023-01-30,Week 1,January,Nurhamni Septia,ASI,2011367987,2-2011367987-3-1_1675050791,CHATBOT OPS,Add Question Simulation,Online Merchant - Shipping Complaints,Urgent,Bad Survey,Merchant Online,,kalau emang belum di kirim hari ini saya akan ...,1.0
2,2023-01-31,2023-01-30,Week 1,January,Nurhamni Septia,AFI,2016182809,2-2016182809-3-1_1675069380,Customer,Information Not Meet Customer Expectations,"Have a loan on another platform, cannot make t...",,Bad Survey,Submission of Limit/Credit Points,,Customer telah memilih pertanyaan dan jawabann...,1.0
3,2023-01-31,2023-01-30,Week 1,January,Nurhamni Septia,No Differentiated,2017149569,2-2017149569-3-1_1675051331,Customer,Chat Customer No Clear,Chit-chat,,Bad Survey,No Category,,Customer tidak chat apapun hanya menampilkan t...,1.0
4,2023-01-31,2023-01-30,Week 1,January,Nurhamni Septia,ASI,2018885687,2-2018885687-3-1_1675013279,Customer,Information Not Meet Customer Expectations,Online Merchant - No receipt number,,Bad Survey,Merchant Online,,"Pertanyaan dan jawaban sudah sesuai FAQ, namun...",1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18231,2025-10-08,2025-10-07,Week 2,October,Nurhamni Septia,AFI,5856441,2-5856441-3-1_1759826988,Customer,Information Not Meet Customer Expectations,Cannot make credit applications in the Akulaku...,,Bad Survey,Submission of Limit/Credit Points,289326483503,,1.0
18232,2025-10-08,2025-10-07,Week 2,October,Yulia Sari,,21881579,2-21881579-3-1_1759817954,,,,,Bad Survey,,,,2.0
18233,2025-10-08,2025-10-07,Week 2,October,Yulia Sari,,2034268431,2-2034268431-3-1_1759789626,,,,,Bad Survey,,,,1.0
18234,2025-10-08,2025-10-07,Week 2,October,Yulia Sari,,2004405354,2-2004405354-3-1_1759814423,,,,,Bad Survey,,,,1.0
