In [6]:
import gspread
import pandas as pd
from google.oauth2.service_account import Credentials

## Pipeline A

In [7]:
# Path to the service-account key file and the API scopes requested for it.
SERVICE_ACCOUNT_FILE = '../../key/credentials.json'
SCOPES = [
    'https://www.googleapis.com/auth/spreadsheets',
    'https://www.googleapis.com/auth/drive',
]

# Build credentials from the key file and authorise a gspread client with them.
creds = Credentials.from_service_account_file(
    SERVICE_ACCOUNT_FILE,
    scopes=SCOPES,
)
client = gspread.authorize(creds)

# Open the spreadsheet by title (the trailing space is part of the title string),
# pick the worksheet, and fetch its values as a list of rows.
sheet = (
    client
    .open("QC Chatbot ")
    .worksheet("Sampling Bad Survey")
)
data = sheet.get_all_values()

# The first fetched row is used as the header; every following row is a record.
df = pd.DataFrame(columns=data[0], data=data[1:])

In [8]:
# Display the frame built from the worksheet values so it can be inspected before cleaning.
df

Unnamed: 0,Checking Date,Conversation Start Time,Week,Month,QC Name,Business Type,UID,ID Chat,Main Category,QC Result,Sub Category,Suggestion Rate,Type,Category,Code_lama,Code,Remarks,Rating
0,1/31/2023,1/30/2023,Week 1,January,Nurhamni Septia,No Differentiated,2010758137,2-2010758137-3-1_1675043140,SYSTEM,Robots don't show up,Others could not be identified,Medium,Bad Survey,No Category,209901,,Robot tidak mengirimkan jawaban seharusnya bis...,1
1,1/31/2023,1/30/2023,Week 1,January,Nurhamni Septia,ASI,2011367987,2-2011367987-3-1_1675050791,CHATBOT OPS,Add Question Simulation,Online Merchant - Shipping Complaints,Urgent,Bad Survey,Merchant Online,131901,,kalau emang belum di kirim hari ini saya akan ...,1
2,1/31/2023,1/30/2023,Week 1,January,Nurhamni Septia,AFI,2016182809,2-2016182809-3-1_1675069380,Customer,Information Not Meet Customer Expectations,"Have a loan on another platform, cannot make t...",,Bad Survey,Submission of Limit/Credit Points,050301,,Customer telah memilih pertanyaan dan jawabann...,1
3,1/31/2023,1/30/2023,Week 1,January,Nurhamni Septia,No Differentiated,2017149569,2-2017149569-3-1_1675051331,Customer,Chat Customer No Clear,Chit-chat,,Bad Survey,No Category,-,-,Customer tidak chat apapun hanya menampilkan t...,1
4,1/31/2023,1/30/2023,Week 1,January,Nurhamni Septia,ASI,2018885687,2-2018885687-3-1_1675013279,Customer,Information Not Meet Customer Expectations,Online Merchant - No receipt number,,Bad Survey,Merchant Online,131201,,"Pertanyaan dan jawaban sudah sesuai FAQ, namun...",1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17394,8/20/2025,8/19/2025,Week 4,August,,,2049330886,2-2049330886-3-1_1755621871,,,,,,,,,,1
17395,8/20/2025,8/19/2025,Week 4,August,,,2020796983,2-2020796983-3-1_1755586207,,,,,,,,,,1
17396,8/20/2025,8/19/2025,Week 4,August,,,2038428538,2-2038428538-3-1_1755586105,,,,,,,,,,1
17397,8/20/2025,8/19/2025,Week 4,August,,,2257413,2-2257413-3-1_1755613234,,,,,,,,,,1


In [9]:
# Clean on a copy so the raw frame `df` stays untouched and this cell can be re-run.
df_clean = df.copy()

# Trim surrounding whitespace from text columns FIRST. Doing this before the
# steps below matters:
#   * the date parse uses a strict format, so a padded value would be coerced to NaT;
#   * the '-' placeholder replacement matches whole values only, so ' - ' would survive;
#   * drop_duplicates compares exact values, so rows differing only by stray
#     whitespace would otherwise be kept as distinct rows.
# Both object columns and pandas' dedicated string dtype are handled.
df_clean = df_clean.apply(
    lambda col: col.str.strip()
    if col.dtype == "object" or pd.api.types.is_string_dtype(col)
    else col
)

# Parse the two date columns; values that do not match month/day/year become NaT.
for date_col in ('Checking Date', 'Conversation Start Time'):
    df_clean[date_col] = pd.to_datetime(
        df_clean[date_col], format="%m/%d/%Y", errors='coerce'
    )

# Drop the 'Code_lama' column and normalise missing / '-' codes to an empty string.
df_clean = df_clean.drop(columns='Code_lama')
df_clean['Code'] = df_clean['Code'].fillna('').replace('-', '')

# Convert ratings to numbers; unparseable entries become NaN.
df_clean['Rating'] = pd.to_numeric(df_clean['Rating'], errors='coerce')

# Remove exact duplicate rows now that every column is normalised.
df_clean = df_clean.drop_duplicates()

df_clean

Unnamed: 0,Checking Date,Conversation Start Time,Week,Month,QC Name,Business Type,UID,ID Chat,Main Category,QC Result,Sub Category,Suggestion Rate,Type,Category,Code,Remarks,Rating
0,2023-01-31,2023-01-30,Week 1,January,Nurhamni Septia,No Differentiated,2010758137,2-2010758137-3-1_1675043140,SYSTEM,Robots don't show up,Others could not be identified,Medium,Bad Survey,No Category,,Robot tidak mengirimkan jawaban seharusnya bis...,1.0
1,2023-01-31,2023-01-30,Week 1,January,Nurhamni Septia,ASI,2011367987,2-2011367987-3-1_1675050791,CHATBOT OPS,Add Question Simulation,Online Merchant - Shipping Complaints,Urgent,Bad Survey,Merchant Online,,kalau emang belum di kirim hari ini saya akan ...,1.0
2,2023-01-31,2023-01-30,Week 1,January,Nurhamni Septia,AFI,2016182809,2-2016182809-3-1_1675069380,Customer,Information Not Meet Customer Expectations,"Have a loan on another platform, cannot make t...",,Bad Survey,Submission of Limit/Credit Points,,Customer telah memilih pertanyaan dan jawabann...,1.0
3,2023-01-31,2023-01-30,Week 1,January,Nurhamni Septia,No Differentiated,2017149569,2-2017149569-3-1_1675051331,Customer,Chat Customer No Clear,Chit-chat,,Bad Survey,No Category,,Customer tidak chat apapun hanya menampilkan t...,1.0
4,2023-01-31,2023-01-30,Week 1,January,Nurhamni Septia,ASI,2018885687,2-2018885687-3-1_1675013279,Customer,Information Not Meet Customer Expectations,Online Merchant - No receipt number,,Bad Survey,Merchant Online,,"Pertanyaan dan jawaban sudah sesuai FAQ, namun...",1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17394,2025-08-20,2025-08-19,Week 4,August,,,2049330886,2-2049330886-3-1_1755621871,,,,,,,,,1.0
17395,2025-08-20,2025-08-19,Week 4,August,,,2020796983,2-2020796983-3-1_1755586207,,,,,,,,,1.0
17396,2025-08-20,2025-08-19,Week 4,August,,,2038428538,2-2038428538-3-1_1755586105,,,,,,,,,1.0
17397,2025-08-20,2025-08-19,Week 4,August,,,2257413,2-2257413-3-1_1755613234,,,,,,,,,1.0


In [10]:
# Write the cleaned frame to CSV. `index` is left at its default, so the row
# index is written as the first (unnamed) column of the file.
df_clean.to_csv(path_or_buf='../../dataset_kula/bad_survey.csv')