In [1]:
import gspread
import pandas as pd
from datetime import date
from google.oauth2.service_account import Credentials

## Pipeline A

In [2]:
# --- Google Sheets access configuration -------------------------------------
# Path to the service-account key file and the OAuth scopes requested for it.
SERVICE_ACCOUNT_FILE = '../../key/credentials.json'
SCOPES = [
    'https://www.googleapis.com/auth/spreadsheets',
    'https://www.googleapis.com/auth/drive',
]

# Build scoped credentials from the key file and authorize a gspread client.
creds = Credentials.from_service_account_file(
    SERVICE_ACCOUNT_FILE,
    scopes=SCOPES,
)
client = gspread.authorize(creds)

# Open the "CSAT Chatbot" tab of the "Chatbot Ops Report" spreadsheet and
# pull every cell value as a list of rows.
workbook = client.open("Chatbot Ops Report")
sheet = workbook.worksheet("CSAT Chatbot")
data = sheet.get_all_values()

# The first row supplies the column names; every following row is data.
header_row = data[0]
body_rows = data[1:]
df = pd.DataFrame(body_rows, columns=header_row)

In [3]:
# Derive the cleaned frame from `df` in one chain, leaving `df` untouched:
#   1. drop the 'Day' column,
#   2. parse 'Date' using the '%d-%b-%Y' format (unparseable values become NaT),
#   3. discard every row whose 'Date' could not be parsed.
df_clean = (
    df
    .drop(columns='Day')
    .assign(
        Date=lambda frame: pd.to_datetime(
            frame['Date'], format='%d-%b-%Y', errors='coerce'
        )
    )
    .dropna(subset=['Date'])
)

In [4]:
# Preview the cleaned frame; as the last expression of the cell it is
# rendered as the cell's output.
df_clean

Unnamed: 0,Date,Total Good Survey,Total Bad Survey,Total Responden,Total Rating,CSAT
1,2023-01-30,25,16,41,133,3.24
2,2023-01-31,26,17,43,140,3.26
3,2023-02-01,21,12,33,113,3.42
4,2023-02-02,23,17,40,130,3.25
5,2023-02-03,24,19,43,130,3.02
...,...,...,...,...,...,...
951,2025-09-06,16,9,25,83,3.32
952,2025-09-07,25,14,39,133,3.41
953,2025-09-08,17,10,27,90,3.33
954,2025-09-09,18,9,27,93,3.44


## Pipeline B

In [5]:
# Location of the CSV export that feeds Pipeline B.
pipeline_b_csv = '../../dataset_kula/csat/akulaku_superset_presto_hanmm_ozy.prazuganda_1755663318_8_19.csv'

# Load the export and print its column / non-null / dtype summary.
df_b = pd.read_csv(pipeline_b_csv)
df_b.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11949 entries, 0 to 11948
Data columns (total 33 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   business                           11949 non-null  object 
 1   id                                 11949 non-null  object 
 2   currentAccount                     11949 non-null  object 
 3   userId                             11949 non-null  int64  
 4   shopid                             11949 non-null  int64  
 5   summary_type                       0 non-null      float64
 6   summary_classify                   0 non-null      float64
 7   ticketid                           4805 non-null   object 
 8   Category 1                         4492 non-null   object 
 9   Category 2                         4146 non-null   object 
 10  Category 3                         3912 non-null   object 
 11  summary_text                       0 non-null      flo

In [6]:
# Keep only the rows whose currentAccount is the robot account.
is_robot = df_b['currentAccount'] == '3-1-robot'
df_filtered = df_b[is_robot]

# Scores with missing values removed; everything below is computed from these.
score = df_filtered['score'].dropna()

# Good / bad split: a score of 3 or more counts as good, anything lower as bad.
# Summing the boolean masks counts the matching entries.
good_survey = (score >= 3).sum()
bad_survey = (score < 3).sum()

# Number of scored responses and the (integer-truncated) sum of their scores.
total_responden = score.count()
total_rating = int(score.sum())

# Average score rounded to two decimals; falls back to 0 when nothing was scored.
if total_responden > 0:
    csat = round(total_rating / total_responden, 2)
else:
    csat = 0

In [7]:
# Disabled code kept inside a bare triple-quoted string. Nothing in it runs;
# the string is simply evaluated as the cell's last expression, so the notebook
# echoes its repr as the cell output.
#
# What the disabled snippet would do if re-enabled: build a one-row frame from
# the counters (good_survey, bad_survey, total_responden, total_rating, csat)
# dated "yesterday" and append it to df_clean when the last 'Date' differs.
#
# NOTE(review): in the `if` condition the left-hand side reads `.normalize`
# without call parentheses while the right-hand side calls `.normalize()`, so
# as written it would compare a method object against a Timestamp rather than
# two dates. Fix this before re-enabling the snippet.
"""
-- Not necessary at the moment --

# Checking if the yesterday data already exists in the DataFrame
yesterday = pd.Timestamp(date.today() - pd.Timedelta(days=1))

if (df_clean['Date'].iloc[-1].normalize != yesterday.normalize()):
    new_row = pd.DataFrame([{
        'Date': yesterday,
        'Total Good Survey': good_survey,
        'Total Bad Survey': bad_survey,
        'Total Responden': total_responden,
        'Total Rating': total_rating,
        'CSAT': csat
    }])

    df_clean = pd.concat([df_clean, new_row], ignore_index=True)

"""

"\n-- Not necessary at the moment --\n\n# Checking if the yesterday data already exists in the DataFrame\nyesterday = pd.Timestamp(date.today() - pd.Timedelta(days=1))\n\nif (df_clean['Date'].iloc[-1].normalize != yesterday.normalize()):\n    new_row = pd.DataFrame([{\n        'Date': yesterday,\n        'Total Good Survey': good_survey,\n        'Total Bad Survey': bad_survey,\n        'Total Responden': total_responden,\n        'Total Rating': total_rating,\n        'CSAT': csat\n    }])\n\n    df_clean = pd.concat([df_clean, new_row], ignore_index=True)\n\n"

In [8]:
# Persist the cleaned frame as CSV. `index` is left at its default, so the
# frame's index is written as the first column of the file.
csat_output_path = '../../dataset_kula/csat.csv'
df_clean.to_csv(csat_output_path)