In [16]:
import gspread
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from wordcloud import WordCloud
from google.oauth2.service_account import Credentials

In [17]:
# Service-account key file and the API scopes requested for it
# (Google Sheets and Google Drive).
SERVICE_ACCOUNT_FILE = 'key/credentials.json'
SCOPES = [
    'https://www.googleapis.com/auth/spreadsheets',
    'https://www.googleapis.com/auth/drive',
]

# Build scoped credentials from the key file and hand them to gspread.
creds = Credentials.from_service_account_file(
    SERVICE_ACCOUNT_FILE,
    scopes=SCOPES,
)
client = gspread.authorize(creds)

# Pull every value of the worksheet; the result is indexed row by row below.
sheet = client.open('Suara Bukan Manusia').worksheet('Sheet1')
data = sheet.get_all_values()

# The first row supplies the column names, the remaining rows the records.
df = pd.DataFrame(data=data[1:], columns=data[0])
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 502 entries, 0 to 501
Data columns (total 7 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   Tanggal Pengerjaan  502 non-null    object
 1   Nama                502 non-null    object
 2   ASI/AFI             502 non-null    object
 3   Call ID             502 non-null    object
 4   Detik               502 non-null    object
 5   teks robot          502 non-null    object
 6   Recording           502 non-null    object
dtypes: object(7)
memory usage: 27.6+ KB


In [20]:
# Build a cleaned copy of `df`; the raw frame is left untouched so this cell
# can be re-run without side effects on its input.
cat_cols = ['Nama', 'ASI/AFI', 'teks robot']

# Strip whitespace from string column labels first, so the name lookups below
# match even if a header carries stray spaces, then drop 'Recording' when it
# is present.
df_clean = (
    df.copy()
    .rename(columns=lambda c: c.strip() if isinstance(c, str) else c)
    .drop(columns=['Recording'], errors='ignore')
)

# Restrict the text normalisation to columns that really exist. Handing
# `assign` a callable that returns None for a missing column would create
# that column (filled with None) instead of skipping it, and the category
# cast below would then pick the phantom column up as well.
existing_cat_cols = [c for c in cat_cols if c in df_clean.columns]

# Cast to str and trim surrounding whitespace so values that differ only by
# padding end up as the same category.
df_clean = df_clean.assign(**{
    c: df_clean[c].astype(str).str.strip() for c in existing_cat_cols
})

# date
# Values that cannot be parsed become NaT instead of raising.
# NOTE(review): no explicit format/dayfirst is passed, so pandas infers the
# layout -- confirm this matches how dates are typed in the sheet.
if 'Tanggal Pengerjaan' in df_clean.columns:
    df_clean['Tanggal Pengerjaan'] = pd.to_datetime(df_clean['Tanggal Pengerjaan'], errors='coerce')

if existing_cat_cols:
    df_clean = df_clean.astype({c: 'category' for c in existing_cat_cols})

# inspect
df_clean.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 502 entries, 0 to 501
Data columns (total 6 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   Tanggal Pengerjaan  502 non-null    datetime64[ns]
 1   Nama                502 non-null    category      
 2   ASI/AFI             502 non-null    category      
 3   Call ID             502 non-null    object        
 4   Detik               502 non-null    object        
 5   teks robot          502 non-null    category      
dtypes: category(3), datetime64[ns](1), object(2)
memory usage: 16.4+ KB


In [None]:
# Print the summary of df_clean (index range, columns, non-null counts, dtypes).
df_clean.info()