In [7]:
import pandas as pd
from datetime import datetime

# Contoh data
df = pd.read_csv('data_tribunnews_cleaned.csv')

# Konversi nama bulan Indonesia ke angka
bulan_mapping = {
    'Januari': '01', 'Februari': '02', 'Maret': '03', 'April': '04',
    'Mei': '05', 'Juni': '06', 'Juli': '07', 'Agustus': '08',
    'September': '09', 'Oktober': '10', 'November': '11', 'Desember': '12'
}

def format_tanggal(tanggal_str):
    try:
        # Pecah string
        parts = tanggal_str.split(', ')[1].split(' ') 
        day = parts[0].zfill(2)
        month = bulan_mapping[parts[1]]
        year = parts[2]
        return f'{day}/{month}/{year}'
    except Exception as e:
        return None  

# Terapkan pada kolom tanggal
df['date'] = df['date'].apply(format_tanggal)

print(df)


                                                 title  date  \
0    mengejutkan! oknum tni al gadaikan motor rp15 ...  None   
1    baru sebulan menikah, agus buntung dituntut pe...  None   
2    menyambut senyum partinem dan 457 jemaah haji ...  None   
3    gemriah fest 2025 pecah di jambi, dewa 19 biki...  None   
4    jadwal badminton taipei open 2025 hari ini: 6 ...  None   
..                                                 ...   ...   
115  desain gedung legislatif-yudikatif di ikn tela...  None   
116  ylbhi: militerisme di indonesia sudah terlihat...  None   
117  sosok fuji di mata ayah verrell bramasta: baik...  None   
118  purnawirawan tni gulirkan pemakzulan gibran, g...  None   
119  kementerian pu sebut biaya pembangunan satu se...  None   

                                                  link  category  
0    https://www.tribunnews.com/regional/2025/05/06...   Lainnya  
1    https://www.tribunnews.com/regional/2025/05/06...  Kriminal  
2    https://www.tribunnews.co

In [8]:
def classify_category(title):
    if any(word in title for word in ['presiden', 'dpr', 'pilkada', 'politik', 'menteri', 'parlemen', 'gibran', 'pemilu', 'pemilihan', 'kpk', 'prabowo', 'jokowi', 'bawaslu', 'partai']):
        return 'Politik'
    elif any(word in title for word in ['korupsi', 'kriminal', 'narkoba', 'penjara', 'pembunuhan', 'kejahatan', 'begal', 'perampokan', 'kasus', 'bunuh', 'penipuan', 'pencurian', 'hukum']):
        return 'Kriminal'
    elif any(word in title for word in ['ekonomi', 'inflasi', 'bisnis', 'investasi', 'rupiah', 'perdagangan', 'pasar', 'beras']):
        return 'Ekonomi'
    elif any(word in title for word in ['sekolah', 'universitas', 'pendidikan', 'guru', 'mahasiswa', 'kampus', 'pendidikan tinggi']):
        return 'Pendidikan'
    elif any(word in title for word in ['bencana', 'iklim', 'cuaca', 'lingkungan', 'gempa', 'tsunami', 'polusi']):
        return 'Lingkungan'
    elif any(word in title for word in ['bola', 'olahraga', 'pertandingan', 'liga', 'sepak bola', 'basket', 'futsal', 'olimpiade', 'atlet', 'badminton', 'barcelona', 'real madrid', 'tim', 'psg']):
        return 'Olahraga'
    elif any(word in title for word in ['film', 'artis', 'musik', 'hiburan', 'drama', 'teater', 'video', 'tv', 'konser', 'fest', 'selebriti']):
        return 'Hiburan'
    elif any(word in title for word in ['dunia', 'internasional', 'luar negeri', 'amerika', 'israel', 'diplomasi', 'perang']):
        return 'Internasional'
    elif any(word in title for word in ['teknologi', 'internet', 'startup', 'gadget', 'AI', 'robotik', 'programming', 'teknologi informasi']):
        return 'Teknologi'
    elif any(word in title for word in ['sains', 'astronomi', 'penemuan', 'ilmu', 'penelitian', 'fisika', 'kimia', 'biologi']):
        return 'Sains'
    elif any(word in title for word in ['kesehatan', 'covid', 'dokter', 'penyakit', 'obat', 'kanker', 'vaksin', 'gizi']):
        return 'Kesehatan'
    elif any(word in title for word in ['sosial', 'masyarakat', 'komunitas', 'volunteer', 'bantuan', 'pengungsi', 'kemanusiaan']):
        return 'Sosial'
    elif any(word in title for word in ['gaya hidup', 'fashion', 'makanan', 'travel', 'wisata', 'kecantikan', 'tips', 'trending']):
        return 'Gaya Hidup'
    elif any(word in title for word in ['keuangan', 'bank', 'investasi', 'asuransi', 'utang', 'nasabah', 'pajak']):
        return 'Keuangan'
    elif any(word in title for word in ['transportasi', 'mobil', 'kereta', 'pesawat', 'jalan tol', 'infrastruktur']):
        return 'Transportasi'
    elif any(word in title for word in ['hukum', 'peradilan', 'pengadilan', 'hakim', 'advokat', 'undang-undang', 'peraturan']):
        return 'Hukum'
    elif any(word in title for word in ['pertanian', 'peternakan', 'perikanan', 'pertambangan', 'pertanian organik']):
        return 'Pertanian'
    elif any(word in title for word in ['kuliner', 'restoran', 'makanan', 'minuman', 'resep', 'cafe', 'makanan khas']):
        return 'Kuliner'
    elif any(word in title for word in ['perusahaan', 'korporasi', 'industri', 'pabrik', 'bisnis', 'kerja', 'pengusaha']):
        return 'Perusahaan'
    elif any(word in title for word in ['astronomi', 'ruang angkasa', 'planet', 'teleskop', 'antariksa', 'roket']):
        return 'Astronomi'
    elif any(word in title for word in ['game', 'esports', 'video game', 'game mobile', 'game pc']):
        return 'Game'
    elif any(word in title for word in ['pernikahan', 'kawin', 'rumah tangga', 'keluarga', 'anak-anak', 'divorcing']):
        return 'Keluarga'
    elif any(word in title for word in ['budaya', 'tradisi', 'adat', 'festival', 'kesenian']):
        return 'Budaya'
    elif any(word in title for word in ['lingkungan', 'polusi', 'pemanasan global', 'recycle', 'konservasi']):
        return 'Lingkungan'
    elif any(word in title for word in ['militer', 'perang', 'tentara', 'senjata', 'konflik']):
        return 'Militer'
    else:
        return 'Lainnya'

df = pd.read_csv('data_tribunnews_cleaned.csv')
df['category'] = df['title'].apply(classify_category)



In [9]:
# Menyimpan hasil ke CSV
df.to_csv('data_tribunnews_cleaned.csv', index=False)