## 1. Setup dan Instalasi Dependencies

In [None]:
# Install required packages. The leading `!` hands the line to the notebook's
# shell; `-q` reduces pip's output. openpyxl is the engine pandas uses to read
# .xlsx workbooks.
!pip install pandas openpyxl -q

In [None]:
# Import libraries
import pandas as pd
import re
from typing import Dict, List, Tuple, Set
import warnings
# Installs a global "ignore" filter, so every warning raised after this line is
# suppressed (presumably to keep the notebook output uncluttered).
warnings.filterwarnings('ignore')

print("‚úÖ Libraries imported successfully!")

## 2. Upload File Data Excel

Upload file `data_skripsi.xlsx` ke Google Colab

In [None]:
from google.colab import files

print("üìÅ Upload file data_skripsi.xlsx")
# Opens Colab's upload dialog; the result is used as a mapping keyed by the
# uploaded file names.
uploaded = files.upload()

# A cancelled dialog leaves nothing to index; fail with an actionable message
# instead of a bare IndexError.
if not uploaded:
    raise RuntimeError(
        "Tidak ada file yang di-upload. Jalankan ulang cell ini dan pilih data_skripsi.xlsx."
    )

# The first uploaded file name becomes the dataset path used by later cells.
FILE_PATH = next(iter(uploaded))
print(f"‚úÖ File uploaded: {FILE_PATH}")

## 3. Data Loader Module

Module untuk load dan preprocessing data dari Excel

In [None]:
def load_data(file_path):
    """
    Read the repair dataset from an Excel workbook and normalise its columns.

    The sheet is read with ``header=1``; the first row of the resulting frame
    is then promoted to be the column titles. Columns without a title and rows
    without a "Jenis Perbaikan" value are discarded, and recognised columns are
    renamed to short snake_case names.

    Args:
        file_path: Workbook location passed straight to ``pd.read_excel``.

    Returns:
        The cleaned DataFrame with a fresh 0-based index.

    Raises:
        KeyError: If no column is titled exactly "Jenis Perbaikan".
    """
    sheet = pd.read_excel(file_path, header=1)

    # Promote the first data row to column titles and drop it from the body.
    df = sheet.iloc[1:].reset_index(drop=True)
    df.columns = sheet.iloc[0]

    # Keep only titled columns and rows that name a repair type.
    df = df.loc[:, ~df.columns.isna()]
    df = df.dropna(subset=["Jenis Perbaikan"]).reset_index(drop=True)

    print("Kolom yang tersedia:", df.columns.tolist())

    # Ordered (required substrings, new name) pairs. The first entry whose
    # substrings all occur in the lower-cased title wins, so the more specific
    # "estimasi waktu pengerjaan" must stay ahead of the diagnosis variant.
    rename_rules = (
        (("id produk",), "id_produk"),
        (("id_produk",), "id_produk"),
        (("jenis perbaikan",), "jenis_perbaikan"),
        (("gejala kerusakan",), "gejala"),
        (("jenis kerusakan",), "jenis_kerusakan"),
        (("tingkat kerusakan",), "tingkat"),
        (("rekomendasi perbaikan",), "rekomendasi"),
        (("estimasi biaya",), "biaya"),
        (("estimasi waktu pengerjaan",), "waktu"),
        (("estimasi waktu", "diagnosis"), "waktu_diagnosis"),
    )

    rename_mapping = {}
    for col in df.columns:
        title = str(col).lower().strip()
        for needles, new_name in rename_rules:
            if all(needle in title for needle in needles):
                rename_mapping[col] = new_name
                break

    return df.rename(columns=rename_mapping)


def get_output_mapping(df):
    """
    Build a lookup from each recommendation to its cost and time estimate.

    Rows are visited in order, so when a recommendation appears more than once
    the values of its last row are kept. Rows whose "rekomendasi" is missing
    are ignored, and a missing "biaya" or "waktu" value is reported as "-".

    Args:
        df: DataFrame that may contain "rekomendasi", "biaya" and "waktu"
            columns.

    Returns:
        Dict shaped ``{rekomendasi: {"biaya": ..., "waktu": ...}}``; empty when
        the DataFrame has no "rekomendasi" column.
    """
    # Without the key column no row can contribute an entry.
    if "rekomendasi" not in df.columns:
        return {}

    def _or_dash(value):
        # Replace NaN/None with the placeholder used throughout the notebook.
        return value if pd.notna(value) else "-"

    output_map = {}
    for _, row in df.iterrows():
        rekomendasi = row.get("rekomendasi")
        if not pd.notna(rekomendasi):
            continue
        output_map[rekomendasi] = {
            "biaya": _or_dash(row.get("biaya", "-")),
            "waktu": _or_dash(row.get("waktu", "-")),
        }
    return output_map

# Confirmation message shown once the data-loader cell has run.
print("‚úÖ Data Loader Module ready!")

## 4. Utility Functions

Fungsi-fungsi untuk preprocessing text dan feature extraction

In [None]:
def split_phrases(text: str) -> List[str]:
    """
    Break free text into a de-duplicated list of phrases and keywords.

    The text is lower-cased and normalised, cut into comma-separated phrases,
    and every phrase is additionally broken into its individual words. Items of
    two characters or fewer are discarded; first-seen order is preserved.

    Args:
        text: Raw cell value; NaN/None yields an empty list and any other value
            is converted with ``str``.

    Returns:
        Each phrase followed by its own words, without duplicates.
    """
    if pd.isna(text):
        return []

    normalized = str(text).lower()

    # Collapse both spellings of "diduduki" into the same token.
    for variant in ("diduduki", "di duduki"):
        normalized = normalized.replace(variant, "duduki")

    # Strip a "nya" suffix at the end of a word.
    normalized = re.sub(r'nya\b', '', normalized)

    # Slashes, semicolons and the conjunctions "dan"/"atau" all act as commas.
    for separator in ("/", " dan ", " atau ", ";"):
        normalized = normalized.replace(separator, ",")

    # Keep only ASCII letters, digits, commas and spaces.
    normalized = re.sub(r"[^a-zA-Z0-9, ]", "", normalized)

    # Each non-empty phrase contributes itself and then its words (> 2 chars).
    candidates = []
    for chunk in normalized.split(","):
        chunk = chunk.strip()
        if not chunk:
            continue
        candidates.append(chunk)
        candidates.extend(word for word in chunk.split() if len(word) > 2)

    # Squeeze inner whitespace, drop short items, de-duplicate in order.
    squeezed = (" ".join(candidate.split()) for candidate in candidates)
    return list(dict.fromkeys(item for item in squeezed if len(item) > 2))

# Confirmation message shown once the utility cell has run.
print("‚úÖ Utility Functions ready!")

## 5. Rules Engine - Forward Chaining & Certainty Factor

Implementasi 2 metode utama

In [None]:
# Certainty given to a rule whose conditions are all present in the input;
# partial matches scale it by the fraction of conditions matched.
DEFAULT_CF = 0.8


class RulesEngine:
    """
    Rule base derived from the repair dataset, queried by two strategies.

    Each rule maps a tuple of condition phrases to a recommendation.
    ``forward_chaining`` picks the rule sharing the most phrases with the
    input; ``certainty_factor`` picks the rule with the highest share of its
    conditions matched. Both ignore rules with less than half of their
    conditions present in the input.
    """

    # Minimum fraction of a rule's conditions that must occur in the input.
    _MIN_OVERLAP_RATIO = 0.5

    def __init__(self, df):
        """Keep ``df`` for later filtering and build the rule table from it."""
        self.df = df
        self.rules = self._build_rules(df)

    def _build_rules(self, df):
        """
        Turn every row that has a recommendation into one rule.

        The condition is the sorted, de-duplicated set of phrases taken from
        the "gejala" and "jenis_kerusakan" columns, plus "jenis_perbaikan"
        when that column exists. Rows producing the same condition overwrite
        each other, so the last one wins.

        NOTE(review): conditions include the jenis_perbaikan keywords, while
        get_rekomendasi passes only symptom/damage phrases as input, so an
        otherwise exact match cannot reach a 100% overlap ratio - confirm this
        is intended.

        Returns:
            Dict mapping condition tuples to recommendation values.
        """
        has_jenis_perbaikan = "jenis_perbaikan" in df.columns
        rules = {}
        for _, row in df.iterrows():
            rekomendasi = row["rekomendasi"]
            # A row without a recommendation has no conclusion to infer.
            # Keeping it would let NaN win a match and be returned (or
            # sliced) as if it were the recommendation text.
            if pd.isna(rekomendasi):
                continue
            cond_phrases = split_phrases(row["gejala"]) + split_phrases(row["jenis_kerusakan"])
            if has_jenis_perbaikan:
                cond_phrases += split_phrases(row.get("jenis_perbaikan", ""))
            rules[tuple(sorted(set(cond_phrases)))] = rekomendasi
        return rules

    def _iter_matches(self, input_phrases):
        """
        Yield ``(solution, overlap_count, condition_count, overlap_ratio)``
        for every rule that shares at least one phrase with the input and has
        at least ``_MIN_OVERLAP_RATIO`` of its conditions matched.
        """
        input_set = set(input_phrases)
        for cond, sol in self.rules.items():
            cond_set = set(cond)
            overlap_count = len(cond_set & input_set)
            if overlap_count == 0:
                continue
            # overlap_count > 0 guarantees cond_set is non-empty.
            overlap_ratio = overlap_count / len(cond_set)
            if overlap_ratio >= self._MIN_OVERLAP_RATIO:
                yield sol, overlap_count, len(cond_set), overlap_ratio

    def forward_chaining(self, input_phrases: List[str]) -> str:
        """
        Return the recommendation of the rule sharing the most phrases with
        the input.

        Ties keep the rule encountered first. Progress is printed for every
        rule that becomes the new best candidate.

        Args:
            input_phrases: Phrases describing the observed facts.

        Returns:
            The winning recommendation, or "Tidak diketahui" when no rule
            qualifies.
        """
        best, max_score = None, 0

        print(f"\n[Forward Chaining] Input: {input_phrases}")

        for sol, overlap_count, cond_size, overlap_ratio in self._iter_matches(input_phrases):
            if overlap_count > max_score:
                best, max_score = sol, overlap_count
                # str() keeps the preview working for non-text recommendations.
                print(f"  ‚Üí Match: {str(sol)[:50]}... (Overlap: {overlap_count}/{cond_size} = {overlap_ratio:.0%})")

        result = best or "Tidak diketahui"
        print(f"[FC] Result: {result}")
        return result

    def certainty_factor(self, input_phrases: List[str]) -> Tuple[str, float]:
        """
        Return the recommendation with the highest certainty factor.

        A rule's certainty is ``DEFAULT_CF`` multiplied by the fraction of its
        conditions present in the input. Ties keep the rule encountered first.

        Args:
            input_phrases: Phrases describing the observed facts.

        Returns:
            ``(recommendation, cf)``; ``("Tidak diketahui", 0.0)`` when no rule
            qualifies.
        """
        best_sol, best_cf = "Tidak diketahui", 0.0

        print(f"\n[Certainty Factor] Input: {input_phrases}")

        for sol, _overlap_count, _cond_size, overlap_ratio in self._iter_matches(input_phrases):
            adjusted_cf = DEFAULT_CF * overlap_ratio
            if adjusted_cf > best_cf:
                best_cf, best_sol = adjusted_cf, sol
                print(f"  ‚Üí Match: {str(sol)[:50]}... (CF: {adjusted_cf:.2f}, Overlap: {overlap_ratio:.0%})")

        print(f"[CF] Result: {best_sol} (CF: {best_cf:.2f})")
        return best_sol, best_cf

    def filter_by_product(self, jenis_perbaikan_input: str):
        """
        Select the rows whose "jenis_perbaikan" shares a phrase with the input.

        Args:
            jenis_perbaikan_input: Repair/product type entered by the user.

        Returns:
            A DataFrame of the matching rows, or the full dataset unchanged
            when nothing matches.
        """
        jenis_phrases = set(split_phrases(jenis_perbaikan_input))

        matching_rows = [
            row
            for _, row in self.df.iterrows()
            if jenis_phrases & set(split_phrases(row.get("jenis_perbaikan", "")))
        ]

        # Fall back to the whole dataset rather than an empty rule base.
        return pd.DataFrame(matching_rows) if matching_rows else self.df

# Confirmation message shown once the rules-engine cell has run.
print("‚úÖ Rules Engine ready!")

## 6. Load Data dan Inisialisasi

In [None]:
# Load and clean the workbook uploaded earlier (FILE_PATH is set by the upload cell)
df = load_data(FILE_PATH)

print(f"\nüìä Data loaded: {len(df)} rows")
print(f"\nPreview 5 data pertama:")
# display() is not imported in this notebook; it is expected to be supplied by
# the IPython/Colab runtime.
display(df.head())

# Initialize the rules engine on the full dataset; get_rekomendasi uses it to
# filter rows by product before building a per-request engine.
rules_engine = RulesEngine(df)

# Recommendation -> {"biaya", "waktu"} lookup built from the full dataset
OUTPUT_MAPPING = get_output_mapping(df)

print(f"\n‚úÖ System initialized!")
print(f"   - Total rules: {len(rules_engine.rules)}")
print(f"   - Output mappings: {len(OUTPUT_MAPPING)}")

## 7. Fungsi Rekomendasi Utama

In [None]:
def get_rekomendasi(gejala_kerusakan: str, jenis_kerusakan: str, jenis_perbaikan: str) -> Dict:
    """
    Produce a repair recommendation for one reported problem.

    The dataset is narrowed to rows matching ``jenis_perbaikan``, a rule base
    is rebuilt from those rows, and both inference methods are run on the
    phrases of the symptom and damage descriptions. The forward-chaining
    answer is preferred; the certainty-factor answer is used only when forward
    chaining finds nothing.

    NOTE(review): the input phrases exclude ``jenis_perbaikan`` although the
    rule conditions may include its keywords - confirm this is intended.

    Args:
        gejala_kerusakan: Free-text symptom description.
        jenis_kerusakan: Free-text damage description.
        jenis_perbaikan: Repair/product type used to filter the dataset.

    Returns:
        Dict with the echoed input, the filtered row count, the chosen
        recommendation with its cost/time estimate, the method that produced
        it, its confidence score and a per-method comparison.
    """
    unknown = "Tidak diketahui"
    banner = "="*80

    print(banner)
    print("SPK REKOMENDASI PERBAIKAN")
    print(banner)

    # Narrow the dataset to the requested product and rebuild rules from it.
    filtered_df = rules_engine.filter_by_product(jenis_perbaikan)
    print(f"\nüìã Filtered data: {len(filtered_df)} rows for '{jenis_perbaikan}'")
    product_engine = RulesEngine(filtered_df)

    # Facts fed to both inference methods.
    phrases = split_phrases(gejala_kerusakan) + split_phrases(jenis_kerusakan)

    fc_result = product_engine.forward_chaining(phrases)
    cf_result, cf_value = product_engine.certainty_factor(phrases)

    fc_found = fc_result != unknown
    cf_found = cf_result != unknown

    # Forward chaining is all-or-nothing; CF reports its own percentage.
    fc_confidence = 100 if fc_found else 0
    cf_confidence = round(cf_value * 100, 2) if cf_found else 0

    def _status(found):
        return "Match" if found else "Tidak ditemukan"

    # Final decision: forward chaining first, certainty factor as fallback.
    if fc_found:
        final, primary_method, confidence_score = fc_result, "Forward Chaining", 100
    elif cf_found:
        final, primary_method, confidence_score = cf_result, "Certainty Factor", cf_confidence
    else:
        final, primary_method, confidence_score = unknown, "None", 0

    estimasi = OUTPUT_MAPPING.get(final, {"biaya": "-", "waktu": "-"})

    return {
        "input": {
            "gejala_kerusakan": gejala_kerusakan,
            "jenis_kerusakan": jenis_kerusakan,
            "jenis_perbaikan": jenis_perbaikan,
        },
        "filtered_data_count": len(filtered_df),
        "rekomendasi_perbaikan": final,
        "estimasi_biaya": estimasi["biaya"],
        "estimasi_waktu": estimasi["waktu"],
        "primary_method": primary_method,
        "confidence_score": confidence_score,
        "methods_comparison": {
            "forward_chaining": {
                "rekomendasi": fc_result,
                "confidence": fc_confidence,
                "status": _status(fc_found),
            },
            "certainty_factor": {
                "rekomendasi": cf_result,
                "confidence": cf_confidence,
                "status": _status(cf_found),
            },
        },
    }

# Confirmation message shown once the recommendation cell has run.
print("‚úÖ Recommendation function ready!")

## 8. Testing & Contoh Penggunaan

### Test Case 1: Kursi Sofa

In [None]:
# Test Case 1: sample sofa input; the result is kept in result1 for the
# visualisation, summary and export cells below.
result1 = get_rekomendasi(
    gejala_kerusakan="Kurang nyaman di duduki",
    jenis_kerusakan="Karet / pirnya putus",
    jenis_perbaikan="Kursi Sofa 321"
)

print("\n" + "="*80)
print("üìä HASIL REKOMENDASI")
print("="*80)

# json is imported here and reused by the Test Case 2 cell.
# ensure_ascii=False prints non-ASCII characters as-is instead of \u escapes.
import json
print(json.dumps(result1, indent=2, ensure_ascii=False))

### Test Case 2: Custom Input

Silakan ubah input sesuai kebutuhan

In [None]:
# Test Case 2 - replace the three placeholder strings below with real input
result2 = get_rekomendasi(
    gejala_kerusakan="[GANTI DENGAN GEJALA]",
    jenis_kerusakan="[GANTI DENGAN JENIS KERUSAKAN]",
    jenis_perbaikan="[GANTI DENGAN JENIS PERBAIKAN]"
)

print("\n" + "="*80)
print("üìä HASIL REKOMENDASI")
print("="*80)

# Relies on `json` having been imported by the Test Case 1 cell.
print(json.dumps(result2, indent=2, ensure_ascii=False))

## 9. Analisis Perbandingan Metode

Visualisasi perbandingan Forward Chaining vs Certainty Factor

In [None]:
import matplotlib.pyplot as plt

def visualize_comparison(result):
    """
    Draw a bar chart comparing the confidence of the two inference methods.

    Args:
        result: Dict providing ``result['methods_comparison'][method]['confidence']``
            for "forward_chaining" and "certainty_factor", plus
            ``result['primary_method']``.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    comparison = result['methods_comparison']
    methods = ['Forward\nChaining', 'Certainty\nFactor']
    confidences = [
        comparison[key]['confidence']
        for key in ('forward_chaining', 'certainty_factor')
    ]

    fig, ax = plt.subplots(figsize=(10, 6))
    bars = ax.bar(
        methods,
        confidences,
        color=['#3498db', '#e74c3c'],
        alpha=0.7,
        edgecolor='black',
        linewidth=2,
    )

    # Write each percentage just above its bar.
    for bar, conf in zip(bars, confidences):
        ax.text(
            bar.get_x() + bar.get_width()/2.,
            bar.get_height(),
            f'{conf:.1f}%',
            ha='center', va='bottom', fontsize=14, fontweight='bold',
        )

    ax.set_ylabel('Confidence Score (%)', fontsize=12, fontweight='bold')
    ax.set_title('Perbandingan Metode Forward Chaining vs Certainty Factor',
                 fontsize=14, fontweight='bold', pad=20)
    # Headroom above 100 so the label of a full-height bar still fits.
    ax.set_ylim(0, 110)
    ax.grid(axis='y', alpha=0.3, linestyle='--')

    # Badge naming the method that produced the final answer; positioned in
    # axes coordinates, slightly above the plotting area.
    primary = result['primary_method']
    ax.text(0.5, 1.05, f'Primary Method: {primary}',
            transform=ax.transAxes,
            ha='center', fontsize=11,
            bbox=dict(boxstyle='round', facecolor='yellow', alpha=0.3))

    fig.tight_layout()
    plt.show()

# Visualize result1 (the result produced by the Test Case 1 cell)
visualize_comparison(result1)

## 10. Summary dan Kesimpulan

In [None]:
def print_summary(result):
    """
    Print a human-readable summary of one recommendation result.

    Args:
        result: Dict with the keys produced by ``get_rekomendasi`` ("input",
            "filtered_data_count", "primary_method", "confidence_score",
            "rekomendasi_perbaikan", "estimasi_biaya", "estimasi_waktu",
            "methods_comparison").

    Returns:
        None. Everything is written to standard output.
    """
    rule = "="*80

    print("\n" + rule)
    print("üìã RINGKASAN HASIL")
    print(rule)

    print(f"\nüîç INPUT:")
    given = result['input']
    input_fields = (
        ("Gejala Kerusakan", 'gejala_kerusakan'),
        ("Jenis Kerusakan", 'jenis_kerusakan'),
        ("Jenis Perbaikan", 'jenis_perbaikan'),
    )
    for label, key in input_fields:
        print(f"   - {label}: {given[key]}")

    print(f"\nüìä HASIL ANALISIS:")
    print(f"   - Data yang di-filter: {result['filtered_data_count']} rows")
    print(f"   - Metode Utama: {result['primary_method']}")
    print(f"   - Confidence Score: {result['confidence_score']}%")

    print(f"\n‚úÖ REKOMENDASI:")
    print(f"   - Perbaikan: {result['rekomendasi_perbaikan']}")
    # Numeric costs get a currency prefix and thousands separators; anything
    # else (such as the "-" placeholder) is printed unchanged.
    biaya = result['estimasi_biaya']
    if isinstance(biaya, (int, float)):
        print(f"   - Estimasi Biaya: Rp {biaya:,}")
    else:
        print(f"   - Estimasi Biaya: {biaya}")
    print(f"   - Estimasi Waktu: {result['estimasi_waktu']}")

    print(f"\nüî¨ PERBANDINGAN METODE:")
    comparison = result['methods_comparison']
    sections = (
        ("Forward Chaining", comparison['forward_chaining']),
        ("Certainty Factor", comparison['certainty_factor']),
    )
    for title, method in sections:
        print(f"\n   {title}:")
        print(f"      - Rekomendasi: {method['rekomendasi']}")
        print(f"      - Confidence: {method['confidence']}%")
        print(f"      - Status: {method['status']}")

    print("\n" + rule)

# Print the summary for result1 (the result produced by the Test Case 1 cell)
print_summary(result1)

## 11. Export Hasil ke CSV

In [None]:
# Save the result to CSV: a single-row table flattened from result1
results_df = pd.DataFrame([{
    'Gejala Kerusakan': result1['input']['gejala_kerusakan'],
    'Jenis Kerusakan': result1['input']['jenis_kerusakan'],
    'Jenis Perbaikan': result1['input']['jenis_perbaikan'],
    'Rekomendasi': result1['rekomendasi_perbaikan'],
    'Estimasi Biaya': result1['estimasi_biaya'],
    'Estimasi Waktu': result1['estimasi_waktu'],
    'Primary Method': result1['primary_method'],
    'Confidence Score': result1['confidence_score'],
    'FC Confidence': result1['methods_comparison']['forward_chaining']['confidence'],
    'CF Confidence': result1['methods_comparison']['certainty_factor']['confidence']
}])

# index=False leaves the DataFrame's row index out of the file.
results_df.to_csv('hasil_rekomendasi.csv', index=False)
print("‚úÖ Hasil disimpan ke 'hasil_rekomendasi.csv'")

# Download file (`files` is the google.colab module imported in the upload cell)
files.download('hasil_rekomendasi.csv')

---

## 📚 Dokumentasi

### Cara Penggunaan:

1. Upload file `data_skripsi.xlsx`
2. Jalankan semua cell secara berurutan
3. Ubah input di Test Case 2 sesuai kebutuhan
4. Analisis hasil perbandingan metode

### Metodologi:

**Forward Chaining:**
- Rule-based inference dari fakta ke kesimpulan
- Mencari rule dengan overlap terbanyak (minimal 50% kondisi rule harus cocok dengan input)
- Confidence: 100% jika match, 0% jika tidak

**Certainty Factor:**
- Menangani ketidakpastian dengan CF value
- CF disesuaikan dengan overlap ratio
- Range: 0–80% (CF maksimum = `DEFAULT_CF` 0.8 × overlap ratio 100%; rule dengan overlap di bawah 50% diabaikan)

### Author:
Ryan H. - 2025

---