In [1]:
import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

In [2]:
# Membaca dataset
data = pd.read_csv('Automobile.csv')

In [3]:
# Menampilkan informasi awal tentang dataset
print(data.head())

                        name   mpg  cylinders  displacement  horsepower  \
0  chevrolet chevelle malibu  18.0          8         307.0       130.0   
1          buick skylark 320  15.0          8         350.0       165.0   
2         plymouth satellite  18.0          8         318.0       150.0   
3              amc rebel sst  16.0          8         304.0       150.0   
4                ford torino  17.0          8         302.0       140.0   

   weight  acceleration  model_year origin  
0    3504          12.0          70    usa  
1    3693          11.5          70    usa  
2    3436          11.0          70    usa  
3    3433          12.0          70    usa  
4    3449          10.5          70    usa  


In [4]:
# Preprocessing dataset
# Menghapus baris dengan nilai yang hilang
data = data.dropna()

In [5]:
# Mengubah kolom dengan tipe data string menjadi kategori
string_columns = data.select_dtypes(include=['object']).columns
data[string_columns] = data[string_columns].astype('category')

In [6]:
# Mengubah kategori menjadi nilai numerik
data[string_columns] = data[string_columns].apply(lambda x: x.cat.codes)

In [7]:
# Menerapkan algoritma Apriori untuk mencari itemset frekuensi tinggi
frequent_itemsets = apriori(data.astype(bool), min_support=0.1, use_colnames=True)

In [8]:
# Membangkitkan aturan asosiasi dari itemset frekuensi tinggi
rules = association_rules(frequent_itemsets, metric='confidence', min_threshold=0.7)

In [9]:
# Menampilkan aturan asosiasi
for index, rule in rules.iterrows():
    antecedents = ', '.join([str(item) for item in rule.antecedents])
    consequents = ', '.join([str(item) for item in rule.consequents])
    support = rule.support
    confidence = rule.confidence
    lift = rule.lift
    print(f"Rule: {antecedents} -> {consequents}")
    print(f"Support: {support}, Confidence: {confidence}, Lift: {lift}\n")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Support: 0.8239795918367347, Confidence: 0.9969135802469136, Lift: 0.9994632313472893

Rule: name, acceleration, cylinders, displacement -> mpg, origin, model_year, weight
Support: 0.8239795918367347, Confidence: 0.8260869565217391, Lift: 0.9994632313472893

Rule: acceleration, model_year, cylinders, displacement -> name, mpg, origin, weight
Support: 0.8239795918367347, Confidence: 0.8239795918367347, Lift: 1.0

Rule: name, acceleration, model_year, cylinders -> mpg, origin, weight, displacement
Support: 0.8239795918367347, Confidence: 0.8260869565217391, Lift: 0.9994632313472893

Rule: mpg, origin, cylinders, displacement -> name, acceleration, model_year, weight
Support: 0.8239795918367347, Confidence: 0.9969135802469136, Lift: 0.9994632313472893

Rule: name, mpg, origin, cylinders -> acceleration, model_year, weight, displacement
Support: 0.8239795918367347, Confidence: 1.0, Lift: 1.0

Rule: mpg, origin, model_year, cy