<a href="https://colab.research.google.com/github/SilvianaSiagian/klasifikasi-malware/blob/main/Malware.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report


# Load the traffic dataset; the file uses ';' as its field separator.
data = pd.read_csv('android_traffic.csv', delimiter=';')

# Drop every column whose share of missing values exceeds the threshold.
MISSING_THRESHOLD = 0.6
missing_percent = data.isnull().sum() / len(data)
columns_to_drop = missing_percent[missing_percent > MISSING_THRESHOLD].index
data = data.drop(columns=columns_to_drop)

# Collect the names of the non-numeric columns.
# 'number' matches every numeric dtype, so narrower or nullable ints/floats
# are not misclassified as non-numeric merely for not being int64/float64.
non_numeric_cols = data.select_dtypes(exclude='number').columns.tolist()


In [11]:
# Convert the non-numeric columns to integer codes, one LabelEncoder per column.
# The codes are written to a copy so that `data` keeps its original values:
# overwriting `data` in place would make this cell non-idempotent, because a
# re-run would fit the encoders on the already-encoded integers and lose the
# original class names stored in `classes_`.
label_encoders = {}
data_encoded = data.copy()
for col in non_numeric_cols:
    label_encoders[col] = LabelEncoder()
    # Values are cast to str first, so every entry is encoded as text.
    data_encoded[col] = label_encoders[col].fit_transform(data[col].astype(str))

# Separate the features from the target column 'type'.
fitur = data_encoded.drop(columns='type')  # Features
label = data_encoded['type']  # Target

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(fitur, label, test_size=0.2, random_state=42)

# Fit a decision tree classifier on the training split.
decision_tree = DecisionTreeClassifier(random_state=42)
decision_tree.fit(X_train, y_train)

In [12]:
# Run the fitted tree on the held-out test features.
prediksi = decision_tree.predict(X=X_test)

# Score the predictions: per-class report first, then overall accuracy.
laporan_kelasifikasi = classification_report(y_true=y_test, y_pred=prediksi)
akurasi = accuracy_score(y_true=y_test, y_pred=prediksi)

# Print the evaluation results.
print(f'Akurasi: {akurasi}')
print(f'Laporan Klasifikasi:\n{laporan_kelasifikasi}')

Akurasi: 0.9917144678138942
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       0.99      0.99      0.99       938
           1       0.99      0.99      0.99       631

    accuracy                           0.99      1569
   macro avg       0.99      0.99      0.99      1569
weighted avg       0.99      0.99      0.99      1569



In [13]:
pip install graphviz



In [14]:
from sklearn.tree import export_graphviz
import graphviz

# Export the fitted tree as DOT source text (out_file=None returns it as a
# string instead of writing a file). Feature and class names are passed as
# plain lists, which every scikit-learn version accepts.
dot_data = export_graphviz(
    decision_tree,
    out_file=None,
    feature_names=list(fitur.columns),
    class_names=list(label_encoders['type'].classes_),
    filled=True,
    rounded=True,
    special_characters=True,
)

# Wrap the DOT text in a graphviz Source object.
graph = graphviz.Source(dot_data)

# Render the tree to disk once. No format is given, so the default applies
# and the result is written as decision_tree.pdf.
graph.render("decision_tree")

# Show the tree inline as the cell output. This replaces graph.view(), which
# rendered the same file a second time and tried to launch an external
# desktop viewer -- something a hosted notebook kernel cannot show.
graph

'decision_tree.pdf'