In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
import joblib
import os

In [None]:

# 1. Simulate or Load APK Dataset

def load_dataset(path="android_apk_dataset.csv", n_samples=1000, seed=None):
    """Load the APK feature table from ``path``, synthesising it if absent.

    If ``path`` exists it is read with ``pandas.read_csv`` and returned as is.
    Otherwise a synthetic table is generated, written to ``path`` (so later
    calls load the same rows) and returned.

    The synthetic table has the integer columns ``permissions_count``,
    ``uses_location``, ``uses_sms``, ``uses_camera``, ``uses_contacts``,
    ``apk_size_kb`` and the label ``is_malicious``. Every column, including
    the label, is drawn independently and uniformly at random, so the
    synthetic labels carry no relationship to the features; it is only
    useful for exercising the pipeline, not for judging model quality.

    Parameters
    ----------
    path : str or path-like
        CSV file to read, or to create when it does not exist.
    n_samples : int, default 1000
        Number of synthetic rows to generate. Ignored when ``path`` exists.
    seed : int or None, default None
        Seed for the synthetic data. ``None`` draws from NumPy's global
        generator (so a prior ``np.random.seed(...)`` still applies); an
        integer makes the generated table reproducible on its own.
        Ignored when ``path`` exists.

    Returns
    -------
    pandas.DataFrame
        The loaded or freshly generated dataset.
    """
    if os.path.exists(path):
        df = pd.read_csv(path)
    else:
        print("Dataset not found. Generating synthetic APK data...")

        # A private RandomState keeps seeded runs reproducible without
        # disturbing the global stream; without a seed we fall back to the
        # global generator, exactly as the unseeded version always did.
        rng = np.random if seed is None else np.random.RandomState(seed)

        # Upper bounds are exclusive: binary flags are 0/1, counts are 5..99.
        data = {
            "permissions_count": rng.randint(5, 100, n_samples),
            "uses_location": rng.randint(0, 2, n_samples),
            "uses_sms": rng.randint(0, 2, n_samples),
            "uses_camera": rng.randint(0, 2, n_samples),
            "uses_contacts": rng.randint(0, 2, n_samples),
            "apk_size_kb": rng.randint(100, 50000, n_samples),
            "is_malicious": rng.randint(0, 2, n_samples),
        }

        df = pd.DataFrame(data)
        # Persist so that subsequent calls load identical rows from disk.
        df.to_csv(path, index=False)

    print(f"Loaded dataset with shape: {df.shape}")
    return df

In [None]:
def preprocess(df, scaler=None):
    """Split ``df`` into scaled features and the ``is_malicious`` label.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing an ``is_malicious`` column; every other column is
        treated as a feature and passed to the scaler.
    scaler : object with ``transform``, optional
        An already-fitted scaler to reuse. When given, it is applied with
        ``transform`` only and is not refitted; use this to scale held-out
        or inference data with statistics learned from the training data.
        When omitted, a new ``StandardScaler`` is fitted on ``df``'s features.

    Returns
    -------
    X_scaled
        The scaler's output for the feature columns.
    y : pandas.Series
        The ``is_malicious`` column of ``df``.
    scaler
        The scaler that produced ``X_scaled`` (the one passed in, or the
        newly fitted ``StandardScaler``).

    Raises
    ------
    KeyError
        If ``df`` has no ``is_malicious`` column.

    Notes
    -----
    Calling this on the full dataset before a train/test split fits the
    scaler on rows that later end up in the test set. To avoid that, split
    first, call ``preprocess(train_df)``, then ``preprocess(test_df,
    scaler=fitted_scaler)``.
    """
    X = df.drop(columns="is_malicious")
    y = df["is_malicious"]

    if scaler is None:
        # No scaler supplied: learn the scaling statistics from this frame.
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X)
    else:
        # Reuse the caller's fitted scaler without touching its statistics.
        X_scaled = scaler.transform(X)

    return X_scaled, y, scaler

In [None]:

# 3. Train the Model

def train_model(X_train, y_train, max_depth=10, random_state=None):
    """Fit a decision-tree classifier on the training data.

    Parameters
    ----------
    X_train
        Training features, passed unchanged to ``DecisionTreeClassifier.fit``.
    y_train
        Training labels, passed unchanged to ``DecisionTreeClassifier.fit``.
    max_depth : int or None, default 10
        Maximum depth of the tree, forwarded to ``DecisionTreeClassifier``.
    random_state : int or None, default None
        Forwarded to ``DecisionTreeClassifier``. Pass an integer to get the
        same tree on every run; ``None`` keeps scikit-learn's default
        behaviour.

    Returns
    -------
    DecisionTreeClassifier
        The fitted model.
    """
    model = DecisionTreeClassifier(max_depth=max_depth, random_state=random_state)
    model.fit(X_train, y_train)
    return model