In [4]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler, MaxAbsScaler, RobustScaler, Normalizer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load the dataset
data = pd.read_csv('Processed_random.csv')

# Drop the stray 'Unnamed: 0' column when it is present; errors='ignore'
# makes this a no-op when the column is absent.
data = data.drop(columns=['Unnamed: 0'], errors='ignore')

# Remove the 'action' column so it is not used as a feature
data = data.drop('action', axis=1)

# Separate features and target
X = data.drop('app', axis=1)
y = data['app']

# Split ONCE, before any scaling.
# - Fitting a scaler on the full dataset and splitting afterwards lets
#   statistics of the test rows (mean/std, min/max, median/IQR, ...) leak into
#   the training features, which makes the reported scores optimistic.
# - The split depends only on the number of rows and random_state, so every
#   scaler is evaluated on exactly the same train/test partition.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scaling methods to compare, keyed by the label used in the printed output.
# NOTE(review): the first four are per-feature affine rescalings; tree-based
# models are expected to be largely insensitive to those, so only the row-wise
# L2 normalisation is likely to change the scores noticeably - confirm against
# the printed results.
scalers = {
    "Z-Score Standardization": StandardScaler(),
    "Min-Max Scaling": MinMaxScaler(),
    "Max Absolute Scaling": MaxAbsScaler(),
    "Robust Scaling": RobustScaler(),
    "L2 Normalization": Normalizer(norm='l2')
}

# Maps scaler label -> {"Accuracy": float, "Classification Report": dict}
results = {}

# Evaluate each scaling method on the shared train/test partition
for name, scaler in scalers.items():
    print(f"Using {name}")

    # Learn the scaling parameters from the training rows only, then apply the
    # already-fitted transform to the held-out rows.
    X_train = scaler.fit_transform(X_train_raw)
    X_test = scaler.transform(X_test_raw)

    # Train a fresh model per scaler; the fixed seed keeps runs comparable
    rf_model = RandomForestClassifier(random_state=42)
    rf_model.fit(X_train, y_train)

    # Predict on the held-out rows
    y_pred = rf_model.predict(X_test)

    # Score the predictions; the dict form of the report is stored, the text
    # form is printed below.
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred, output_dict=True)

    print(f"Accuracy: {accuracy}")
    print("Classification Report:")
    print(classification_report(y_test, y_pred))
    results[name] = {
        "Accuracy": accuracy,
        "Classification Report": report
    }

# Summary of results
print("\nSummary of Scaling Method Accuracies:")
for name, metrics in results.items():
    print(f"{name}: {metrics['Accuracy']:.4f}")


Using Z-Score Standardization
Accuracy: 0.7512437810945274
Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.82      0.82        93
           1       0.63      0.66      0.64        90
           2       0.65      0.61      0.63        90
           3       0.60      0.50      0.55        42
           4       0.79      0.78      0.78       202
           5       0.58      0.59      0.59        64
           6       0.52      0.39      0.44        80
           7       0.69      0.68      0.68        81
           8       0.83      0.86      0.84       418
           9       0.63      0.69      0.66       108
          10       0.76      0.79      0.77       198
          11       0.77      0.76      0.76       164
          12       0.80      0.79      0.80       273
          13       0.79      0.81      0.80       107

    accuracy                           0.75      2010
   macro avg       0.70      0.69      0.70      201