In [None]:
# Step 1: Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Step 2: Load the dataset
# The path is relative, so the CSV is looked up in the current working directory.
data = pd.read_csv('user_behavior_dataset.csv')

# Step 3: Inspect the data
print(data.head())  # Preview the first few rows of the dataset
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
data.info()  # Column names, dtypes and non-null counts

# Step 4: Data Preprocessing

# Missing values: discard every row that contains at least one NaN. This is the
# simplest strategy; imputing with fillna() is an alternative when dropping
# would throw away too many rows.
data = data.dropna(axis=0, how='any')

# Categorical columns have to be turned into numbers before modelling, e.g. with
# sklearn's LabelEncoder or OneHotEncoder. Sketch for a hypothetical 'Category'
# column:
#   from sklearn.preprocessing import LabelEncoder
#   data['Category'] = LabelEncoder().fit_transform(data['Category'])

# Step 5: Define features and target variable
# 'target' is a placeholder: replace it with the actual label column name of your dataset.
X = data.drop(columns='target')  # Every column except the label is a feature
y = data['target']  # The label to predict

# The scaler and SVM used later need purely numeric input, so any text/categorical
# feature columns are one-hot encoded here. Numeric columns pass through unchanged,
# which makes this a no-op for an all-numeric dataset.
# drop_first=True drops one redundant indicator per encoded column; dtype=float keeps
# the indicators numeric rather than boolean.
# NOTE(review): a high-cardinality text column (e.g. a free-form identifier) would
# expand into many indicator columns -- drop such columns before this step.
X = pd.get_dummies(X, drop_first=True, dtype=float)

# Step 6: Split the dataset into training and testing sets
# 30% of the rows are held out for testing; random_state fixes the shuffle so
# the split is reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Step 7: Scale the features (important for SVM performance)
# The scaler is fitted on the training split only and then applied to both
# splits, so the test rows never influence the fitted scaler.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 8: Train the SVM model
# A linear kernel is used here; 'rbf', 'poly', etc. are drop-in alternatives.
svm = SVC(kernel='linear').fit(X_train_scaled, y_train)

# Step 9: Predict on the test data
y_pred = svm.predict(X_test_scaled)

# Step 10: Evaluate the model's performance
# Each metric compares the held-out labels with the model's predictions.
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.4f}')

# Heading and value are emitted on separate lines through a single print call.
print('Classification Report:', classification_report(y_test, y_pred), sep='\n')

# The matrix is kept in `cm` because the plotting step reuses it.
cm = confusion_matrix(y_test, y_pred)
print('Confusion Matrix:', cm, sep='\n')

# Step 11: Visualize the Confusion Matrix
# Tick labels are derived from the data instead of being hard-coded for two
# classes. confusion_matrix() orders its rows/columns by the sorted labels that
# occur in y_test or y_pred, so the same ordering is rebuilt here; this keeps
# the axis labels correct for any number of classes.
class_names = [f'Class {label}' for label in sorted(set(y_test).union(y_pred))]

plt.figure(figsize=(6, 6))
sns.heatmap(
    cm,
    annot=True,  # Write the count inside each cell
    fmt='d',  # Counts are integers
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.xlabel('Predicted')  # Columns of the matrix are predicted labels
plt.ylabel('Actual')  # Rows of the matrix are true labels
plt.title('Confusion Matrix')
plt.show()
