Lab 1: Introduction to Jupyter Notebooks & Basic AI in Cybersecurity

AIM: Logistic Regression for malware detection.

In [None]:
# Lab 1: Malware Detection with Logistic Regression
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Configuration: where the dataset lives and the seed that makes the
# train/test split (and therefore the reported accuracy) repeatable.
DATA_PATH = "/content/drive/MyDrive/Colab Notebooks/AI in CS/pe_section_headers.csv"
SEED = 42

# Load the CSV, then pick the two feature columns and the label column.
data = pd.read_csv(DATA_PATH)
X = data[['size', 'entropy']]
y = data['is_malicious']

# Hold out 20% of the rows for evaluation. Without a fixed random_state the
# rows are shuffled differently on every run, so the accuracy printed below
# would not be reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SEED
)

# Fit the classifier on the training portion only.
model = LogisticRegression()
model.fit(X_train, y_train)

# Score the held-out rows and report the fraction predicted correctly.
predictions = model.predict(X_test)
print(f"Accuracy: {accuracy_score(y_test, predictions):.2f}")


Accuracy: 0.96


Lab 2: Spam Detection using Perceptrons

AIM: Classify emails using a perceptron.


In [None]:
# Lab 2: Spam Detection with Perceptron
from sklearn.linear_model import Perceptron
from sklearn.feature_extraction.text import TfidfVectorizer

# Toy corpus and its labels (1=spam, 0=ham); replace with the SpamAssassin
# dataset for a real experiment.
emails = [
    "Get free cash now!",
    "Meeting at 3 PM",
    "Win a prize!",
]
labels = [1, 0, 1]

# Learn the TF-IDF vocabulary from the corpus and vectorize it in one step.
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(emails)

# Train the perceptron on the vectorized corpus.
clf = Perceptron()
clf.fit(X, labels)

# Classify an unseen message using the vocabulary learned above.
test_email = ["Free lottery"]
test_features = vectorizer.transform(test_email)
is_spam = clf.predict(test_features)[0] == 1
if is_spam:
    print("Spam")
else:
    print("Ham")

Spam


Lab 3: Phishing Detection with Logistic Regression & Decision Trees

AIM: Detect phishing URLs.

In [None]:
# Lab 3: Phishing URL Detection
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression

# Toy training set; each row is [url_length, num_special_chars, domain_age].
X = [[100, 10, 365], [50, 2, 730], [120, 15, 1]]
y = [1, 0, 1]  # 1=phishing, 0=legit

# URL to classify, in the same feature order as X. Defined once so both
# models are guaranteed to score the same input.
sample_url = [[80, 5, 200]]

# Logistic Regression
lr_model = LogisticRegression()
lr_model.fit(X, y)
print("LR Prediction:", lr_model.predict(sample_url))

# Decision Tree
# Each of the three features separates these three rows perfectly, so the
# feature the tree splits on is a tie broken by its random number generator.
# The sample lands on different sides depending on that choice (its 5 special
# characters sit nearer the legit row), so without a fixed seed the printed
# prediction can change between runs.
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X, y)
print("DT Prediction:", dt_model.predict(sample_url))

LR Prediction: [1]
DT Prediction: [1]


Lab 4: Image Spam Detection with SVM

AIM: Classify spam images using SVM.


In [None]:
# Lab 4: Image Spam Detection (using HOG features)
from sklearn.svm import SVC
from skimage.feature import hog
import numpy as np

# Seeded generator so the simulated features, and therefore the printed
# prediction, are the same on every run.
rng = np.random.default_rng(42)

# Placeholder data: ten random 100-dimensional vectors standing in for HOG
# descriptors. `hog` is imported for the real pipeline but is not called
# here; replace the random vectors with hog(image) output for real data.
X = [rng.random(100) for _ in range(10)]
y = [1, 0, 1, 0, 1, 0, 0, 1, 0, 1]  # 1=spam, 0=normal

# Train SVM
clf = SVC(kernel='linear')
clf.fit(X, y)

# Score one more simulated feature vector. Because the inputs are random
# noise, the predicted label carries no meaning beyond showing the API.
print("Prediction:", clf.predict([rng.random(100)]))

Prediction: [1]


Lab 5: Malware Classification with Naive Bayes

AIM: Classify malware families.

In [None]:
# Lab 5: Naive Bayes for Malware
from sklearn.naive_bayes import GaussianNB

# One labelled sample per class; columns are [file_size, entropy, API_calls].
X = [
    [5000, 6.2, 150],
    [2000, 5.1, 50],
    [8000, 7.0, 200],
]
y = ["Trojan", "Benign", "Ransomware"]

# Build the Gaussian Naive Bayes classifier and fit it to the samples.
model = GaussianNB()
model.fit(X, y)

# Classify a file that was not part of the training samples.
unknown_sample = [[6000, 6.5, 180]]
predicted_family = model.predict(unknown_sample)
print("Predicted:", predicted_family)

Predicted: ['Trojan']


Lab 6: NLP for Spam Detection (BERT/Word2Vec)

AIM: Spam detection using NLP.

In [None]:
# Lab 6: NLP Spam Detection (using BERT embeddings)
!pip install transformers
from transformers import BertTokenizer, BertForSequenceClassification
import torch
# Load pre-trained BERT
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
# Example email
inputs = tokenizer("Win a free iPhone!", return_tensors="pt")
outputs = model(**inputs)
print("Spam Probability:", torch.sigmoid(outputs.logits[0,1]).item())



Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Spam Probability: 0.43773379921913147


Lab 7: Keystroke Dynamics for Authentication

AIM: Authenticate users via keystroke timing.


In [None]:
# Lab 7: Keystroke Authentication
from sklearn.ensemble import RandomForestClassifier

# Toy timing samples. Features: [dwell_time, flight_time, ...]
X = [[200, 150], [180, 120], [220, 180]]
y = [0, 1, 0]  # User1=0, User2=1

# Train
# A random forest resamples the training rows and randomises its splits, so
# on a data set this small the prediction can differ from run to run unless
# the seed is fixed.
clf = RandomForestClassifier(random_state=42)
clf.fit(X, y)

# Predict which user produced a new timing sample.
print("Authenticated User:", clf.predict([[190, 140]]))

Authenticated User: [0]


Lab 8: Facial Recognition with OpenCV

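AIM: Biometric authentication using facial recognition. (The cell below covers only the first step: detecting faces in an image with a pre-trained Haar cascade.)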

In [None]:
# Lab 8: Face Recognition (using OpenCV)
# Note: this cell detects faces (finds where they are); it does not identify
# whose face it is.
import cv2

# Image to analyse.
IMAGE_PATH = "/content/drive/MyDrive/Colab Notebooks/AI in CS/IMG20230606120128 (2) (1).jpg"

# Load pre-trained face detector
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
if face_cascade.empty():
    # The constructor does not raise when the XML cannot be loaded.
    raise RuntimeError("Failed to load the Haar cascade for frontal faces")

# Detect faces in an image
img = cv2.imread(IMAGE_PATH)
if img is None:
    # imread signals an unreadable or missing file by returning None rather
    # than raising; fail here with a clear message instead of inside cvtColor.
    raise FileNotFoundError(f"Could not read image: {IMAGE_PATH}")

# The detector runs on a single-channel image; imread yields BGR colour order.
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# scaleFactor: image shrink ratio between detection scales.
# minNeighbors: overlapping candidate hits required to keep a detection.
faces = face_cascade.detectMultiScale(gray, scaleFactor=1.3, minNeighbors=5)
print(f"Faces Detected: {len(faces)}")


Faces Detected: 1


Lab 9: Fraud Detection with XGBoost

AIM: Detect fraudulent transactions.

In [None]:
# Lab 9: Fraud Detection with XGBoost
!pip install xgboost
from xgboost import XGBClassifier
# Example: [amount, time, location]
X = [[1000, 12, 1], [20, 15, 0], [5000, 3, 1]] # 1=fraud, 0=legit
y = [1, 0, 1]
# Train
model = XGBClassifier()
model.fit(X, y)
print("Fraud Prediction:", model.predict([[2000, 2, 1]]))

Fraud Prediction: [1]


Lab 10: Cloud AI with IBM Watson

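AIM: Anomaly detection using IBM Watson. (The cell below runs scikit-learn's Isolation Forest locally as a stand-in; it does not call any IBM Watson service.)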

In [None]:
# Lab 10: Anomaly Detection with an Isolation Forest
from sklearn.ensemble import IsolationForest

# One-feature toy data set; 5000 is the value that stands out.
data = [[100], [110], [5000], [120], [130]]

# contamination is the expected share of outliers (0.2, i.e. about one of
# these five points). The forest is built from random splits, so the seed is
# fixed to keep the labels below the same on every run.
clf = IsolationForest(contamination=0.2, random_state=42)
clf.fit(data)
preds = clf.predict(data)

# predict() marks outliers with -1; pair each row with its own prediction.
for val, pred in zip(data, preds):
    status = "Anomaly" if pred == -1 else "Normal"
    print(f"Value: {val[0]} => {status}")


Value: 100 => Normal
Value: 110 => Normal
Value: 5000 => Anomaly
Value: 120 => Normal
Value: 130 => Normal
