In [None]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

In [None]:
spam_df = pd.read_csv('/content/spam.csv', encoding='latin-1', usecols=[0, 1],
names=['label', 'message'], header=0)

In [None]:
spam_df.head()

Unnamed: 0,label,message
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."


In [None]:
#data preprocessing
#converts ham->0 and spam->1

spam_df['label'] = spam_df['label'].map({'ham': 0, 'spam': 1})
X = spam_df['message']
y = spam_df['label']

In [None]:
spam_df

Unnamed: 0,label,message
0,0,"Go until jurong point, crazy.. Available only ..."
1,0,Ok lar... Joking wif u oni...
2,1,Free entry in 2 a wkly comp to win FA Cup fina...
3,0,U dun say so early hor... U c already then say...
4,0,"Nah I don't think he goes to usf, he lives aro..."
...,...,...
5567,1,This is the 2nd time we have tried 2 contact u...
5568,0,Will Ì_ b going to esplanade fr home?
5569,0,"Pity, * was in mood for that. So...any other s..."
5570,0,The guy did some bitching but I acted like i'd...


In [None]:
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(X)

In [None]:
vectorizer

In [None]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
random_state=42)

In [None]:
# Train the Naive Bayes model
model = MultinomialNB()
model.fit(X_train, y_train)

In [None]:
# Predict on the test data
y_pred = model.predict(X_test)

In [None]:
email_ham = ["baseball tickets later"]
email_ham_count = vectorizer.transform(email_ham)
prediction = model.predict(email_ham_count)
print(prediction)

[0]


In [None]:
email_spam = ["reward money click"]
email_spam_count = vectorizer.transform(email_spam)
prediction = model.predict(email_spam_count)
print(prediction)

[1]


In [None]:
# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.9784688995215312
Classification Report:
               precision    recall  f1-score   support

           0       0.99      0.99      0.99      1453
           1       0.92      0.92      0.92       219

    accuracy                           0.98      1672
   macro avg       0.95      0.95      0.95      1672
weighted avg       0.98      0.98      0.98      1672



In [None]:
# Test with new messages
def predict_message(message):
    msg_transformed = vectorizer.transform([message])
    prediction = model.predict(msg_transformed)
    return 'Spam' if prediction[0] == 1 else 'Ham'


In [None]:
print(predict_message("Free entry in 2 a wkly comp to win FA Cup final"))

print(predict_message("Rofl. Its true to its name"))


Spam
Ham


AICS Practical 2

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

In [2]:
df = pd.read_csv('/content/TCP-SYNC DATASET.csv')

In [3]:
# Drop non-numeric columns except the target label
df_cleaned = df.drop(columns=["Flow ID", "Src IP", "Dst IP",
"Timestamp"]).dropna()

In [4]:
# Encode the target variable
label_encoder = LabelEncoder()
df_cleaned["Label"] = label_encoder.fit_transform(df_cleaned["Label"])

In [5]:
# Split features and target
X = df_cleaned.drop(columns=["Label"])
y = df_cleaned["Label"]

In [6]:
# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [7]:
# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y,
test_size=0.2, random_state=42)

In [8]:
# Train a Random Forest classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

In [9]:
# Make predictions
y_pred = model.predict(X_test)

In [10]:
# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred,
target_names=label_encoder.classes_)
print(f"Model Accuracy: {accuracy * 100:.2f}%")
print("Classification Report:\n", report)

Model Accuracy: 100.00%
Classification Report:
               precision    recall  f1-score   support

        DDOS       1.00      1.00      1.00       689
      Normal       1.00      1.00      1.00       558

    accuracy                           1.00      1247
   macro avg       1.00      1.00      1.00      1247
weighted avg       1.00      1.00      1.00      1247

