In [38]:
#Build a spam filter using Python and the Naive Bayes algorithm

import pandas as pd 
from sklearn.feature_extraction.text import CountVectorizer 
from sklearn.naive_bayes import MultinomialNB 
from sklearn.model_selection import train_test_split 
from sklearn.metrics import accuracy_score, classification_report 
 
# Load Dataset 
file_path = r"C:\Users\Samruddhi\Downloads\spam.csv" 
data = pd.read_csv(file_path, encoding='latin-1', usecols=[0, 1], 
names=['label', 'message'], header=0) 
 
# Data Preprocessing 
data['label'] = data['label'].map({'ham': 0, 'spam': 1}) 
X = data['message'] #input
y = data['label'] #target
 
vectorizer = CountVectorizer() 
X = vectorizer.fit_transform(X) 
 
# Split the data into training and testing sets 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, 
random_state=42) 
 
# Train the Naive Bayes model 
model = MultinomialNB() 
model.fit(X_train, y_train) 
 
# Predict on the test data 
y_pred = model.predict(X_test) 
 
# Evaluate the model 
print("Accuracy:", accuracy_score(y_test, y_pred)) 
print("Classification Report:\n", classification_report(y_test, y_pred)) 
 
# Test with new messages 
def predict_message(message): 
    msg_transformed = vectorizer.transform([message]) 
    prediction = model.predict(msg_transformed) 
    return 'Spam' if prediction[0] == 1 else 'Ham' 
 
print(predict_message("Free entry in 2 a wkly comp to win FA Cup final"))

print(predict_message("Rofl. Its true to its name"))

Accuracy: 0.9784688995215312
Classification Report:
               precision    recall  f1-score   support

           0       0.99      0.99      0.99      1453
           1       0.92      0.92      0.92       219

    accuracy                           0.98      1672
   macro avg       0.95      0.95      0.95      1672
weighted avg       0.98      0.98      0.98      1672

Spam
Ham


In [43]:
#Classify DDoS attacks with Artificial Intelligence 

import pandas as pd  
from sklearn.model_selection import train_test_split  
from sklearn.preprocessing import StandardScaler, LabelEncoder  
from sklearn.ensemble import RandomForestClassifier  
from sklearn.metrics import classification_report, accuracy_score  

# Load the dataset  
file_path = r"C:\Users\Samruddhi\OneDrive\Desktop\DATASETS\aics datasets\TCP-SYNC DATASET.csv"     
df = pd.read_csv(file_path) 

# Drop non-numeric columns except the target label  
df_cleaned = df.drop(columns=["Flow ID", "Src IP", "Dst IP", "Timestamp"]).dropna()  

# Encode the target variable  
label_encoder = LabelEncoder()  
df_cleaned["Label"] = label_encoder.fit_transform(df_cleaned["Label"])  

# Split features and target  
X = df_cleaned.drop(columns=["Label"])  
y = df_cleaned["Label"]  

# Standardize the features  
scaler = StandardScaler()  
X_scaled = scaler.fit_transform(X)  

# Split into training and test sets  
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, 
test_size=0.2, random_state=42)  

# Train a Random Forest classifier  
model = RandomForestClassifier(n_estimators=100, random_state=42)  
model.fit(X_train, y_train)  

# Make predictions  
y_pred = model.predict(X_test)  

# Evaluate the model  
accuracy = accuracy_score(y_test, y_pred)  
report = classification_report(y_test, y_pred, 
target_names=label_encoder.classes_) 

print(f"Model Accuracy: {accuracy * 100:.2f}%")  
print("Classification Report:\n", report)  


Model Accuracy: 100.00%
Classification Report:
               precision    recall  f1-score   support

        DDOS       1.00      1.00      1.00       689
      Normal       1.00      1.00      1.00       558

    accuracy                           1.00      1247
   macro avg       1.00      1.00      1.00      1247
weighted avg       1.00      1.00      1.00      1247



In [18]:
#Split sample data into training and test sets
import pandas as pd 
from sklearn import datasets 
from sklearn.model_selection import train_test_split 
 
# Load the Iris dataset 
iris = datasets.load_iris() 
X = pd.DataFrame(iris.data, columns=iris.feature_names) 
y = pd.DataFrame(iris.target, columns=['Target']) 
#print(iris)

# Display first few rows 
print("First 5 rows of data:") 
print(X.head()) 

print(y.head())
# 0	Iris-setosa
# 1	Iris-versicolor
# 2	Iris-virginica
# Split data into training and test sets (70% training, 30% testing) 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42) 
 
# Display results 
print(f"\nTotal Samples: {len(X)}") 
print(f"Training Samples: {len(X_train)}") 
print(f"Test Samples: {len(X_test)}") 

First 5 rows of data:
   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
0                5.1               3.5                1.4               0.2
1                4.9               3.0                1.4               0.2
2                4.7               3.2                1.3               0.2
3                4.6               3.1                1.5               0.2
4                5.0               3.6                1.4               0.2
   Target
0       0
1       0
2       0
3       0
4       0

Total Samples: 150
Training Samples: 105
Test Samples: 45


In [34]:
# Perform feature engineering operations on raw data

import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Load Dataset
train_path = r"C:\Users\Samruddhi\OneDrive\Desktop\DATASETS\aics datasets\train.csv"
test_path = r"C:\Users\Samruddhi\OneDrive\Desktop\DATASETS\aics datasets\test.csv"

train_data = pd.read_csv(train_path)
test_data = pd.read_csv(test_path)

print("Train Data Shape:", train_data.shape)
print("Test Data Shape:", test_data.shape)

# Combine Train and Test for Consistent Feature Engineering
data = pd.concat([train_data, test_data], axis=0, ignore_index=True)

# 1. Handle Missing Values (avoid inplace=True with chained assignment)
data.isnull().sum()

data['Age'] = data['Age'].fillna(data['Age'].median())
data['Embarked'] = data['Embarked'].fillna(data['Embarked'].mode()[0])
data['Fare'] = data['Fare'].fillna(data['Fare'].median())
data['Survived'].fillna(data['Survived'].mode()[0])

# 2. Drop Columns with Excessive Missing Data
data = data.drop(['Cabin', 'Ticket', 'Name',], axis=1)

# 3. Encode Categorical Features
label_encoder = LabelEncoder()
data['Sex'] = label_encoder.fit_transform(data['Sex'])
data['Embarked'] = label_encoder.fit_transform(data['Embarked'])
# "male" → 1, "female" → 0, embarked C->0,Q->1,S->2

# 4. Feature Scaling
scaler = StandardScaler()
data[['Age', 'Fare']] = scaler.fit_transform(data[['Age', 'Fare']])
#Age and Fare become easier for the computer to understand and use to make smart guesses.

# 5. Create New Features
data['FamilySize'] = data['SibSp'] + data['Parch'] + 1 #FamilySize=siblings/spouses + parents/children + self
data['IsAlone'] = (data['FamilySize'] == 1).astype(int) #IsAlone , 1 if the passenger is alone, 0

print("Feature Engineering Completed.")
print(data.head())

# Split back to train and test
train_final = data[:len(train_data)].copy()
test_final = data[len(train_data):].copy()

print("Final Train Shape:", train_final.shape)
print("Final Test Shape:", test_final.shape)


Train Data Shape: (891, 12)
Test Data Shape: (418, 11)
Feature Engineering Completed.
   PassengerId  Survived  Pclass  Sex       Age  SibSp  Parch      Fare  \
0            1       0.0       3    1 -0.581628      1      0 -0.503291   
1            2       1.0       1    0  0.658652      1      0  0.734744   
2            3       1.0       3    0 -0.271558      0      0 -0.490240   
3            4       1.0       1    0  0.426099      1      0  0.383183   
4            5       0.0       3    1  0.426099      0      0 -0.487824   

   Embarked  FamilySize  IsAlone  
0         2           2        0  
1         0           2        0  
2         2           1        1  
3         2           2        0  
4         2           1        1  
Final Train Shape: (891, 11)
Final Test Shape: (418, 11)


In [35]:
train_final

Unnamed: 0,PassengerId,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,FamilySize,IsAlone
0,1,0.0,3,1,-0.581628,1,0,-0.503291,2,2,0
1,2,1.0,1,0,0.658652,1,0,0.734744,0,2,0
2,3,1.0,3,0,-0.271558,0,0,-0.490240,2,1,1
3,4,1.0,1,0,0.426099,1,0,0.383183,2,2,0
4,5,0.0,3,1,0.426099,0,0,-0.487824,2,1,1
...,...,...,...,...,...,...,...,...,...,...,...
886,887,0.0,2,1,-0.194041,0,0,-0.392119,2,1,1
887,888,1.0,1,0,-0.814181,0,0,-0.063437,2,1,1
888,889,0.0,3,0,-0.116523,1,2,-0.190076,2,4,0
889,890,1.0,1,1,-0.271558,0,0,-0.063437,0,1,1


In [37]:
test_final

Unnamed: 0,PassengerId,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,FamilySize,IsAlone
891,892,,3,1,0.387341,0,0,-0.492093,1,1,1
892,893,,3,0,1.356310,1,0,-0.508125,2,2,0
893,894,,2,1,2.519072,0,0,-0.456164,1,1,1
894,895,,3,1,-0.194041,0,0,-0.475981,2,1,1
895,896,,3,0,-0.581628,1,1,-0.405895,2,3,0
...,...,...,...,...,...,...,...,...,...,...,...
1304,1305,,3,1,-0.116523,0,0,-0.487824,2,1,1
1305,1306,,1,0,0.736169,0,0,1.462034,0,1,1
1306,1307,,3,1,0.697411,0,0,-0.503291,2,1,1
1307,1308,,3,1,-0.116523,0,0,-0.487824,2,1,1


In [31]:
data.isnull().sum()

PassengerId    0
Pclass         0
Sex            0
Age            0
SibSp          0
Parch          0
Fare           0
Embarked       0
FamilySize     0
IsAlone        0
dtype: int64

In [28]:
train_data

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C
