In [25]:
#1. Preparing the Dataset
import pandas as pd

# Load the dataset
df = pd.read_csv('/content/malicious_phish.csv')

# Display the first few rows of the dataset
print(df.head())


                                                 url        type
0                                   br-icloud.com.br    phishing
1                mp3raid.com/music/krizz_kaliko.html      benign
2                    bopsecrets.org/rexroth/cr/1.htm      benign
3  http://www.garage-pirenne.be/index.php?option=...  defacement
4  http://adventure-nicaragua.net/index.php?optio...  defacement


In [26]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense, Dropout
from sklearn.metrics import accuracy_score
import joblib

# Load your data (assuming 'df' is your DataFrame containing 'url' and 'type' columns)
# df = pd.read_csv('your_data.csv')

# Prepare the data
X = df['url']
y = df['type']

# Encode the target labels
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Tokenize the text data
max_words = 10000  # Maximum number of words to keep
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(X)
X = tokenizer.texts_to_sequences(X)

# Pad sequences to ensure uniform length
max_sequence_length = 100  # Maximum sequence length
X = pad_sequences(X, maxlen=max_sequence_length)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the CNN model
model = Sequential()
model.add(Embedding(max_words, 128, input_length=max_sequence_length))
model.add(Conv1D(128, 5, activation='relu'))
model.add(GlobalMaxPooling1D())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=1, batch_size=32, validation_split=0.2)

# Evaluate the model on the test set
_, accuracy = model.evaluate(X_test, y_test)
print(f'Accuracy: {accuracy}')

# Save the model
model.save('cnn_model.h5')


Accuracy: 0.5617672204971313


  saving_api.save_model(


In [27]:
from tensorflow.keras.models import load_model
import numpy as np

# Sample URL to test
sample_url = "https://www.facebook.com"

# Tokenize and pad the sample URL
sample_sequence = tokenizer.texts_to_sequences([sample_url])
sample_padded = pad_sequences(sample_sequence, maxlen=max_sequence_length)

# Load the trained model
loaded_model = load_model('/content/cnn_model.h5')

# Make predictions
predictions = loaded_model.predict(sample_padded)

# Decode the predictions
predicted_label = label_encoder.inverse_transform([np.argmax(predictions)])

print(f"Predicted label for '{sample_url}': {predicted_label}")


Predicted label for 'https://www.facebook.com': ['benign']


In [28]:
!pip install sagemaker



In [30]:
#2. Deployment
import boto3
import sagemaker
from sagemaker.tensorflow import TensorFlowModel
import tarfile
import os

# Define your AWS credentials and region
aws_access_key_id = 'ASIAW3MEBAAHWTKVOER5'
aws_secret_access_key = '/ue6np9f6UaRrf2nivmZ3NKC99YgC5AuMhoTiYTR'
aws_session_token = 'IQoJb3JpZ2luX2VjEFQaCXVzLXdlc3QtMiJHMEUCIHayg1MFXN92XbQfMBLgFVcUeIrqZTuH4tD+S/ZXdvjGAiEA/O8xzgWCPL68kn1Ewe5GxDDFZqfvm4bwLcfljfb1Sf4qugIIrf//////////ARAAGgw0NzExMTI2Nzk0MzkiDFyzaVWfLOcLZhC6lyqOAlTGjqN+Oz4c87x1sVt+fgc75AlXPN17iFfyEQvcoAr8axFM9qJ+q+YLLRu5ihJknIZPMUeIvJAlTfPoAAaHxQ7LNhwPt3sBO71jca2Ymv/j1xO2L1ciznElBgrgRHPtQNxEMSWki/01G4ETTQRL9fRPHej/zL1jfQCb7uqZ+ZWJXkpM36XsmEH3IJgSqmErFeX5zQ33HlpBG9L41VkjYau8x/aV1x2dNIw846KkDh2moqoAe2gszlMdYhZHcvG11K5WdOzx7b0qSOWHNgleb+eafruX+t56MLDbseOU+5cZ828uXnOlEaRJR1C88pCQe09PARFgvihuw76C5dhHqB7lCyVqt1qOt8dPqUhxczD50N+xBjqdAVyHQ3CvBUgUv8ccGQQyumg6+9Lojh9Nt6wzKWzwfBNoIsDy2A5M1wYpyyeilqKNhtjSib07e7Hks5C3FzIYKki9HZpraZmbUwdhFwmuq2chXsKr549HK+29o5KDsmRC8vSAScSFQhWKLR2HGNgHsYKQCWxKIqK6nJtheCN0DrkmJEyYxzntjGomScPl1Kul1Slp0geKiuA1Ei2BNsk='
region = 'us-east-1'

# Initialize SageMaker session and specify role
session = boto3.session.Session(aws_access_key_id=aws_access_key_id,
                                 aws_secret_access_key=aws_secret_access_key,
                                 aws_session_token=aws_session_token,
                                 region_name=region)
sagemaker_session = sagemaker.Session(boto_session=session)

# Specify the S3 bucket and prefix for model data
bucket = 'temiproject'
prefix = 'temiproject'

# Define the path to your model artifact and the desired tar.gz filename
model_artifact = '/content/cnn_model.h5'
tar_gz_filename = 'cnn_model.tar.gz'

# Create a tar.gz file from the model artifact
with tarfile.open(tar_gz_filename, 'w:gz') as tar:
    tar.add(model_artifact, arcname=os.path.basename(model_artifact))

# Upload the tar.gz model artifact to S3
s3_tar_gz_model_artifact = sagemaker_session.upload_data(tar_gz_filename, bucket=bucket, key_prefix=prefix)

# Create a SageMaker TensorFlowModel with the tar.gz model artifact
tensorflow_model = TensorFlowModel(model_data=s3_tar_gz_model_artifact,
                                   role='LabRole',  # Specify your SageMaker role ARN here
                                   framework_version='2.6.0',
                                   sagemaker_session=sagemaker_session)

# Deploy the model as an endpoint
endpoint_name = 'projectendpoint'
predictor = tensorflow_model.deploy(initial_instance_count=1,
                                    instance_type='ml.t2.medium',
                                    endpoint_name=endpoint_name)

"""
Replace placeholders like '1)your_access_key_id', '2)your_secret_access_key', '3)your_aws_region', 'your_sagemaker_role_arn',
'your_s3_bucket_name', 'your_s3_prefix', 'path_to_your_model_artifact', and 'your_endpoint_name' with your actual AWS credentials,
configurations, and paths.
Get 1, 2, & 3 on AWS. Its under Details (on the right side of the screen. Hit the start button first)
"""

S3UploadFailedError: Failed to upload cnn_model.tar.gz to temiproject/temiproject/cnn_model.tar.gz: An error occurred (AccessDenied) when calling the CreateMultipartUpload operation: Access Denied

**Streamlit Client**

In [31]:
!pip install streamlit boto3


Collecting streamlit
  Downloading streamlit-1.34.0-py2.py3-none-any.whl (8.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.5/8.5 MB[0m [31m22.0 MB/s[0m eta [36m0:00:00[0m
Collecting gitpython!=3.1.19,<4,>=3.0.7 (from streamlit)
  Downloading GitPython-3.1.43-py3-none-any.whl (207 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.3/207.3 kB[0m [31m24.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.0-py2.py3-none-any.whl (6.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m39.8 MB/s[0m eta [36m0:00:00[0m
Collecting watchdog>=2.1.5 (from streamlit)
  Downloading watchdog-4.0.0-py3-none-manylinux2014_x86_64.whl (82 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m83.0/83.0 kB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m
Collecting gitdb<5,>=4.0.1 (from gitpython!=3.1.19,<4,>=3.0.7->streamlit)
  Downloading gitdb-4.0

In [32]:
!streamlit run app.py
'''Upload the app.py to your colab environment.
   Open the app and make changes to it. I've provided guidelines there as well.
   Run this code when u r done.
'''


Usage: streamlit run [OPTIONS] TARGET [ARGS]...
Try 'streamlit run --help' for help.

Error: Invalid value: File does not exist: app.py


"Upload the app.py to your colab environment.\n   Open the app and make changes to it. I've provided guidelines there as well.\n   Run this code when u r done.\n"