In [1]:
import tkinter as tk
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer

# Read the data from the CSV file
urls_data = pd.read_csv("urldata.csv")

def makeTokens(f):
    tkns_BySlash = str(f.encode('utf-8')).split('/')  # make tokens after splitting by slash
    total_Tokens = []

    for i in tkns_BySlash:
        tokens = str(i).split('-')  # make tokens after splitting by dash
        tkns_ByDot = []

        for j in range(0, len(tokens)):
            temp_Tokens = str(tokens[j]).split('.')  # make tokens after splitting by dot
            tkns_ByDot = tkns_ByDot + temp_Tokens
        total_Tokens = total_Tokens + tokens + tkns_ByDot

    total_Tokens = list(set(total_Tokens))  # remove redundant tokens

    if 'com' in total_Tokens:
        total_Tokens.remove('com')  # removing .com since it occurs a lot of times and should not be included in our features
 
    return total_Tokens

# Create the vectorizer
vectorizer = TfidfVectorizer(tokenizer=makeTokens)

# Extract features and labels from the data
url_list = urls_data["url"]
y = urls_data["label"]

# Transform the URLs into numerical representation
X = vectorizer.fit_transform(url_list)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)  

# Create and train the logistic regression model
logit = LogisticRegression(solver='sag')
logit.fit(X_train, y_train)
print("Accuracy ",logit.score(X_test, y_test))

# Function to handle the button click event
def predict_url():
    user_input = entry.get()
    user_input = [user_input]
    user_input = vectorizer.transform(user_input)
    prediction = logit.predict(user_input)
    result_label.configure(text=f"Prediction: {prediction[0]}")

# Create the GUI window
window = tk.Tk()
window.title("URL Classification")
window.geometry("800x500")
f1=tk.Frame(window,bg='light green',borderwidth=10,relief="sunken")
f1.pack(side="bottom",fill="x")
lable=tk.Label(f1, text="PROJECT ON MALICIOUS URL DETECTION",bg='pink',borderwidth=5,relief="groove")
lable.pack(pady= 10,padx=100)
# Create the input label and entry field
label = tk.Label(window, text="Enter URL:",bg='yellow',relief="groove",borderwidth=15)
label.pack()
entry = tk.Entry(window)
entry.pack()

# Create the prediction button
button = tk.Button(window, text="Predict",fg='red',command=predict_url)
button.pack(padx=60)

# Create the label to display the prediction result
result_label = tk.Label(window, text="",bg="light blue",borderwidth=10,relief="sunken")
result_label.pack()

# Start the GUI event loop
window.mainloop()



Accuracy  0.9620068019121459
