In [39]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split 

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Load the data
data = pd.read_csv('dataset1.csv')

# Preprocess the data
le = LabelEncoder()
data['attack'] = le.fit_transform(data['attack'])
data = data.drop(['city', 'region', 'country', 'location','timezone'], axis=1)
data = pd.get_dummies(data, columns=['ip', 'organization', 'postalcode' ])

# Split the data into training and testing sets
X = data.drop(['attack'], axis=1)
y = data['attack']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Perform KNN classification
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)
y_pred_knn = knn.predict(X_test)
rmse_knn = mean_squared_error(y_test, y_pred_knn, squared=False)
r2_knn = r2_score(y_test, y_pred_knn)


# Train and evaluate logistic regression model
lr = LogisticRegression()
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)
acc_lr = accuracy_score(y_test, y_pred)
y_pred_logreg = lr.predict(X_test)
rmse_logreg = mean_squared_error(y_test, y_pred_logreg, squared=False)
r2_logreg = r2_score(y_test, y_pred_logreg)

# Train and evaluate random forest model
rf = RandomForestClassifier()
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)
acc_rf = accuracy_score(y_test, y_pred)
y_pred_rf = rf.predict(X_test)
rmse_rf = mean_squared_error(y_test, y_pred_rf, squared=False)
r2_rf = r2_score(y_test, y_pred_rf)


print('KNN score:', knn.score(X_test, y_test))
print('Logistic Regression accuracy: ', acc_lr)
print('Random Forest accuracy: ', acc_rf)
print('RMSE AND R^2 SCORE')
print('KNN RMSE:', rmse_knn)
print('KNN R2 score:', r2_knn)
print('Logistic Regression RMSE:', rmse_logreg)
print('Logistic Regression R2 score:', r2_logreg)
print('Random Forest RMSE:', rmse_rf)
print('Random Forest R2 score:', r2_rf)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


KNN score: 0.9380952380952381
Logistic Regression accuracy:  0.9333333333333333
Random Forest accuracy:  0.9238095238095239
RMSE AND R^2 SCORE
KNN RMSE: 0.47809144373375745
KNN R2 score: 0.887243277104121
Logistic Regression RMSE: 0.4629100498862757
Logistic Regression R2 score: 0.8942905722851134
Random Forest RMSE: 0.4309458036856673
Random Forest R2 score: 0.9083851626470983


In [38]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the dataset
df = pd.read_csv("keylogger1.csv")

# Preprocessing
le = LabelEncoder()
df["username"] = le.fit_transform(df["username"])
df["password"] = le.fit_transform(df["password"])
df["attack"] = le.fit_transform(df["attack"])
scaler = MinMaxScaler()
df[["username", "password"]] = scaler.fit_transform(df[["username", "password"]])

# Split the dataset into training and testing sets
X = df.drop("attack", axis=1)
y = df["attack"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# KNN
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
knn_pred = knn.predict(X_test)
knn_acc = accuracy_score(y_test, knn_pred)
y_pred_knn = knn.predict(X_test)
rmse_knn = mean_squared_error(y_test, y_pred_knn, squared=False)
r2_knn = r2_score(y_test, y_pred_knn)



# Logistic Regression
lr = LogisticRegression()
lr.fit(X_train, y_train)
lr_pred = lr.predict(X_test)
lr_acc = accuracy_score(y_test, lr_pred)
y_pred_logreg = lr.predict(X_test)
rmse_logreg = mean_squared_error(y_test, y_pred_logreg, squared=False)
r2_logreg = r2_score(y_test, y_pred_logreg)

# Random Forest
rf = RandomForestClassifier()
rf.fit(X_train, y_train)
rf_pred = rf.predict(X_test)
rf_acc = accuracy_score(y_test, rf_pred)
y_pred_rf = rf.predict(X_test)
rmse_rf = mean_squared_error(y_test, y_pred_rf, squared=False)
r2_rf = r2_score(y_test, y_pred_rf)

print("KNN accuracy:", knn_acc)
print("Logistic Regression accuracy:", lr_acc)
print("Random Forest accuracy:", rf_acc)
print('RMSE AND R^2 SCORE')
print('KNN RMSE:', rmse_knn)
print('KNN R2 score:', r2_knn)
print('Logistic Regression RMSE:', rmse_logreg)
print('Logistic Regression R2 score:', r2_logreg)
print('Random Forest RMSE:', rmse_rf)
print('Random Forest R2 score:', r2_rf)

KNN accuracy: 0.8907563025210085
Logistic Regression accuracy: 0.5210084033613446
Random Forest accuracy: 0.9915966386554622
RMSE AND R^2 SCORE
KNN RMSE: 0.3305203435175989
KNN R2 score: 0.5592592592592593
Logistic Regression RMSE: 0.6920921879624531
Logistic Regression R2 score: -0.9324786324786323
Random Forest RMSE: 0.09166984970282113
Random Forest R2 score: 0.9660968660968661
