In [2]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import cross_val_predict, StratifiedKFold
import pandas as pd
from sklearn.preprocessing import StandardScaler


In [3]:
data = pd.read_csv('D:\\Bot project\\final_data\\final.csv') 

In [4]:
# Preprocessing the data
X = data.drop(['Unnamed: 0', 'created_at', 'id', 'lang', 'location', 'screen_name', 'account_type'], axis=1)
X = pd.get_dummies(X, drop_first=True)
y = data['account_type'].apply(lambda x: 1 if x == 'bot' else 0)

# Scale the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


In [5]:
# Initialize the model
model_lr = LogisticRegression(C=1.0, penalty='l2', solver='liblinear', random_state=42)

# Apply 5-fold cross-validation
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
y_pred_lr = cross_val_predict(model_lr, X_scaled, y, cv=cv)

# Evaluate the model
conf_matrix_lr = confusion_matrix(y, y_pred_lr)
accuracy_lr = accuracy_score(y, y_pred_lr)
precision_lr = precision_score(y, y_pred_lr)
recall_lr = recall_score(y, y_pred_lr)
f1_lr = f1_score(y, y_pred_lr)

# Print the results
print("Logistic Regression Results:")
print("Confusion Matrix:\n", conf_matrix_lr)
print("Accuracy:", accuracy_lr)
print("Precision:", precision_lr)
print("Recall:", recall_lr)
print("F1 Score:", f1_lr)

Logistic Regression Results:
Confusion Matrix:
 [[21237  3776]
 [ 4877  7548]]
Accuracy: 0.768871200384636
Precision: 0.6665489226421759
Recall: 0.6074849094567405
F1 Score: 0.635647816750179
