# Setup

In [None]:
# Upgrade scikit-learn in the notebook environment before the imports below run.
!pip install --upgrade scikit-learn

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import csv

from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.datasets import make_classification

from tensorflow.keras import layers, models, datasets

# SVM

## Set up dataset

In [None]:
scaler = StandardScaler()

In [None]:
df = pd.read_csv('') # Replace the path to the dataset to the working one
df.dropna(inplace=True)
columns_drop = ['label', 'image', 'style', 'category']

X = df.drop(columns_drop, axis=1)
X_norm = scaler.fit_transform(X)
y = df['label']
y_category = df['category']

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X_norm, y, test_size=0.2, stratify=y_category, random_state=42)

## Distinguish AI and Human

In [None]:
# Hyper-parameter search for the SVM. The grid is split per kernel because
# `gamma` has no effect on the linear kernel; crossing the two only refits
# identical linear models once per gamma value.
param_grid = [
    {'kernel': ['linear'], 'C': [0.1, 1, 10]},
    {'kernel': ['rbf'], 'C': [0.1, 1, 10], 'gamma': [0.1, 1, 10, 'scale', 'auto']},
]
svm = SVC()

# 5-fold cross-validated search on the training split.
grid_search = GridSearchCV(svm, param_grid, verbose=3, cv=5)
grid_search.fit(X_train, y_train)

print("Best parameters found: ", grid_search.best_params_)
print("Best score: ", grid_search.best_score_)

# Accuracy of the best estimator on the held-out test split.
best_svm = grid_search.best_estimator_
test_accuracy = best_svm.score(X_test, y_test)

print("Test set accuracy with best parameters:", test_accuracy)

In [None]:
model = SVC(kernel='rbf', C=10, gamma='auto')

In [None]:
y_pred = model.predict(X_test)

In [None]:
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')

In [None]:
cm = confusion_matrix(y_test, y_pred, labels=model.classes_)
disp = ConfusionMatrixDisplay(confusion_matrix=cm,
                              display_labels=model.classes_)
disp.plot()

## Distinguish different categories

In [None]:
# This section predicts the category, so the targets are switched from
# df['label'] to df['category']. Calling train_test_split with the same
# test_size, stratify labels and random_state as the feature split reproduces
# the same row assignment, so the new targets line up with X_train / X_test.
# NOTE: this overwrites y_train / y_test; re-run the split cell before going
# back to the label experiments.
y_train, y_test = train_test_split(y_category, test_size=0.2, stratify=y_category, random_state=42)

# The grid is split per kernel because `gamma` has no effect on the linear
# kernel; crossing the two only refits identical linear models.
param_grid = [
    {'kernel': ['linear'], 'C': [0.1, 1, 10]},
    {'kernel': ['rbf'], 'C': [0.1, 1, 10], 'gamma': [0.1, 1, 10, 'scale', 'auto']},
]
svm = SVC()

# 5-fold cross-validated search on the training split.
grid_search = GridSearchCV(svm, param_grid, verbose=3, cv=5)
grid_search.fit(X_train, y_train)

print("Best parameters found: ", grid_search.best_params_)
print("Best score: ", grid_search.best_score_)

# Accuracy of the best estimator on the held-out test split.
best_svm = grid_search.best_estimator_
test_accuracy = best_svm.score(X_test, y_test)

print("Test set accuracy with best parameters:", test_accuracy)

In [None]:
model = SVC(kernel='rbf', C=10, gamma='scale')

In [None]:
y_pred = model.predict(X_test)

In [None]:
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')

In [None]:
cm = confusion_matrix(y_test, y_pred, labels=model.classes_)
disp = ConfusionMatrixDisplay(confusion_matrix=cm,
                              display_labels=model.classes_)
disp.plot()
plt.xticks(rotation=90)
plt.show()

# LR

## Set up dataset

In [None]:
df = pd.read_csv('/content/CS180H_size6000.csv') # Replace the path to the dataset to the working one
df.dropna(inplace=True)
columns_drop = ['label', 'image', 'style']
X = df.drop(columns_drop, axis=1)
y = df['label']

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y_category, random_state=42)

## Distinguish AI and Human

In [None]:
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('logreg', LogisticRegression())
])

In [None]:
param_grid = {
    'logreg__solver': ['lbfgs', 'saga', 'liblinear'],
    'logreg__penalty': ['l2', 'elasticnet'],
    'logreg__C': [0.2, 0.3, 0.5, 0.7, 0.8, 1],
    'logreg__max_iter': [50, 80, 100, 120, 200, 500, 1000]
}

In [None]:
grid_search = GridSearchCV(pipeline, param_grid, cv=5, verbose=3)
grid_search.fit(X_train, y_train)

In [None]:
print("Best parameters found: ", grid_search.best_params_)
print("Best score: ", grid_search.best_score_)

best_lr = grid_search.best_estimator_
test_accuracy = best_lr.score(X_test, y_test)

print("Test set accuracy with best parameters:", test_accuracy)

In [None]:
model = make_pipeline(StandardScaler(), LogisticRegression(C=1, max_iter=50, penalty='l2', solver='liblinear'))
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')

In [None]:
cm = confusion_matrix(y_test, y_pred, labels=model.classes_)
disp = ConfusionMatrixDisplay(confusion_matrix=cm,
                              display_labels=model.classes_)
disp.plot()

## Distinguish different categories

In [None]:
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('logreg', LogisticRegression())
])

In [None]:
param_grid = {
    'logreg__solver': ['lbfgs', 'saga', 'liblinear'],
    'logreg__penalty': ['l2', 'elasticnet'],
    'logreg__C': [0.2, 0.3, 0.5, 0.7, 0.8, 1],
    'logreg__max_iter': [50, 80, 100, 120, 200, 500, 1000]
}

In [None]:
grid_search = GridSearchCV(pipeline, param_grid, cv=5, verbose=3)
grid_search.fit(X_train, y_train)

In [None]:
print("Best parameters found: ", grid_search.best_params_)
print("Best score: ", grid_search.best_score_)

best_svm = grid_search.best_estimator_
test_accuracy = best_svm.score(X_test, y_test)

print("Test set accuracy with best parameters:", test_accuracy)

In [None]:
best_lr = make_pipeline(StandardScaler(), LogisticRegression(C=1, max_iter=100, penalty='l2', solver='lbfgs'))
best_lr.fit(X_train, y_train)

y_pred = best_lr.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')

In [None]:
cm = confusion_matrix(y_test, y_pred, labels=best_lr.classes_)
disp = ConfusionMatrixDisplay(confusion_matrix=cm,
                              display_labels=best_lr.classes_)
disp.plot()
plt.xticks(rotation=90)
plt.show()

# MLP

## Set up dataset

In [None]:
scaler = StandardScaler()
df = pd.read_csv('') # Replace the path to the dataset to the working one
df.dropna(inplace=True)
columns_drop = ['category', 'image', 'label', 'style']
X = df.drop(columns_drop, axis=1)
X_norm = scaler.fit_transform(X)
y = df['label']
y_category = df['category']

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X_norm, y, test_size=0.2, stratify=y_category, random_state=42)

## Distinguish AI and Human

In [None]:
mlp = MLPClassifier()

In [None]:
param_grid = {
    'hidden_layer_sizes': [(50,), (100,), (50, 50),],
    'activation': ['identity', 'logistic', 'relu'],
    'solver': ['adam'],
    'alpha': [0.0001, 0.05],
    'max_iter': [200, 300, 1000],
    'random_state': [30,40,50],
    'learning_rate_init': [0.0001]
}

In [None]:
grid = GridSearchCV(mlp, param_grid, verbose=5)

In [None]:
grid.fit(X_train, y_train)

In [None]:
print("Best parameters found: ", grid.best_params_)
print("Best score: ", grid.best_score_)

best_svm = grid.best_estimator_
test_accuracy = best_svm.score(X_test, y_test)

print("Test set accuracy with best parameters:", test_accuracy)

In [None]:
clf = MLPClassifier(alpha = 0.05, hidden_layer_sizes= (50,50), activation='relu', solver='adam', random_state=40, learning_rate_init=0.0001, max_iter=1000).fit(X_train, y_train)

In [None]:
clf.score(X_test, y_test)

In [None]:
y_pred = clf.predict(X_test)

In [None]:
cm = confusion_matrix(y_test, y_pred)

In [None]:
disp = ConfusionMatrixDisplay(confusion_matrix=cm,
                              display_labels=clf.classes_)
disp.plot()

## Distinguish different categories

In [None]:
mlp = MLPClassifier()

In [None]:
param_grid = {
    'hidden_layer_sizes': [(50,), (100,), (50, 50),],
    'activation': ['identity', 'logistic', 'relu'],
    'solver': ['adam'],
    'alpha': [0.0001, 0.05],
    'max_iter': [200, 300, 1000],
    'random_state': [30,40,50],
    'learning_rate_init': [0.0001]
}

In [None]:
grid = GridSearchCV(mlp, param_grid, verbose=5)

In [None]:
grid.fit(X_train, y_train)

In [None]:
print("Best parameters found: ", grid.best_params_)
print("Best score: ", grid.best_score_)

best_mlp = grid.best_estimator_
test_accuracy = best_mlp.score(X_test, y_test)

print("Test set accuracy with best parameters:", test_accuracy)

In [None]:
best_mlp = MLPClassifier(alpha = 0.0001, hidden_layer_sizes= (100,), activation='relu', solver='adam', random_state=30, learning_rate_init=0.0001, max_iter=1000).fit(X_train, y_train)

In [None]:
best_mlp.score(X_test, y_test)

In [None]:
y_pred = best_mlp.predict(X_test)

In [None]:
cm = confusion_matrix(y_test, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm,
                              display_labels=best_mlp.classes_)
disp.plot()
plt.xticks(rotation=90)
plt.show()