In [6]:
import joblib
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, KFold, cross_val_predict, cross_validate
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_curve, auc
import pandas as pd
import time

def _art_csv_paths(arts, method, split):
    """Build the CSV paths for one feature method and one split.

    For each style in ``arts`` (order preserved) the "fake" file
    ``AI_SD_<art>.csv`` is listed first and the "real" file ``<art>.csv``
    second, so even list positions are fake and odd positions are real.

    Args:
        arts: Iterable of style names used in the file names.
        method: Feature-method directory suffix (``'nq2'`` or ``'ref'``).
        split: Sub-directory name (``'train'`` or ``'test'``).

    Returns:
        List of relative path strings, two per style.
    """
    paths = []
    for art in arts:
        paths.append(f'csv/art_{method}/{split}/AI_SD_{art}.csv')
        paths.append(f'csv/art_{method}/{split}/{art}.csv')
    return paths


def _load_split(paths):
    """Read every CSV in ``paths`` and stack them into one data set.

    Args:
        paths: CSV paths; each file is read with its first row as header.

    Returns:
        Tuple ``(X, y, row_counts)`` where ``X`` is every column but the last,
        ``y`` is the last column, and ``row_counts`` holds the number of rows
        read from each file, in the same order as ``paths``.
    """
    frames = [pd.read_csv(path, header=0) for path in paths]
    data = pd.concat(frames, ignore_index=True)
    row_counts = [len(frame) for frame in frames]
    return data.iloc[:, :-1], data.iloc[:, -1], row_counts


def get_different_classification(random_state=None):
    """List test rows that the nq2 model gets right and the ref model gets wrong.

    Two random forests (100 trees each) are trained, one on the ``nq2`` CSVs
    and one on the ``ref`` CSVs. Both are evaluated on their respective test
    CSVs, and every row position that is classified correctly by the nq2 model
    but incorrectly by the ref model is written to
    ``nq2_correct_ref_wrong.txt`` as ``"<row in file>, <art>_ai"`` or
    ``"<row in file>, <art>_real"``.

    The last column of every CSV is used as the label; all other columns are
    used as features.
    NOTE(review): the comparison is positional, so row i of an nq2 test file is
    presumed to describe the same sample as row i of the matching ref test
    file - confirm against whatever produced the CSVs.

    Args:
        random_state: Optional seed forwarded to both forests. The default
            ``None`` keeps the previous unseeded behaviour, so results may
            differ from run to run.

    Returns:
        None. The result is the text file written to the working directory.

    Raises:
        ValueError: If the nq2 and ref test files do not have matching
            per-file row counts, because positions could then not be compared.
    """
    arts = ['art_nouveau', 'baroque', 'expressionism', 'impressionism', 'post_impressionism', 'realism', 'renaissance', 'romanticism', 'surrealism', 'ukiyo-e']

    ####################
    # TRAINING
    ####################

    X_train_nq2, y_train_nq2, _ = _load_split(_art_csv_paths(arts, 'nq2', 'train'))
    X_train_ref, y_train_ref, _ = _load_split(_art_csv_paths(arts, 'ref', 'train'))

    # Random Forest
    rf_nq2 = RandomForestClassifier(n_estimators=100, random_state=random_state)
    rf_nq2.fit(X_train_nq2, y_train_nq2)

    rf_ref = RandomForestClassifier(n_estimators=100, random_state=random_state)
    rf_ref.fit(X_train_ref, y_train_ref)

    ####################
    # TEST
    ####################

    X_test_nq2, y_test_nq2, counts_nq2 = _load_split(_art_csv_paths(arts, 'nq2', 'test'))
    X_test_ref, y_test_ref, counts_ref = _load_split(_art_csv_paths(arts, 'ref', 'test'))

    # The two result sets are intersected by row position below; that is only
    # meaningful when both methods contribute the same rows from each file.
    if counts_nq2 != counts_ref:
        raise ValueError(
            "nq2 and ref test CSVs have different per-file row counts "
            f"({counts_nq2} vs {counts_ref}); rows cannot be compared by position"
        )

    # Predict
    y_test_pred_nq2 = rf_nq2.predict(X_test_nq2)
    y_test_pred_ref = rf_ref.predict(X_test_ref)

    correct_nq2 = y_test_nq2[y_test_nq2 == y_test_pred_nq2].index
    incorrect_ref = y_test_ref[y_test_ref != y_test_pred_ref].index

    distinct = correct_nq2.intersection(incorrect_ref)

    # Map every concatenated row position back to (file number, row within
    # that file) using the real file lengths rather than a fixed file size.
    row_origin = [
        (file_no, row_no)
        for file_no, n_rows in enumerate(counts_nq2)
        for row_no in range(n_rows)
    ]

    with open("nq2_correct_ref_wrong.txt", "w") as f:
        for idx in distinct:
            file_no, row_no = row_origin[idx]
            # Files alternate fake, real for each style (see _art_csv_paths).
            suffix = "ai" if file_no % 2 == 0 else "real"
            f.write(f"{row_no}, {arts[file_no // 2]}_{suffix}\n")
            


In [8]:
# Run the comparison end to end; the function returns nothing and writes its
# findings to "nq2_correct_ref_wrong.txt".
get_different_classification()