In [51]:
import pandas as pd

file_names = {
    'origin': {
        'gg': 'en_origin_gg_all.csv',
        'inf': 'en_origin_inf_all.csv',
        'rng': 'en_origin_rng_all.csv'
    },
    'generated': {
        'gg': 'en_generated_gg_all.csv',
        'inf': 'en_generated_inf_all.csv',
        'rng': 'en_generated_rng_all.csv'
    }
}

dfs = {}

for key1, inner_dict in file_names.items():
    dfs[key1] = {}
    for key2, file_name in inner_dict.items():
        df = pd.read_csv(file_name)
        dfs[key1][key2] = df


In [68]:
import matplotlib.pyplot as plt

# Указываем порядок расположения ящиков с усами
boxplot_order = ['Origin-GG', 'Generated-GG', 'Origin-RNG', 'Generated-RNG', 'Origin-Inf', 'Generated-Inf']

# Проходим по признакам и рисуем ящики с усами для каждого датафрейма
for feature in features:
    fig, ax = plt.subplots(figsize=(8, 8))
    data = []
    labels = []
    colors = ['blue', 'red', 'green']  # Цвета для каждого датафрейма
    for boxplot_label in boxplot_order:
        key1, key2 = boxplot_label.split('-')
        df = dfs[key1][key2]
        data.append(df[feature].values)
        labels.append(boxplot_label)
    ax.boxplot(data, labels=labels, patch_artist=True, boxprops=dict(facecolor='white', color='black'))
    ax.set_title(feature)
    ax.set_ylabel('Values')
    ax.set_xlabel('Dataframes')
    ax.set_xticklabels(labels, rotation=45)
    for box, color in zip(ax.artists, colors):
        box.set_facecolor(color)

    # Сохраняем график в формате PDF
    plt.savefig(f'{feature}.pdf', format='pdf')

    # Сохраняем график в формате JPG
    plt.savefig(f'{feature}.jpg', format='jpg', dpi=300)

    plt.close()


In [86]:
import pandas as pd

dfs_merged = {}

for key in ['gg', 'inf', 'rng']:
    dfs_merged[key] = pd.concat([dfs['origin'][key], dfs['generated'][key]], axis=0)
    dfs_merged[key]['source'] = [1] * len(dfs['origin'][key]) + [0] * len(dfs['generated'][key])


In [110]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression

classifiers = {}

for feature_column in dfs_merged['gg'].columns:  # Замените на список выбранных вами признаков
    classifiers[feature_column] = {}

    for key in ['gg', 'inf', 'rng']:
        df = pd.concat([dfs['origin'][key], dfs['generated'][key]], axis=0)
        X = dfs_merged[key][[feature_column]]
        y = dfs_merged[key]['source']

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        classifier = SVC()
        classifier.fit(X_train, y_train)

        classifiers[feature_column][key] = classifier
        
        accuracy = classifier.score(X_test, y_test)

In [113]:
import pandas as pd

accuracy_results = []

for feature_column, classifier_dict in classifiers.items():
    for key, classifier in classifier_dict.items():
        X_test = dfs_merged[key][[feature_column]]
        y_test = dfs_merged[key]['source']
        
        accuracy = classifier.score(X_test, y_test)
        
        accuracy_results.append({
            'feature_column': feature_column,
            'graph': key,
            'accuracy': accuracy
        })

results_df = pd.DataFrame(accuracy_results)
accuracy_df = results_df.pivot(index='graph', columns='feature_column', values='accuracy')

In [114]:
accuracy_df

feature_column,average_shortest_path_length,betweenness_centrality,closeness_centrality,diameter,edge_betweenness_centrality,load_centrality,radius,source
graph,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
gg,0.772727,1.0,0.801136,0.636364,1.0,1.0,0.636364,1.0
inf,0.761364,0.965909,0.727273,0.823864,0.977273,0.977273,0.823864,1.0
rng,0.914773,0.710227,0.693182,0.914773,0.636364,0.636364,0.914773,1.0
