In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

def plot_pie_chart(original_labels, predicted_labels, title):
    """Draw side-by-side pie charts of the true and predicted class distributions.

    Wedge sizes and wedge labels are both derived from the same per-class
    tally, so a "Class k" label always sits on the wedge that counts class k.
    (Previously the sizes came from ``value_counts()`` -- ordered by descending
    frequency -- while the labels came from iterating a ``set``, whose order is
    unrelated, so labels could be attached to the wrong wedges.)

    Parameters
    ----------
    original_labels : 1-D iterable of hashable labels
        Ground-truth class labels.
    predicted_labels : 1-D iterable of hashable labels
        Class labels produced by the model.
    title : str
        Figure-level title drawn above both pies.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """

    def class_distribution(labels):
        # One tally supplies both the wedge sizes and, via its index, the
        # matching wedge labels (most frequent class first).
        counts = pd.Series(list(labels)).value_counts()
        return counts, [f"Class {cls}" for cls in counts.index]

    original_counts, original_names = class_distribution(original_labels)
    predicted_counts, predicted_names = class_distribution(predicted_labels)

    fig, ax = plt.subplots(1, 2, figsize=(12, 6))

    # Left panel: ground-truth distribution.
    ax[0].pie(original_counts, labels=original_names, autopct='%1.1f%%', startangle=90)
    ax[0].set_title('Original Data Classes')

    # Right panel: predicted distribution.
    ax[1].pie(predicted_counts, labels=predicted_names, autopct='%1.1f%%', startangle=90)
    ax[1].set_title('Predicted Data Classes')

    fig.suptitle(title, fontsize=20)
    plt.show()

In [3]:
from models import LogisticRegression
# from constants import UNIVARIATE_DATASET_NAMES_2018
from utils import read_dataset,  label_encoder

# Datasets evaluated in this notebook. Hard-coded here rather than imported
# from `constants` (see the commented-out import above).
UNIVARIATE_DATASET_NAMES_2018 = [
    'ArrowHead', 'Wine', 'FreezerSmallTrain', 'OliveOil', 'Car', 'BeetleFly',
    'Yoga', 'InlineSkate', 'FaceAll', 'EOGVerticalSignal', 'Ham', 'MoteStrain',
    'ProximalPhalanxTW', 'WordSynonyms', 'Lightning7', 'GunPointOldVersusYoung',
    'Earthquakes',
]

# Number of fit/evaluate repetitions whose accuracies are averaged per dataset.
N_RUNS = 10

# One [dataset, mean train accuracy, mean test accuracy] row per dataset;
# the two accuracies are stored as strings formatted to two decimals.
results = []

for dataset in UNIVARIATE_DATASET_NAMES_2018:
    # Load and encode once per dataset: none of these calls depends on the
    # repetition index, so doing them inside the run loop only repeated the
    # same work N_RUNS times.
    # NOTE(review): assumes read_dataset/label_encoder are deterministic and
    # that model.fit / model.predict do not modify their inputs in place --
    # confirm against utils.py and models.py.
    df = read_dataset(dataset)
    X = df[dataset][0]
    y = label_encoder(df[dataset][1])
    X_test = df[dataset][2]
    # NOTE(review): test labels are encoded by a separate label_encoder call;
    # verify that this yields the same class -> code mapping as for `y`.
    y_test = label_encoder(df[dataset][3])

    train_accuracies = []
    test_accuracies = []
    for _run in range(N_RUNS):
        # A fresh model per repetition so runs do not share fitted state.
        model = LogisticRegression()
        model.fit(X, y)

        y_pred_train = model.predict(X)
        accuracy_train = model.score(y_pred_train, y)

        y_pred_test = model.predict(X_test)
        accuracy_test = model.score(y_pred_test, y_test)

        train_accuracies.append(accuracy_train)
        test_accuracies.append(accuracy_test)

    # Optional diagnostic for the last repetition of this dataset:
    # plot_pie_chart(y, y_pred_train, f"{dataset} Train Dataset")
    # plot_pie_chart(y_test, y_pred_test, f"{dataset} Test Dataset")

    mean_train_accuracy = np.mean(train_accuracies)
    mean_test_accuracy = np.mean(test_accuracies)
    results.append([dataset, "%.2f" % mean_train_accuracy, "%.2f" % mean_test_accuracy])



In [4]:
# Turn the accumulated [dataset, train accuracy, test accuracy] rows into a
# labelled table. Note that `results` is rebound from a list to a DataFrame.
results = pd.DataFrame(
    data=results,
    columns=["Dataset", "Train Accuracy", "Test Accuracy"],
)

In [8]:
# Show the per-dataset train/test accuracy table as plain text.
print(results)

                   Dataset Train Accuracy Test Accuracy
0                ArrowHead          95.56         73.54
1                     Wine          51.93         60.93
2        FreezerSmallTrain         100.00         68.79
3                 OliveOil          47.67         43.00
4                      Car          96.67         79.17
5                BeetleFly         100.00         70.00
6                     Yoga          70.83         62.24
7              InlineSkate          74.40         22.91
8                  FaceAll          81.09         64.46
9        EOGVerticalSignal          62.51         26.44
10                     Ham          97.52         68.57
11              MoteStrain         100.00         83.54
12       ProximalPhalanxTW          76.40         73.95
13            WordSynonyms          71.72         37.82
14              Lightning7         100.00         48.63
15  GunPointOldVersusYoung          92.35         86.48
16             Earthquakes          99.97       

In [9]:
# Write the accuracy table to results.csv (path is relative to the working
# directory); index=False leaves the row index out of the file.
results.to_csv('results.csv', index=False)