# Measuring efficiency

## Import

In [None]:
import pandas as pd

In [None]:
# Target values for each complexity measure. The cells below put them in the
# 'Target' column next to the values ECoL computes for the loaded dataset.
L2_TARGET = 0.01160101
CLSCOEF_TARGET = 0.267977
T2_TARGET = 0.02666667

## Reading our dataset

In [None]:
# Base name of the dataset file; ".csv" is appended when it is read.
FILENAME = "NGEN=10000-CLSCOEF-T2"

In [None]:
# Load the dataset, then discard the "Unnamed: 0" column
# (presumably an index column written out when the CSV was saved -- confirm).
csv_path = f"{FILENAME}.csv"
df = pd.read_csv(csv_path)
df = df.drop(columns=["Unnamed: 0"])

In [None]:
# Preview the first rows. `head` must be called: a bare `df.head` only
# evaluates to the bound method object instead of the preview.
df.head()

## Measuring Complexity

### Setting up `ECoL`

In [None]:
# import rpy2's package module
import rpy2.robjects.packages as rpackages

# import R's utility package
utils = rpackages.importr('utils')

# Install the ECoL package only when it is missing from the R library, so
# re-running this cell does not contact CRAN and reinstall it every time.
if not rpackages.isinstalled('ECoL'):
    # select a mirror for R packages
    utils.chooseCRANmirror(ind=1)  # select the first mirror in the list

    utils.install_packages("ECoL")

# Import ECoL
ecol = rpackages.importr('ECoL')


### Measuring our complexity

In [None]:
from rpy2.robjects import Formula, pandas2ri

# Enable rpy2's pandas conversion before any DataFrame is handed to R.
pandas2ri.activate()

# Formula passed to every ECoL call below: `label` on the left-hand side,
# `.` (all remaining columns) on the right.
fml = Formula('label ~ .')

# Explicit R-side copy of the dataset.
# NOTE(review): the measure cells below pass `df`, not `r_df` -- confirm
# whether this conversion is still needed.
r_df = pandas2ri.py2rpy(df)

#### L2 - T2

In [None]:
# L2: keep the first value of the first entry returned by ECoL.
l2Vector = ecol.linearity_formula(
    fml, df, measures="L2", summary="return")
l2 = l2Vector.rx(1)[0][0]

# T2: keep the first element of the returned vector.
t2Vector = ecol.dimensionality_formula(
    fml, df, measures="T2", summary="return")
t2 = t2Vector[0]

# Targets and measured values, in the order L2, T2.
data = {
    'Target': [L2_TARGET, T2_TARGET],
    'Result': [l2, t2],
}

#### L2 - CLSCOEF

In [None]:
# L2: keep the first value of the first entry returned by ECoL.
l2Vector = ecol.linearity_formula(
    fml, df, measures="L2", summary="return")
l2 = l2Vector.rx(1)[0][0]

# ClsCoef: same extraction as L2, from the network measures.
clscoefVector = ecol.network_formula(
    fml, df, measures="ClsCoef", summary="return")
clscoef = clscoefVector.rx(1)[0][0]

# Targets and measured values, in the order L2, CLSCOEF.
data = {
    'Target': [L2_TARGET, CLSCOEF_TARGET],
    'Result': [l2, clscoef],
}


#### CLSCOEF - T2

In [None]:
# ClsCoef: keep the first value of the first entry returned by ECoL.
clscoefVector = ecol.network_formula(
    fml, df, measures="ClsCoef", summary="return")
clscoef = clscoefVector.rx(1)[0][0]

# T2: keep the first element of the returned vector.
t2Vector = ecol.dimensionality_formula(
    fml, df, measures="T2", summary="return")
t2 = t2Vector[0]

# Targets and measured values, in the order CLSCOEF, T2.
data = {
    'Target': [CLSCOEF_TARGET, T2_TARGET],
    'Result': [clscoef, t2],
}

#### L2 - CLSCOEF - T2

In [None]:
# L2: keep the first value of the first entry returned by ECoL.
l2Vector = ecol.linearity_formula(
    fml, df, measures="L2", summary="return")
l2 = l2Vector.rx(1)[0][0]

# ClsCoef: same extraction as L2, from the network measures.
clscoefVector = ecol.network_formula(
    fml, df, measures="ClsCoef", summary="return")
clscoef = clscoefVector.rx(1)[0][0]

# T2: keep the first element of the returned vector.
t2Vector = ecol.dimensionality_formula(
    fml, df, measures="T2", summary="return")
t2 = t2Vector[0]

# Targets and measured values, in the order L2, CLSCOEF, T2.
data = {
    'Target': [L2_TARGET, CLSCOEF_TARGET, T2_TARGET],
    'Result': [l2, clscoef, t2],
}

#### 

## Taking a look at the efficiency

In [None]:
# Pairs with the "L2 - T2" cell: `data` must hold two values per column,
# ordered L2 then T2, to line up with these row labels.
df_plot = pd.DataFrame(data=data, index=["L2", "T2"])

In [None]:
# Pairs with the "L2 - CLSCOEF" cell: `data` must hold two values per column,
# ordered L2 then CLSCOEF, to line up with these row labels.
df_plot = pd.DataFrame(data=data, index=["L2", "CLSCOEF"])

In [None]:
# Pairs with the "CLSCOEF - T2" cell: `data` must hold two values per column,
# ordered CLSCOEF then T2, to line up with these row labels.
df_plot = pd.DataFrame(data=data, index=["CLSCOEF", "T2"])

In [None]:
# Pairs with the three-measure cell: `data` must hold three values per column,
# ordered L2, CLSCOEF, T2, to line up with these row labels.
df_plot = pd.DataFrame(data=data, index=["L2", "CLSCOEF", "T2"])

In [None]:
import matplotlib
from matplotlib import pyplot as plt

# Shared text sizes for the axis labels and the tick labels.
label_style = {"fontsize": 16}
tick_style = {"fontsize": 14}

matplotlib.style.use('Solarize_Light2')

# Bar chart of df_plot (Target vs. Result per measure); rot=0 keeps the
# x tick labels horizontal.
df_plot.plot.bar(rot=0, figsize=(25, 13))

# The title and axis labels are user-facing strings and are kept verbatim.
plt.title("Eficiência da Geração de Dataset Sintético", fontsize=20)
plt.xlabel("Medida de Complexidade", **label_style)
plt.ylabel("Valor de complexidade", **label_style)
plt.xticks(**tick_style)
plt.yticks(**tick_style)