In [1]:
import numpy as np
import pandas as pd

In [2]:
# To run the experiments, download the following dataset and place the
# extracted file in the Datasets/ folder (the relative path used below):
# 	- household_power_consumption.txt -
#       https://archive.ics.uci.edu/ml/machine-learning-databases/00235/household_power_consumption.zip
#       (extract the .txt file)

filename = "Datasets/household_power_consumption.txt"

# Columns 2..8 are the seven measurement columns used in this notebook.
# NOTE(review): the raw file is understood to mark missing readings with '?';
# declaring it via na_values lets pandas parse the columns as floats directly.
# low_memory=False makes pandas infer each column's dtype from the whole file
# rather than chunk by chunk, which avoids the mixed-dtype DtypeWarning.
df = pd.read_csv(
    filename,
    sep=';',
    header=0,
    usecols=[2, 3, 4, 5, 6, 7, 8],
    na_values=['?'],
    low_memory=False,
)

# Safety net: coerce any remaining non-numeric token to NaN so that every
# column ends up float64 regardless of what the parser inferred.
for column_name in df.columns:
    df[column_name] = pd.to_numeric(df[column_name], errors='coerce')

# Keep only rows in which all seven measurements are present.
df = df.dropna()

print(df.shape)
print(df.dtypes)

  interactivity=interactivity, compiler=compiler, result=result)


(2049280, 7)
Global_active_power      float64
Global_reactive_power    float64
Voltage                  float64
Global_intensity         float64
Sub_metering_1           float64
Sub_metering_2           float64
Sub_metering_3           float64
dtype: object


In [3]:
# Design matrix: the three power/intensity measurements, as a NumPy array.
feature_columns = ['Global_active_power', 'Global_reactive_power', 'Global_intensity']
x = df[feature_columns].to_numpy()

# Regression target: the voltage column, as a NumPy array.
y = df['Voltage'].to_numpy()

In [4]:
import CaGD_ls

# Hand the helper a C-contiguous copy/view of the raw features.
x = np.ascontiguousarray(x)
print(np.shape(x))

# Expand the features with CaGD_ls.tens_pow (second argument 5).
# NOTE(review): presumably concatenates tensor powers up to degree 5; the
# recorded output shows the column count going from 3 to 363 - confirm.
# This cell rebinds `x`, so re-running it expands the already expanded matrix.
x = CaGD_ls.tens_pow(x, 5)
print(np.shape(x))

(2049280, 3)
(2049280, 363)


In [5]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the expanded features and standardise them in one call;
# the fitted scaler stays available as `scaler`.
scaler = StandardScaler()
x_scaled = scaler.fit_transform(x)

In [6]:
from sklearn.decomposition import PCA

# A fractional n_components (with the 'full' solver) asks PCA to keep as many
# components as needed to explain that share of the variance.
pca = PCA(svd_solver='full', n_components=0.999)
x_scaled_pca = pca.fit(x_scaled).transform(x_scaled)
print(x_scaled_pca.shape)

# Cell output: total explained-variance ratio of the retained components.
pca.explained_variance_ratio_.sum()

(2049280, 7)


0.9999910810039667

In [None]:
# Pass the PCA features through CaGD_ls.add_bias and report the new shape.
# NOTE(review): presumably appends a constant (intercept) column; if so,
# re-running this cell would add another one because it rebinds its own input.
x_scaled_pca = CaGD_ls.add_bias(x_scaled_pca)
print(x_scaled_pca.shape)

In [None]:
import seaborn as sns

# Histogram (36 bins) with a KDE overlay of the target values.
# sns.distplot is deprecated in recent seaborn releases in favour of
# histplot/displot, so use histplot when the installed version provides it
# and fall back to distplot on older installations.
hist_bins = int(180/5)
if hasattr(sns, 'histplot'):
    sns.histplot(y, bins=hist_bins, kde=True, stat='density',
                 color='darkblue', edgecolor='black',
                 line_kws={'linewidth': 4})
else:
    sns.distplot(y, hist=True, kde=True,
                 bins=hist_bins, color='darkblue',
                 hist_kws={'edgecolor': 'black'},
                 kde_kws={'linewidth': 4})

# Cell output: standard deviation of the target.
y.std()

In [None]:
# Hyper-parameters shared by the optimiser runs in the following cells.
lr = 1e-3             # step size passed to the CaGD_ls optimisers
block_dim = 2         # NOTE(review): the block-coordinate calls below pass a literal 2 instead of this name
loss_accepted = 1e-5  # passed to every optimiser; presumably a stopping threshold on the loss - confirm
max_iter = 5e1        # kept as a float (50.0) as in the original; verify CaGD_ls accepts a float here
lambda_LASSO = 1e-2   # regularisation weight passed to every optimiser
batch_size = 256      # mini-batch size passed to ADAM_ls / SAG_ls

# Seed for the random initial point so that every run starts from the same
# theta_0. A dedicated RandomState is used so NumPy's global RNG state is not
# modified.
SEED = 0

n = np.shape(x_scaled_pca)[1]
print("number of parameters ", n)
print("number of points ", x_scaled_pca.shape[0])

# Initial parameters drawn uniformly from [-1/sqrt(n), 1/sqrt(n)).
theta_0 = np.random.RandomState(SEED).uniform(-1/n**0.5, 1/n**0.5, n)

In [None]:
# Run CaGD_ls.mom_CA_BCD_GS_ls from the shared starting point theta_0.
# NOTE(review): the trailing positional arguments (2, 0.75) are opaque from
# this notebook - presumably block size and a fraction/momentum setting; confirm.
loss_momCA_GS, iteration_momCA_GS, theta_momCA_GS, t_momCA_GS = CaGD_ls.mom_CA_BCD_GS_ls(
    x_scaled_pca,
    y,
    theta_0,
    lambda_LASSO,
    lr,
    loss_accepted,
    max_iter,
    2,
    0.75,
)

In [None]:
# Run CaGD_ls.mom_CA_BCD_random_ls from the shared starting point theta_0.
# NOTE(review): the trailing positional arguments (2, 0.5) are opaque from
# this notebook - presumably block size and a fraction/momentum setting; confirm.
loss_momCA_random, iteration_momCA_random, theta_momCA_random, t_momCA_random = CaGD_ls.mom_CA_BCD_random_ls(
    x_scaled_pca,
    y,
    theta_0,
    lambda_LASSO,
    lr,
    loss_accepted,
    max_iter,
    2,
    0.5,
)

In [None]:
# Run CaGD_ls.CA_BCD_GS_ls (the variant without "mom" in its name) from theta_0.
# NOTE(review): the trailing positional arguments (2, 0.75) are opaque from
# this notebook - presumably block size and a fraction setting; confirm.
loss_CABCD_stand_GS, iteration_CABCD_stand_GS, theta_CABCD_stand_GS, t_CABCD_stand_GS = CaGD_ls.CA_BCD_GS_ls(
    x_scaled_pca,
    y,
    theta_0,
    lambda_LASSO,
    lr,
    loss_accepted,
    max_iter,
    2,
    0.75,
)

In [None]:
# Run the CaGD_ls.ADAM_ls baseline with the shared hyper-parameters and
# mini-batch size, starting from theta_0.
loss_ADAM, iteration_ADAM, theta_ADAM, t_ADAM = CaGD_ls.ADAM_ls(
    x_scaled_pca,
    y,
    theta_0,
    lambda_LASSO,
    batch_size,
    lr,
    loss_accepted,
    max_iter,
)

In [None]:
# SAG is run with its own, much smaller step size. It is kept in a dedicated
# name so the shared `lr` is not overwritten: re-running any earlier optimiser
# cell after this one would otherwise silently use 1e-6 instead of 1e-3.
lr_SAG = 1e-6
loss_SAG, iteration_SAG, theta_SAG, t_SAG = CaGD_ls.SAG_ls(
    x_scaled_pca,
    y,
    theta_0,
    lambda_LASSO,
    batch_size,
    lr_SAG,
    loss_accepted,
    max_iter,
)

In [None]:
# Report the smallest loss value recorded by each optimiser run.
min_loss_report = (
    ("loss min mom GS CA = ", loss_momCA_GS),
    ("loss min mom random CA = ", loss_momCA_random),
    ("loss min standard GS CA = ", loss_CABCD_stand_GS),
    ("loss min ADAM = ", loss_ADAM),
    ("loss min SAG = ", loss_SAG),
)
for caption, loss_history in min_loss_report:
    print(caption, min(loss_history))

In [None]:
from matplotlib import pyplot as plt

# One row per optimiser run: (legend label, time axis, iteration axis, losses).
runs = [
    ("CaBCD mom GS", t_momCA_GS, iteration_momCA_GS, loss_momCA_GS),
    ("CaBCD mom random", t_momCA_random, iteration_momCA_random, loss_momCA_random),
    ("CaBCD GS", t_CABCD_stand_GS, iteration_CABCD_stand_GS, loss_CABCD_stand_GS),
    ("ADAM", t_ADAM, iteration_ADAM, loss_ADAM),
    ("SAG", t_SAG, iteration_SAG, loss_SAG),
]

# Both figures cap the y-axis at the largest loss of the "CaBCD mom GS" run.
loss_top = max(loss_momCA_GS)

# Figure 1: loss against time, x-axis limited to [0, 10].
for run_label, elapsed, _, losses in runs:
    plt.plot(elapsed, losses, label=run_label)
plt.legend()
plt.title('Loss MSE vs time Power Consumption')
plt.xlabel('time')
plt.ylabel('loss')
plt.xlim([0, 10])
plt.ylim([0, loss_top])
# plt.savefig('CaBCD_vs_all_time_Elec.pdf', bbox_inches='tight')
plt.show()

# Figure 2: loss against iteration count, x-axis limited to [0, 15].
for run_label, _, passes, losses in runs:
    plt.plot(passes, losses, label=run_label)
plt.legend()
plt.title('Loss MSE vs iteration Power Consumption')
plt.xlabel('iteration (over the entire dataset)')
plt.ylabel('loss')
plt.xlim([0, 15])
plt.ylim([0, loss_top])
# plt.savefig('CaBCD_vs_all_iteration_Elec.pdf', bbox_inches='tight')
plt.show()