# Analysis for California Dataset

In [6]:
import warnings
warnings.filterwarnings('ignore')

In [7]:
import scrapbook as sb 
import pandas as pd
import numpy as np 
import seaborn as sns 
import numpy as np
from statistics import mean
import matplotlib.pyplot as plt

## BaseLine Models 

We have used Random Forest, Catboost, Vanilla NN and Stats Model as the Baseline Model for the problem 

Importing data of baseline Models 

In [9]:
books = sb.read_notebooks("./BaseLine_Model_Output")
baseLine_data = []
for nb in books.notebooks: 
    nbList=[nb.scraps['Catboost MSE'].data,
            nb.scraps['Stats Model MSE'].data,
            nb.scraps['Random Forest MSE'].data,
            nb.scraps['Vanilla NN MSE'].data]
    baseLine_data.append(nbList)
df = pd.DataFrame(baseLine_data, columns = ["Catboost","Stats Model","Random Forest","Vanilla Neural Network"])
display(df)
print("MEAN:")
print(df.mean(axis = 0))
baseLine_data = np.array(baseLine_data)

Unnamed: 0,Catboost,Stats Model,Random Forest,Vanilla Neural Network
0,0.144145,0.402561,0.375378,0.971213
1,0.146537,0.387095,0.382015,1.009413
2,0.140133,0.392521,0.38266,0.994935
3,0.148387,0.393289,0.390873,0.976467
4,0.147693,0.417347,0.388755,0.999524
5,0.141518,0.381309,0.35631,0.975415
6,0.145613,0.403077,0.380829,1.046506
7,0.144509,0.389052,0.386161,0.998154
8,0.150077,0.405029,0.388871,1.003687
9,0.14686,0.397556,0.384262,0.996739


MEAN:
Catboost                  0.145547
Stats Model               0.396884
Random Forest             0.381612
Vanilla Neural Network    0.997205
dtype: float64


## GAN 

Simple C-GAN was used to train the dataset 

In [10]:
book = sb.read_notebooks("./GAN_Output")
gan_data = []
gan_mse = []
for nb in book.notebooks:
    metrics = nb.scraps['GAN_1 Metrics'].data
    for i in range(1000):
        gan_mse.append(metrics[0][i])
    nbList = [nb.scraps['GAN Model MSE'].data,
              nb.scraps['GAN Model MAE'].data,
              nb.scraps['GAN Model Euclidean distance'].data,
              nb.scraps['GAN Model Manhattan Distance'].data]        
    gan_data.append(nbList)
df = pd.DataFrame(gan_data, columns = ['MSE','MAE','Euclidean Distance','Manhattan Distance'])
display(df)
print("MEAN:")
print(df.mean(axis = 0))
gan_data = np.array(gan_data)

Unnamed: 0,MSE,MAE,Euclidean Distance,Manhattan Distance,Epochs
0,0.344669,0.382632,37.718008,1579.503514,5000
1,0.315276,0.386188,36.074626,1594.183435,5000
2,0.388851,0.415491,40.062468,1715.148223,5000
3,0.461058,0.471094,43.624342,1944.677065,5000
4,0.334432,0.395961,37.153845,1634.526491,5000
5,0.383356,0.398691,39.778135,1645.796486,5000
6,0.463805,0.465104,43.752859,1919.947921,5000
7,0.348313,0.408673,37.91711,1687.001492,5000
8,0.350196,0.396515,38.019089,1636.812341,5000
9,0.386557,0.410137,39.94497,1693.04366,5000


MEAN:
MSE                      0.377651
MAE                      0.413048
Euclidean Distance      39.404545
Manhattan Distance    1705.064063
Epochs                5000.000000
dtype: float64


## ABC_GAN  (Catboost Pre generator)

In [None]:
book = sb.read_notebooks("./ABC_GAN_Catboost_Output")
paramVal = [0.01,0.1,1]
abc_mse = [[] for i in range(3)]
abc_mse_skip = [[] for i in range(3)]
abc_mse_mean = [[] for i in range(3)]
abc_mse_skip_mean = [[] for i in range(3)]
abc_weights = [[] for i in range(3)]
abc_epochs = [[] for i in range(3)]

for nb in book.notebooks:
    metrics1 = np.array(nb.scraps['ABC_GAN_1 Metrics'].data)
    metrics2 = np.array(nb.scraps['ABC_GAN_2 Metrics'].data)
    metrics3 = np.array(nb.scraps['ABC_GAN_3 Metrics'].data)
    paramVar = float(nb.papermill_dataframe.iloc[0]['value'])
    #Divide data according to parameters 
    for i in range(3):
        if paramVar == paramVal[i]:
            for j in range(1000):
                abc_mse[i].append(metrics1[0,j])
                abc_mse_skip[i].append(metrics3[0,j])
            abc_epochs[i].append(nb.scraps['ABC-GAN Model n_epochs'].data)
            abc_weights[i].append(nb.scraps['Skip Connection Weight'].data)
            abc_epochs[i].append(nb.scraps['ABC-GAN Model n_epochs'].data)
            abc_mse_mean[i].append(mean(metrics1[0,:]))
            abc_mse_skip_mean[i].append(mean(metrics3[0,:]))

In [None]:
for i in range(3):
    data = []
    for j in range(len(abc_weights[i])):
        data.append([paramVal[i],abc_weights[i][j],abc_mse_mean[i][j],abc_mse_skip_mean[i][j],abc_epochs[i][j]])
    df = pd.DataFrame(data, columns = ['Variance','Weight','ABC_Mean','Skip Connection ABC Mean','Epochs'])
    print(df)
    print(df.mean(axis=0))

   Variance    Weight  ABC_Mean  Skip Connection ABC Mean  Epochs
0      0.01  0.000000  0.142007                  0.005896      62
1      0.01  0.005365  0.132533                  0.003611      62
2      0.01  0.035787  0.388269                  0.011524    5000
3      0.01  0.071458  0.157338                  5.350273    5000
4      0.01  0.073413  0.277285                  0.010582      58
Variance                       0.010000
Weight                         0.037205
ABC_Mean                       0.219486
Skip Connection ABC Mean       1.076377
Epochs                      2036.400000
dtype: float64
   Variance    Weight  ABC_Mean  Skip Connection ABC Mean  Epochs
0       0.1  0.033136  0.241343                  0.015642      53
1       0.1  0.000000  0.132892                  0.015136      53
2       0.1  0.002828  0.110457                  0.015003     193
3       0.1  0.039032  0.169502                  0.015846     193
4       0.1  0.012134  0.056144                  0.017275  