# MAST 30034 Assignment 1

In [1]:
import itertools

import pandas as pd
import numpy as np

#Plotly
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px

# Timing of the six sources, one (leading zeros, cycle length, ones per cycle)
# row per TC column.
_SOURCE_TIMING = [
    (0, 30, 15),
    (20, 45, 20),
    (0, 60, 25),
    (0, 40, 15),
    (0, 40, 20),
    (0, 40, 25),
]

# Column-wise views of the table above, kept as plain lists:
#   AV            - number of zero samples placed before the first cycle
#   IV            - length of one on/off cycle
#   DURATION_ONES - number of ones at the start of each cycle
AV, IV, DURATION_ONES = (list(column) for column in zip(*_SOURCE_TIMING))

# Number of rows and number of columns of TC.
TC_R_LEN, TC_C_LEN = 240, 6


## Question 1.1

In [2]:
# Initialize an empty TC_R_LEN x TC_C_LEN (240 x 6) frame that will hold the TCs.
# dtype=float keeps the NaN placeholders numeric; without it pandas builds the
# empty frame with object-dtype columns.
TC = pd.DataFrame(index=range(TC_R_LEN), columns=np.arange(TC_C_LEN), dtype=float)

In [3]:
# Number of zeros in each cycle (cycle length minus the run of ones).
neg_ones_duration = np.subtract(IV, DURATION_ONES)

# Build TC column by column: `arrival` leading zeros followed by a repeating
# ones/zeros cycle, trimmed to TC_R_LEN samples.
for idx, (increment, arrival, duration) in enumerate(zip(IV, AV, DURATION_ONES)):
    # One cycle: `duration` ones, then zeros up to `increment` samples.
    cycle = np.hstack((np.ones(duration), np.zeros(increment - duration)))

    # One cycle more than fits guarantees the repeated signal is at least
    # TC_R_LEN long, so a single slice below is enough to trim it.
    repeated = np.tile(cycle, TC_R_LEN // increment + 1)

    # Delay the signal by `arrival` samples while keeping the column length.
    TC.loc[:, idx] = np.append(np.zeros(arrival), repeated[:TC_R_LEN - arrival])

In [4]:
def normalize_column(df, col_no):
    """Standardize one column of ``df`` to zero mean and unit standard deviation.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the column.
    col_no : hashable
        Label of the column to standardize (looked up with ``df.loc``).

    Returns
    -------
    pandas.Series
        ``(column - mean) / std`` on the original index, where ``std`` is the
        pandas default (sample) standard deviation.
    """
    column = df.loc[:, col_no]

    # Reduce only the requested column: cheaper than reducing the whole frame
    # and unaffected by non-numeric neighbouring columns.
    return (column - column.mean()) / column.std()

#### Building TC Subplot

In [28]:
# Lay the six standardized TCs out on a 2 x 3 grid, filling it row by row.
GRID_COLS = 3
fig = make_subplots(
    rows=2,
    cols=GRID_COLS,
    subplot_titles=[f"Plot for TC {number}" for number in range(1, 7)],
)

for idx in range(6):
    # Map the flat TC index onto the 1-based (row, col) subplot coordinates.
    grid_row, grid_col = divmod(idx, GRID_COLS)
    standardized = normalize_column(TC, idx)
    fig.add_trace(
        go.Scatter(x=TC.index, y=standardized),
        row=grid_row + 1,
        col=grid_col + 1,
    )

fig.update_layout(title_text="TCs Source Standardized", width=1200, height=700)
fig.show()

## Question 1.2

In [6]:
# Correlation matrix between the columns of TC
TC.corr()

Unnamed: 0,0,1,2,3,4,5
0,1.0,7.506478000000001e-17,0.1690309,0.086066,-8.326673e-17,0.086066
1,7.506478000000001e-17,1.0,-0.02857143,0.130931,-6.708914e-17,-0.130931
2,0.1690309,-0.02857143,1.0,0.043644,-9.711506e-17,0.130931
3,0.0860663,0.1309307,0.04364358,1.0,0.7745967,0.6
4,-8.326673e-17,-6.708914e-17,-9.711506e-17,0.774597,1.0,0.774597
5,0.0860663,-0.1309307,0.1309307,0.6,0.7745967,1.0


In [7]:
# Heatmap of the TC correlation matrix, with both axes labelled 1..6.
tc_corr = TC.corr()
source_numbers = np.arange(1, 7)

fig = px.imshow(tc_corr, x=source_numbers, y=source_numbers)
fig.update_layout(title_text="Random Correlation Matrix", width=700, height=600)
fig.show()

## Question 1.3

**IMPORTANT: NumPy indexing starts at zero, and a slice `a:b` excludes the end index `b`.**

In [8]:
# Six 21 x 21 slices, all zeros to start with.
n_slices, side = 6, 21
tmpSM = np.zeros((n_slices, side, side))

In [9]:
# NumPy slices are zero-based and exclude the stop index, so 2:6 selects
# indices 2, 3, 4 and 5.
# NOTE(review): region iii) lies entirely inside region iv), and iv) is the
# only region not built from the 2:6 / 8:13 / 15:19 ranges -- confirm both
# against the assignment specification.
sm_regions = [
    (slice(2, 6), slice(2, 6)),      # i)
    (slice(2, 6), slice(15, 19)),    # ii)
    (slice(8, 13), slice(15, 19)),   # iii)
    (slice(7, 13), slice(14, 19)),   # iv)
    (slice(15, 19), slice(2, 6)),    # v)
    (slice(15, 19), slice(15, 19)),  # vi)
]

# Set one rectangular block of ones in each slice of tmpSM.
for slice_no, (row_span, col_span) in enumerate(sm_regions):
    tmpSM[slice_no, row_span, col_span] = 1


In [10]:
# Flatten each 21 x 21 slice of tmpSM into one row of 441 values.
flattened_maps = np.reshape(tmpSM, (6, 441))
SM = pd.DataFrame(data=flattened_maps)

In [11]:
# Heatmap of SM at a fixed 500 x 500 size; aspect='auto' lets the 6 x 441
# matrix stretch to fill the figure instead of using square cells.
fig = px.imshow(SM, aspect='auto', height=500, width=500)
fig.update_layout(autosize=False)
fig.show()

## Question 1.4

In [12]:
# Seeded generator so the noise, and everything derived from it, is identical
# on every Restart & Run All.
rng = np.random.default_rng(30034)

# Gaussian noise for the temporal sources, same shape as TC.
# NOTE(review): `scale` is a standard deviation; if the assignment states the
# noise *variance* as 0.25 / 0.015, pass its square root instead -- confirm.
gamma_t = rng.normal(loc=0, scale=0.25, size=TC.shape)

# Gaussian noise for the spatial sources, same shape as SM.
gamma_s = rng.normal(loc=0, scale=0.015, size=SM.shape)

# Matrix product of gamma_t and gamma_s.
gamma_v = np.matmul(gamma_t, gamma_s)

# Correlations within the sources: between the columns of gamma_t and between
# the rows of gamma_s (np.corrcoef treats each row as one variable).
R2_t = np.corrcoef(gamma_t.T)
R2_s = np.corrcoef(gamma_s)

# Shape of gamma_t flattened into a single row. `.size` replaces
# np.product(shape): np.product is a deprecated alias removed in NumPy 2.0.
print(gamma_t.reshape(1, gamma_t.size).shape)

(1, 1440)


In [13]:
# Side-by-side heatmaps of the two noise correlation matrices:
# temporal (R2_t) on the left, spatial (R2_s) on the right.
fig = make_subplots(rows=1, cols=2, subplot_titles=["Temporal Source", "Spatial Source"])
fig.add_trace(go.Heatmap(z=R2_t), row=1, col=1)
fig.add_trace(go.Heatmap(z=R2_s), row=1, col=2)
fig.update_layout(height=500, width=1000, title_text="Correlation Matrix for Gaussian Noises")
fig.show()

In [60]:
fig = make_subplots(rows=3, cols=1,
                    subplot_titles=["Gaussian Noise distributions for Temporal",
                                    "Gaussian Noise Distribution for Spatial",
                                    "Product Distribution of Temporal and Spatial"])

# One histogram per noise matrix, each over all of its entries.
# ravel() flattens the matrix directly; the previous
# reshape(1, np.product(shape))[0] relied on np.product, a deprecated alias
# that was removed in NumPy 2.0.
for row_no, noise in enumerate((gamma_t, gamma_s, gamma_v), start=1):
    fig.add_trace(go.Histogram(x=noise.ravel()), row=row_no, col=1)

fig.update_layout(height=950, width=1000, title_text="Gaussian Noise distributions")
fig.show()

## Question 1.5

In [34]:
# Add the temporal noise to TC and the spatial noise to SM, then multiply the
# two noisy matrices to obtain X.
noisy_tc = TC.to_numpy() + gamma_t
noisy_sm = SM.to_numpy() + gamma_s
X = pd.DataFrame(noisy_tc @ noisy_sm)
X.shape

(240, 441)

In [53]:
# Randomly choose 101 columns from X.
# A fixed random_state keeps the selection, and the plot built from it, the
# same on every run.
X_sample = X.sample(n=101, axis='columns', random_state=30034).copy()

In [54]:
# Overlay every sampled column as its own line; the legend is hidden because
# it would otherwise list all 101 columns.
axis_labels = dict(xaxis_title="Time Unit", yaxis_title="Value")
fig = px.line(X_sample, y=X_sample.columns, title="101 X's Time series")
fig.update_layout(showlegend=False, **axis_labels)
fig.show()

In [59]:
# Variance of each column of X (computed down the rows), plotted in column order.
X_var = X.var(axis='index')

fig = px.line(y=X_var, title="Variance of 441 Variables")
fig.show()