## Intel sensors level A2

Authors: Martin Kostelník, Marianne Jakonen, Ahmed

In [89]:
# Imports
import helper
import pandas as pd
import numpy as np
import math

from sklearn.cross_decomposition import PLSRegression
from sklearn.metrics import mean_squared_error

# Silence all warnings so library messages do not clutter the cell outputs.
# NOTE(review): this is a blanket filter - it hides every Warning subclass,
# deprecation warnings included; consider narrowing it by category or module.
import warnings
warnings.filterwarnings(action="ignore")

# Input locations: the raw sensor dump and the cached, preprocessed table
DATA_PATH = "../data/data.txt.gz"
DATA_PROCESSED_PATH = "../data/data_processed.csv"

In [55]:
# First, let's get the data.
# The raw data was preprocessed once and saved to disk so that later runs load
# faster; uncomment the lines below to regenerate the processed file.

# Load raw data
# data_raw = helper.load_data(DATA_PATH)

# Preprocess data
# data = helper.preprocess_data(data_raw)
# data.to_csv(DATA_PROCESSED_PATH)  # write to the same path that the next cell loads

In [56]:
# Read the cached, preprocessed table
df = pd.read_csv(DATA_PROCESSED_PATH)

# Rank every numeric column by the sum of its pairwise correlations with all
# numeric columns, highest first.
# NOTE(review): the correlations are summed with their sign, so negative ones
# offset positive ones - confirm this is the intended notion of "most correlated".
correlation = (
    df.corr(numeric_only=True)
    .sum(axis=0)
    .sort_values(ascending=False)
)
correlation

L-7     48.881236
L-4     46.517966
L-10    45.147674
L-27    45.077371
L-29    44.802276
          ...    
H-31   -26.504261
H-26   -27.879231
H-30   -28.389539
H-24   -29.140159
H-25   -30.755250
Length: 156, dtype: float64

We can see that the most correlated variable (the one with the largest summed correlation with all the variables) is the light reading of sensor number 7 (`L-7`).
Let's take it out of the dataframe, as it is the variable we will make predictions for.

In [69]:
# Target: the "L-7" column. Predictors: every remaining column except the
# "datetime" column.
Y = df["L-7"]
X = df.drop(columns=["L-7", "datetime"])

In [107]:
# https://stackoverflow.com/questions/17315737/split-a-large-pandas-dataframe
def split_dataframe(df, chunk_size):
    """Split ``df`` into consecutive chunks of at most ``chunk_size`` rows.

    Chunks are taken in order using positional slicing, so only the final
    chunk may be shorter than ``chunk_size``. No empty chunk is ever produced:
    an input whose length is an exact multiple of ``chunk_size`` yields exactly
    ``len(df) // chunk_size`` chunks, and an empty input yields ``[]``.

    Args:
        df: Any sized, sliceable object (e.g. a pandas DataFrame or Series).
        chunk_size: Maximum number of rows per chunk; must be positive.

    Returns:
        A list of slices of ``df`` that together cover it from start to end.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")

    # Stepping through the start offsets only visits positions inside the data.
    # The previous `len(df) // chunk_size + 1` count appended an empty trailing
    # chunk whenever the length was an exact multiple of chunk_size.
    return [df[start:start + chunk_size] for start in range(0, len(df), chunk_size)]

# Rows per chunk.
# NOTE(review): presumably one day of resampled readings - confirm against the
# resampling done in helper.preprocess_data.
DAY_LEN = 12
# Not used in this cell; kept in case a later cell reads it.
end = DAY_LEN
# Number of PLS latent components fitted for each chunk
N_COMPONENTS = 2
beta_coefficients = []

# Cut the predictors and the target into aligned chunks of DAY_LEN rows
days_X = split_dataframe(X, DAY_LEN)
days_Y = split_dataframe(Y, DAY_LEN)

# Fit one PLS regression per chunk and collect its regression coefficients
for day_X, day_Y in zip(days_X, days_Y):
    # The last chunk can be empty or shorter than DAY_LEN. PLSRegression needs
    # at least two rows and no fewer rows than components, so chunks that are
    # too short are skipped instead of raising inside fit().
    if len(day_X) < max(N_COMPONENTS, 2):
        continue

    pls = PLSRegression(n_components=N_COMPONENTS)
    pls.fit(day_X, day_Y)

    # With a single target, coef_ is 2-D with one singleton axis; squeeze()
    # drops it so each entry is a flat vector with one value per predictor.
    beta_coefficients.append(pls.coef_.squeeze())

beta_coefficients


[array([ 0.62372443,  0.26563356,  0.30520016,  0.23208458, -0.06072473,
        -0.03810577, -0.08128288, -0.16845885, -0.568582  ,  0.40835524,
         0.64583019,  0.50101578,  0.75965453,  0.74565088,  0.80933217,
         0.72024057,  0.7095338 ,  0.91269622,  0.78047904,  0.84387666,
         0.60754904,  0.84927655,  0.68769529,  0.80504791,  0.62310504,
         0.75814776,  0.55797333,  0.63473579,  0.54446601,  0.53639236,
         0.62420332,  0.27093625,  0.09338823, -0.14856846,  0.12099045,
        -0.22550846, -0.30163718, -0.32981119, -0.47252206,  1.7675923 ,
         1.67970686,  1.72352203,  1.67328991,  1.68309915,  1.58138118,
         1.57493438,  1.26214717,  1.68608446,  1.72526083,  1.60567653,
         1.66236968,  1.64306736,  1.696673  ,  1.64138004,  1.64901872,
         1.64325108,  1.65643489,  1.68506205,  1.74514891,  1.67544811,
         1.71023458,  1.6971135 ,  1.75498465,  1.67101772,  1.69074655,
         1.73858894,  1.78250405,  1.73190003,  1.8