In [26]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb

# Data Visualization LSTM 96 (predict 24h)

## 1. Load data
### 1.1 Load Original and "Faked" Train data

In [52]:
# load first batch of original train data
data_raw_original_1 = pd.read_csv("dataset/eband_original_collected_train_data_2019-10-20.csv")
print("Shape of data_raw_original_1:", data_raw_original_1.shape)

# load second (current) batch of original train data
data_raw_original_2 = pd.read_csv("dataset/eband_original_collected_train_data.csv")
print("Shape of data_raw_original_2:", data_raw_original_2.shape)

# stack batch 1 and batch 2 row-wise
# (pd.concat replaces DataFrame.append, which was removed in pandas 2.0;
#  like append, it keeps each frame's own row index)
data_raw_original = pd.concat([data_raw_original_1, data_raw_original_2])
print("Shape of data_raw_original:", data_raw_original.shape)

# load augmented / faked data; its extra "comment" column is dropped so the
# frame has the same columns as the original data
data_raw_faked = pd.read_csv("dataset/eband_faked_train_data.csv").drop(columns="comment")
print("Shape of data_raw_faked:", data_raw_faked.shape)


# append faked data to original data
data_raw_original_and_faked = pd.concat([data_raw_original, data_raw_faked])
print("Shape of data_raw_original and faked:", data_raw_original_and_faked.shape)


# load temp train data (unsure if this data shall be used as train data)
data_raw_temp = pd.read_csv("dataset/eband_TEMP_train_data.csv")
print("Shape of data_raw_temp:", data_raw_temp.shape)


# append temp data to original + faked data
data_raw_train = pd.concat([data_raw_original_and_faked, data_raw_temp])


# the row count is divided by 96 to get the number of training examples
print("Number of training examples (m)",data_raw_train.shape[0]/96)

print("Shape of data_raw_train", data_raw_train.shape)

Shape of data_raw_original_1: (20352, 58)
Shape of data_raw_original_2: (4512, 58)
Shape of data_raw_original: (24864, 58)
Shape of data_raw_faked: (4320, 58)
Shape of data_raw_original and faked: (29184, 58)
Shape of data_raw_temp: (1056, 58)
Number of training examples (m) 315.0
Shape of data_raw_train (30240, 58)


### 1.2 Load posted train data (submitted by users via the web app)

In [53]:
import glob

# The path to where all training examples are posted
# NOTE(review): absolute, machine-specific path - the notebook only runs on a
# machine that has this folder; consider making it configurable.
path = r"C:\Users\glenn\OneDrive\Skrivbord\TXI\Work_TXI_PM_AI_Version_3.8.2\Training_Examples_Posted\eband"

# glob returns files in arbitrary (filesystem-dependent) order; sorting makes
# the row order of the concatenated frame reproducible between runs
all_csv_files = sorted(glob.glob(path + "/*.csv"))

# Fail early with a clear message instead of pandas' generic
# "No objects to concatenate" error further down
if not all_csv_files:
    raise FileNotFoundError("No posted training example CSV files found in: " + path)

li = []

for filename in all_csv_files:
    # posted files are read without a header row: every line is data
    df = pd.read_csv(filename, index_col=None, header=None)
    li.append(df)

all_posted_training_examples = pd.concat(li, axis=0, ignore_index=True)
# Note that posted train examples use the same frame format (without header),
# so the column names are assigned by position here
all_posted_training_examples.columns =['NeId', 'Time', 'NeAlias', 'NeType', 'HalfBPSK_Strong', 'HalfBPSK',
       'HalfBPSK_Light', 'BPSK_Strong', 'BPSK', 'BPSK_Light', 'QAM4_Strong',
       'QAM4', 'QAM4_Light', 'QAM16_Strong', 'QAM16', 'QAM16_Light',
       'QAM32_Strong', 'QAM32', 'QAM32_Light', 'QAM64_Strong', 'QAM64',
       'QAM64_Light', 'QAM128_Strong', 'QAM128', 'QAM128_Light',
       'QAM256_Strong', 'QAM256', 'QAM256_Light', 'QAM512_Strong', 'QAM512',
       'QAM512_Light', 'QAM1024_Strong', 'QAM1024', 'QAM1024_Light',
       'QAM2048_Strong', 'QAM2048', 'QAM2048_Light', 'QAM4096_Strong',
       'QAM4096', 'QAM4096_Light', 'ModChanges_x', 'FailureDescription',
       'ES_x', 'SES_x', 'UAS_x', 'ActualReceived_Level_x', 'FarEnd_NeAlias',
       'ES_y', 'SES_y', 'UAS_y', 'ActualReceived_Level_y', 'ModChanges_y',
       'fading_metric', 'sum_errors', 'hw_fault', 'pred_group', 'Y label',
       'Y 2']

print("Shape of posted training examples",all_posted_training_examples.shape)

Shape of posted training examples (7200, 58)


### 1.3 Concatenate Train data into one frame

In [57]:
# Stack the file-based train data and the posted examples row-wise, aligned on
# column names (pd.concat replaces DataFrame.append, removed in pandas 2.0)
data_raw_train_all = pd.concat([data_raw_train, all_posted_training_examples])
print ("Shape of all training examples",data_raw_train_all.shape)
# the row count is divided by 96 to get the number of training examples
print("Number of training examples", data_raw_train_all.shape[0]/96)

Shape of all training examples (37440, 58)
Number of training examples 390.0


## 2. Prepare and save EBAND DATASET

### 2.1 Augment features

In [58]:

def augment_features(df):
    """Add three derived metric columns to ``df`` and return it.

    The columns are assigned on the object that is passed in, so the caller's
    frame is modified; the returned object is that same frame.

    Columns added:
      * ``rx_levels_metric``  - ``ActualReceived_Level_x`` added to itself.
      * ``ModChanges_metric`` - (``ModChanges_x`` + ``ModChanges_y``) * 2.
      * ``errors_metric``     - (``SES_x`` + ``SES_y``) * 100
                                + (``ES_x`` + ``ES_y``) * 10.
    """
    
    # IMPORTANT. DO NOT MODIFY THIS FUNCTION WITHOUT CHANGE IN ALL SCRIPTS !!!
    
    # NOTE(review): both operands are the "_x" column, so this is 2 * ActualReceived_Level_x.
    # The other two metrics combine "_x" and "_y"; possibly "_y" was intended here - confirm,
    # and if so change it together with every other script that uses this function.
    df ["rx_levels_metric"] = df ["ActualReceived_Level_x"] + df ["ActualReceived_Level_x"]
    
    df ["ModChanges_metric"] = (df ["ModChanges_x"] + df ["ModChanges_y"]) * 2
    
    # SES terms are weighted 100, ES terms are weighted 10
    df ["errors_metric"] = ((df ["SES_x"] + df ["SES_y"]) * 100) + ((df ["ES_x"] + df ["ES_y"]) * 10 )
    
    return df

# Call function above.
# augment_features returns the very frame it was given, so at this point
# eband_dataset and data_raw_train_all are the same object.
eband_dataset = augment_features(data_raw_train_all)

# Remove identifier / free-text columns and the QAM2048 / QAM4096 columns.
# The drop is NOT done in place: an in-place drop would also strip these
# columns from data_raw_train_all, and re-running this cell would then fail
# with a KeyError because the columns are already gone.
eband_dataset = eband_dataset.drop(columns=[ "Time", "NeId", "NeType", "FailureDescription",
                                  "FarEnd_NeAlias","pred_group",
                                 "QAM2048_Strong", "QAM2048", "QAM2048_Light","QAM4096_Strong",
                                  "QAM4096", "QAM4096_Light"])


eband_dataset.shape

(37440, 49)

### 2.2 Save dataset so it can be used by models

In [59]:
# Write the prepared dataset to disk as CSV; this file is used in "Visualization notebook".
# Floats are written with two decimals, the header row is kept, the row index is not written.
eband_dataset.to_csv(
    path_or_buf="dataset/sharp_dataset/eband_dataset.csv",
    index=False,
    header=True,
    sep=',',
    float_format='%.2f',
)

print("Shape of eband_dataset:", eband_dataset.shape)

Shape of eband_dataset: (37440, 49)


## 3. Data visualization

### 3.1 Class distribution 15min data label (to predict each 15 min period)

In [70]:

def class_distr(input_df):
    """Summarize the frame per "Y label" class.

    Parameters
    ----------
    input_df : DataFrame with at least the columns "Y label", "NeAlias",
        "sum_errors", "UAS_x" and "fading_metric". It is not modified.

    Returns
    -------
    DataFrame indexed by "Y label" with, per class:
      * "Number of train examples"      - count of "NeAlias" values (rows)
      * "For validation/ test (80/20)"  - 20 % of that count
      * "errors_average", "UAS_x_average", "fading_metric_average"
                                        - column means rounded to 1 decimal
    """
    per_class = input_df.groupby(["Y label"]).agg(
        {
            "NeAlias": "count",
            "sum_errors": "mean",
            "UAS_x": "mean",
            "fading_metric": "mean",
        }
    )

    example_count = per_class["NeAlias"]

    # Assemble the result directly; every Series shares the "Y label" group
    # index, so that index carries over to the summary frame.
    summary = pd.DataFrame(
        {
            "Number of train examples": example_count,
            "For validation/ test (80/20)": example_count * 0.2,
            "errors_average": per_class["sum_errors"].round(1),
            "UAS_x_average": per_class["UAS_x"].round(1),
            "fading_metric_average": per_class["fading_metric"].round(1),
        }
    )
    return summary

In [71]:
# Call function: summarize eband_dataset per class and display the resulting table

view_class_distr = class_distr(eband_dataset)
view_class_distr

Unnamed: 0_level_0,Number of train examples,For validation/ test (80/20),errors_average,UAS_x_average,fading_metric_average
Y label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0.0,18977,3795.4,0.0,0.0,5.6
1.0,480,96.0,-2.0,870.2,17392.1
2.0,12,2.4,-1.5,91.6,1711.1
3.0,448,89.6,-0.3,0.0,6427.5
4.0,1666,333.2,46.2,3.5,1198.5
5.0,208,41.6,26.3,2.3,1338.6
6.0,4696,939.2,-0.1,-0.0,176.8
7.0,2229,445.8,1.5,0.1,1786.9
8.0,454,90.8,2.0,9.8,1035.5
9.0,95,19.0,85.6,7.8,1789.4


### 3.2 Class distribution 24h data label (to predict each 24h period)

In [49]:

def class_distr(input_df, rows_per_example=96):
    """Summarize the frame per "Y 2" class, with readable class names.

    NOTE: this definition reuses the name ``class_distr`` and therefore
    replaces the "Y label" version defined earlier in the notebook.

    Parameters
    ----------
    input_df : DataFrame with at least the columns "Y 2", "NeAlias",
        "sum_errors", "UAS_x" and "fading_metric". It is not modified.
    rows_per_example : int, default 96
        Number of rows that make up one training example; the per-class row
        count is floor-divided by this value.

    Returns
    -------
    DataFrame indexed by "Y 2" (class number replaced by its text name) with,
    per class:
      * "Number of train examples"      - row count // rows_per_example
      * "For validation/ test (80/20)"  - 20 % of that number
      * "errors_average", "UAS_x_average", "fading_metric_average"
                                        - column means rounded to 1 decimal
    """
    # class number -> display name
    class_names = {
        0: "0. OK",
        1: "1. Unstable Antenna.",
        2: "2. Rain and Wet Snow.",
        3: "3. Affected by Rain.",
        4: "4. Line of sight or Alignment problem.",
        5: "5. Serious Line of sight or Alignment problem.",
        6: "6. Intermittent Line of sight problem.",
        7: "7. Stuck on low modulation.",
        8: "8. Equipment problem.",
        9: "9. Out of service detected.",
        10: "10. Limited data.",
        11: "11. A hardware restart was detected.",
    }

    df = input_df.groupby(["Y 2"]).agg( { 'NeAlias': 'count',
                "sum_errors":"mean", "UAS_x": "mean", "fading_metric":"mean" } )

    df["Number of train examples"] =  df["NeAlias"] // rows_per_example
    df["For validation/ test (80/20)"] =  df["Number of train examples"]*0.2
    df["errors_average"] = df["sum_errors"].round(1)
    df["UAS_x_average"] = df["UAS_x"].round(1)
    df["fading_metric_average"] = df["fading_metric"].round(1)
    df = df.drop(columns=["NeAlias", "UAS_x", "sum_errors", "fading_metric"]).reset_index()

    # change prediction classes from value (numbers) to text.
    # One map() builds a new column instead of writing strings into the
    # numeric "Y 2" column cell by cell, which newer pandas versions reject
    # as an incompatible-dtype assignment. Values without a name are kept.
    df["Y 2"] = df["Y 2"].map(lambda value: class_names.get(value, value))

    return df.set_index("Y 2")

In [61]:
# Call function: summarize eband_dataset per class and display the resulting table

view_class_distr = class_distr(eband_dataset)
view_class_distr

Unnamed: 0_level_0,Number of train examples,For validation/ test (80/20),errors_average,UAS_x_average,fading_metric_average
Y 2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0. OK,77,15.4,-0.0,-0.0,0.8
1. Unstable Antenna.,58,11.6,15.6,1.1,435.0
2. Rain and Wet Snow.,12,2.4,-1.3,4.1,717.3
3. Affected by Rain.,66,13.2,0.2,0.7,59.8
4. Line of sight or Alignment problem.,30,6.0,0.0,0.0,154.9
5. Serious Line of sight or Alignment problem.,34,6.8,1.0,0.1,1310.9
6. Intermittent Line of sight problem.,8,1.6,7.1,0.7,403.8
7. Stuck on low modulation.,7,1.4,-0.0,0.0,4502.0
8. Equipment problem.,37,7.4,26.2,-0.0,-0.0
9. Out of service detected.,21,4.2,-1.3,539.0,11598.6


In [51]:
# Per "Y 2" class: row count plus the mean of the error, receive-level,
# modulation-change, UAS, hw_fault and fading columns.
# Requires data_raw_train_all from section 1.3 - run the notebook top to bottom.
view_train = data_raw_train_all.groupby(["Y 2"]).agg({ 
            'NeAlias': 'count',
            "sum_errors":"mean",
            "ActualReceived_Level_x": "mean", "ModChanges_x":"mean", "UAS_x":"mean",

            "ActualReceived_Level_y": "mean", "ModChanges_y":"mean", "UAS_y": "mean",
            "hw_fault": "mean", "fading_metric": "mean",
                })

# the row count is divided by 96 to get the number of training examples
view_train["NeAlias"] =  view_train["NeAlias"]/96
view_train = view_train.reset_index()

# change prediction classes from value (numbers) to text.
# One map() builds a new column instead of writing strings into the numeric
# "Y 2" column cell by cell, which newer pandas versions reject as an
# incompatible-dtype assignment. Values without a name are kept as they are.
view_train_class_names = {
    0: "The Link looks OK",
    1: "Unstable antenna.",
    2: "Rain and Wet Snow.",
    3: "Affected by Rain.",
    4: "Line of sight or Alignment problem.",
    5: "Serious Line of sight or Alignment problem.",
    6: "Intermittent Line of sight problem.",
    7: "The link is stuck on low modulation.",
    8: "Equipment problem.",
    9: "Out of service detected.",
    10: "Limited data.",
    11: "A hardware restart was detected.",
}
view_train["Y 2"] = view_train["Y 2"].map(
    lambda value: view_train_class_names.get(value, value)
)

view_train = view_train.set_index("Y 2")
view_train

NameError: name 'data_raw_train_all' is not defined

In [None]:
# Round to one decimal and remove the example-count column so only the
# per-class means remain. errors="ignore" keeps the cell re-runnable: on a
# second run "NeAlias" is already gone and a plain drop would raise KeyError.
view_train = view_train.round(1).drop(columns=["NeAlias"], errors="ignore")

# Normalize by column: subtract the column mean and divide by the column
# standard deviation (pandas default, i.e. sample standard deviation)
view_train_norm_col = (view_train - view_train.mean()) / view_train.std()

In [None]:
# Draw the column-normalized table as a heatmap; annot=True writes each cell's value into it
heat_map = sb.heatmap(view_train_norm_col , annot=True)
plt.show()