# Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Read Data

In [None]:
# Load the Chromecast and Smart TV CSV files into one DataFrame each.
# Paths are relative to the directory the notebook is run from.
df_chromecast = pd.read_csv(filepath_or_buffer='../data/dataset_chromecast.csv')
df_smart_tv = pd.read_csv(filepath_or_buffer='../data/dataset_smart-tv.csv')

# Data Preprocessing

## Log Transformation

### Chromecast

In [None]:
# Natural-log transform of the Chromecast byte columns, stored as new columns.
# np.log1p(x) evaluates log(1 + x) directly: a value of 0 maps to 0, and small
# values keep precision that forming `x + 1` first would discard.
df_chromecast['log_bytes_down'] = np.log1p(df_chromecast['bytes_down'])
df_chromecast['log_bytes_up'] = np.log1p(df_chromecast['bytes_up'])

### Smart TV

In [None]:
# Natural-log transform of the Smart TV byte columns, stored as new columns.
# np.log1p(x) evaluates log(1 + x) directly: a value of 0 maps to 0, and small
# values keep precision that forming `x + 1` first would discard.
df_smart_tv['log_bytes_down'] = np.log1p(df_smart_tv['bytes_down'])
df_smart_tv['log_bytes_up'] = np.log1p(df_smart_tv['bytes_up'])

## Create Hour Column

### Chromecast

In [None]:
# Derive an integer 'hour' column from 'date_hour': take the second
# space-separated field, then the part before its first ':'.
df_chromecast['hour'] = [
    int(stamp.split(' ')[1].split(':')[0])
    for stamp in df_chromecast['date_hour']
]

### Smart TV

In [None]:
# Derive an integer 'hour' column from 'date_hour': take the second
# space-separated field, then the part before its first ':'.
df_smart_tv['hour'] = [
    int(stamp.split(' ')[1].split(':')[0])
    for stamp in df_smart_tv['date_hour']
]

# Data Analysis (General)

## Chromecast

### Histogram

In [None]:
# Bin count from the row count: 1 + 3.3 * log10(n), truncated to an integer.
n_rows = len(df_chromecast)
chromecast_hist_bin = int(3.3 * np.log10(n_rows) + 1)

# Histogram of the log-transformed upload column, saved as a PNG.
fig, ax = plt.subplots()
ax.hist(
    df_chromecast['log_bytes_up'],
    bins=chromecast_hist_bin,
    color='blue',
    alpha=0.5,
    edgecolor='black',
)
ax.set_xlabel('Bytes Up')  # plotted values are the log-transformed column
ax.set_ylabel('Frequency')
ax.set_title('Chromecast histogram of Bytes Up')
fig.savefig('../images/chromecast/general/histogram/chromecast_hist_bytes_up.png', dpi=fig.dpi)

In [None]:
# Bin count from the row count: 1 + 3.3 * log10(n), truncated to an integer.
n_rows = len(df_chromecast)
chromecast_hist_bin = int(3.3 * np.log10(n_rows) + 1)

# Histogram of the log-transformed download column, saved as a PNG.
fig, ax = plt.subplots()
ax.hist(
    df_chromecast['log_bytes_down'],
    bins=chromecast_hist_bin,
    color='red',
    alpha=0.5,
    edgecolor='black',
)
ax.set_xlabel('Bytes Down')  # plotted values are the log-transformed column
ax.set_ylabel('Frequency')
ax.set_title('Chromecast histogram of Bytes Down')
fig.savefig('../images/chromecast/general/histogram/chromecast_hist_bytes_down.png', dpi=fig.dpi)

### Empirical Distribution Function

In [None]:
# Empirical distribution function of the log-transformed upload column.
# After sorting, the i-th smallest of n samples has F(x) = i / n.  The earlier
# np.linspace(0, 1, n) gave the minimum a probability of 0 and scaled the
# remaining points by (i - 1) / (n - 1) instead.
sorted_up = df_chromecast['log_bytes_up'].sort_values()
n_obs = len(sorted_up)
cumulative_prob = np.arange(1, n_obs + 1) / n_obs

fig = plt.figure()
plt.plot(sorted_up, cumulative_prob, color='blue', marker='.')
plt.xlabel('Bytes Up')  # plotted values are the log-transformed column
plt.ylabel('Fx(x)')
plt.title('Chromecast Empirical Distribution Function of Bytes Up')
fig.savefig('../images/chromecast/general/edf/chromecast_edf_bytes_up.png', dpi=fig.dpi)

In [None]:
# Empirical distribution function of the log-transformed download column.
# After sorting, the i-th smallest of n samples has F(x) = i / n.  The earlier
# np.linspace(0, 1, n) gave the minimum a probability of 0 and scaled the
# remaining points by (i - 1) / (n - 1) instead.
sorted_down = df_chromecast['log_bytes_down'].sort_values()
n_obs = len(sorted_down)
cumulative_prob = np.arange(1, n_obs + 1) / n_obs

fig = plt.figure()
plt.plot(sorted_down, cumulative_prob, color='red', marker='.')
plt.xlabel('Bytes Down')  # plotted values are the log-transformed column
plt.ylabel('Fx(x)')
plt.title('Chromecast Empirical Distribution Function of Bytes Down')
fig.savefig('../images/chromecast/general/edf/chromecast_edf_bytes_down.png', dpi=fig.dpi)

### Boxplot

In [None]:
# Side-by-side boxplots of the log-transformed upload and download columns.
fig = plt.figure()
black_line = dict(color="black")
plt.boxplot(
    [df_chromecast['log_bytes_up'], df_chromecast['log_bytes_down']],
    patch_artist=True,  # required so the boxes can take a face colour
    boxprops=dict(facecolor="cyan", color="black"),
    medianprops=black_line,
    whiskerprops=black_line,
    capprops=black_line,
)
# The `labels=` keyword of boxplot is deprecated since Matplotlib 3.9; naming
# the ticks explicitly works on every version.  Boxes sit at x = 1, 2 by default.
plt.xticks([1, 2], ['Bytes Up', 'Bytes Down'])
plt.ylabel('Bytes')  # plotted values are the log-transformed columns
plt.title('Chromecast Boxplot of Bytes Up and Bytes Down')
fig.savefig('../images/chromecast/general/boxplot/chromecast_boxplot_bytes_up_down.png', dpi=fig.dpi)

### Statistical Analysis

In [None]:
# Summary table: mean, sample variance and sample standard deviation of the
# log-transformed upload/download columns (rows = statistic, columns = direction).
chromecast_bytes_up_down = (
    df_chromecast[['log_bytes_up', 'log_bytes_down']]
    .agg(['mean', 'var', 'std'])
    .rename(
        index={'mean': 'Mean', 'var': 'Variance', 'std': 'Standard Deviation'},
        columns={'log_bytes_up': 'Bytes Up', 'log_bytes_down': 'Bytes Down'},
    )
)
# Bare expression so the notebook renders the table as the cell output.
chromecast_bytes_up_down

## Smart TV

### Histogram

In [None]:
# Bin count from the row count: 1 + 3.3 * log10(n), truncated to an integer.
n_rows = len(df_smart_tv)
smart_tv_hist_bin = int(3.3 * np.log10(n_rows) + 1)

# Histogram of the log-transformed upload column, saved as a PNG.
fig, ax = plt.subplots()
ax.hist(
    df_smart_tv['log_bytes_up'],
    bins=smart_tv_hist_bin,
    color='blue',
    alpha=0.5,
    edgecolor='black',
)
ax.set_xlabel('Bytes Up')  # plotted values are the log-transformed column
ax.set_ylabel('Frequency')
ax.set_title('Smart TV histogram of Bytes Up')
fig.savefig('../images/smart_tv/general/histogram/smart_tv_hist_bytes_up.png', dpi=fig.dpi)

In [None]:
# Bin count from the row count: 1 + 3.3 * log10(n), truncated to an integer.
n_rows = len(df_smart_tv)
smart_tv_hist_bin = int(3.3 * np.log10(n_rows) + 1)

# Histogram of the log-transformed download column, saved as a PNG.
fig, ax = plt.subplots()
ax.hist(
    df_smart_tv['log_bytes_down'],
    bins=smart_tv_hist_bin,
    color='red',
    alpha=0.5,
    edgecolor='black',
)
ax.set_xlabel('Bytes Down')  # plotted values are the log-transformed column
ax.set_ylabel('Frequency')
ax.set_title('Smart TV histogram of Bytes Down')
fig.savefig('../images/smart_tv/general/histogram/smart_tv_hist_bytes_down.png', dpi=fig.dpi)

### Empirical Distribution Function

In [None]:
# Empirical distribution function of the log-transformed upload column.
# After sorting, the i-th smallest of n samples has F(x) = i / n.  The earlier
# np.linspace(0, 1, n) gave the minimum a probability of 0 and scaled the
# remaining points by (i - 1) / (n - 1) instead.
sorted_up = df_smart_tv['log_bytes_up'].sort_values()
n_obs = len(sorted_up)
cumulative_prob = np.arange(1, n_obs + 1) / n_obs

fig = plt.figure()
plt.plot(sorted_up, cumulative_prob, color='blue', marker='.')
plt.xlabel('Bytes Up')  # plotted values are the log-transformed column
plt.ylabel('Fx(x)')
plt.title('Smart TV Empirical Distribution Function of Bytes Up')
fig.savefig('../images/smart_tv/general/edf/smart_tv_edf_bytes_up.png', dpi=fig.dpi)

In [None]:
# Empirical distribution function of the log-transformed download column.
# After sorting, the i-th smallest of n samples has F(x) = i / n.  The earlier
# np.linspace(0, 1, n) gave the minimum a probability of 0 and scaled the
# remaining points by (i - 1) / (n - 1) instead.
sorted_down = df_smart_tv['log_bytes_down'].sort_values()
n_obs = len(sorted_down)
cumulative_prob = np.arange(1, n_obs + 1) / n_obs

fig = plt.figure()
plt.plot(sorted_down, cumulative_prob, color='red', marker='.')
plt.xlabel('Bytes Down')  # plotted values are the log-transformed column
plt.ylabel('Fx(x)')
plt.title('Smart TV Empirical Distribution Function of Bytes Down')
fig.savefig('../images/smart_tv/general/edf/smart_tv_edf_bytes_down.png', dpi=fig.dpi)

### Boxplot

In [None]:
# Side-by-side boxplots of the log-transformed upload and download columns.
fig = plt.figure()
black_line = dict(color="black")
plt.boxplot(
    [df_smart_tv['log_bytes_up'], df_smart_tv['log_bytes_down']],
    patch_artist=True,  # required so the boxes can take a face colour
    boxprops=dict(facecolor="cyan", color="black"),
    medianprops=black_line,
    whiskerprops=black_line,
    capprops=black_line,
)
# The `labels=` keyword of boxplot is deprecated since Matplotlib 3.9; naming
# the ticks explicitly works on every version.  Boxes sit at x = 1, 2 by default.
plt.xticks([1, 2], ['Bytes Up', 'Bytes Down'])
plt.ylabel('Bytes')  # plotted values are the log-transformed columns
plt.title('Smart TV Boxplot of Bytes Up and Bytes Down')
fig.savefig('../images/smart_tv/general/boxplot/smart_tv_boxplot_bytes_up_down.png', dpi=fig.dpi)

### Statistical Analysis

In [None]:
# Summary table: mean, sample variance and sample standard deviation of the
# log-transformed upload/download columns (rows = statistic, columns = direction).
smart_tv_bytes_up_down = (
    df_smart_tv[['log_bytes_up', 'log_bytes_down']]
    .agg(['mean', 'var', 'std'])
    .rename(
        index={'mean': 'Mean', 'var': 'Variance', 'std': 'Standard Deviation'},
        columns={'log_bytes_up': 'Bytes Up', 'log_bytes_down': 'Bytes Down'},
    )
)
# Bare expression so the notebook renders the table as the cell output.
smart_tv_bytes_up_down

# Data Analysis (Hourly)

## Chromecast

### Boxplot

In [None]:
# One boxplot figure per hour of day (0-23), each saved to its own PNG.
for hour in range(24):
    # Select this hour's rows once; both boxes are drawn from the same subset.
    hour_rows = df_chromecast[df_chromecast['hour'] == hour]
    black_line = dict(color="black")

    fig = plt.figure()
    plt.boxplot(
        [hour_rows['log_bytes_up'], hour_rows['log_bytes_down']],
        patch_artist=True,  # required so the boxes can take a face colour
        boxprops=dict(facecolor="cyan", color="black"),
        medianprops=black_line,
        whiskerprops=black_line,
        capprops=black_line,
    )
    # The `labels=` keyword of boxplot is deprecated since Matplotlib 3.9;
    # naming the ticks explicitly works on every version (boxes sit at x = 1, 2).
    plt.xticks([1, 2], ['Bytes Up', 'Bytes Down'])
    plt.ylabel('Bytes')  # plotted values are the log-transformed columns
    plt.title('Chromecast Boxplot of Bytes Up and Bytes Down for Hour {}'.format(hour))
    fig.savefig('../images/chromecast/hourly/boxplot/chromecast_boxplot_bytes_up_down_hour_{}.png'.format(hour), dpi=fig.dpi)

### Statistical Analysis

In [None]:
# Grouped view of the log-transformed upload column, reused for all three curves.
up_by_hour = df_chromecast.groupby('hour')['log_bytes_up']

fig = plt.figure()
# Plot order must match the legend entries below: mean, variance, std.
for stat_name, line_color in (('mean', 'red'), ('var', 'green'), ('std', 'blue')):
    plt.plot(up_by_hour.agg(stat_name), color=line_color, marker='.')
plt.xlabel('Hour')
plt.ylabel('Bytes')  # plotted values are statistics of the log-transformed column
plt.legend(['Mean', 'Variance', 'Standard Deviation'])
plt.title('Chromecast Bytes Up Mean, Variance and Standard Deviation by Hour')
fig.savefig('../images/chromecast/hourly/statistical_analysis/chromecast_line_bytes_up_mean_var_std_hour.png', dpi=fig.dpi)

## Smart TV

### Boxplot

In [None]:
# One boxplot figure per hour of day (0-23), each saved to its own PNG.
for hour in range(24):
    # Select this hour's rows once; both boxes are drawn from the same subset.
    hour_rows = df_smart_tv[df_smart_tv['hour'] == hour]
    black_line = dict(color="black")

    fig = plt.figure()
    plt.boxplot(
        [hour_rows['log_bytes_up'], hour_rows['log_bytes_down']],
        patch_artist=True,  # required so the boxes can take a face colour
        boxprops=dict(facecolor="cyan", color="black"),
        medianprops=black_line,
        whiskerprops=black_line,
        capprops=black_line,
    )
    # The `labels=` keyword of boxplot is deprecated since Matplotlib 3.9;
    # naming the ticks explicitly works on every version (boxes sit at x = 1, 2).
    plt.xticks([1, 2], ['Bytes Up', 'Bytes Down'])
    plt.ylabel('Bytes')  # plotted values are the log-transformed columns
    plt.title('Smart TV Boxplot of Bytes Up and Bytes Down for Hour {}'.format(hour))
    fig.savefig('../images/smart_tv/hourly/boxplot/smart_tv_boxplot_bytes_up_down_hour_{}.png'.format(hour), dpi=fig.dpi)

### Statistical Analysis

In [None]:
# Grouped view of the log-transformed upload column, reused for all three curves.
up_by_hour = df_smart_tv.groupby('hour')['log_bytes_up']

fig = plt.figure()
# Plot order must match the legend entries below: mean, variance, std.
for stat_name, line_color in (('mean', 'red'), ('var', 'green'), ('std', 'blue')):
    plt.plot(up_by_hour.agg(stat_name), color=line_color, marker='.')
plt.xlabel('Hour')
plt.ylabel('Bytes')  # plotted values are statistics of the log-transformed column
plt.legend(['Mean', 'Variance', 'Standard Deviation'])
plt.title('Smart TV Bytes Up Mean, Variance and Standard Deviation by Hour')
fig.savefig('../images/smart_tv/hourly/statistical_analysis/smart_tv_line_bytes_up_mean_var_std_hour.png', dpi=fig.dpi)