## 1. Load libraries and dependencies

In [2]:
import warnings
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats
import tensorflow as tf
# Silence every warning raised from this point on. The imports above have
# already executed, so warnings emitted while importing are not affected.
warnings.filterwarnings('ignore')

%matplotlib inline
# Select matplotlib's built-in "ggplot" style sheet for figures.
plt.style.use('ggplot')
# NOTE(review): sns.set() applies seaborn's own default theme, which probably
# overrides the ggplot style chosen on the previous line - confirm which look
# is intended. color_codes=True remaps the single-letter colour codes
# ("b", "g", "r", ...) to the seaborn palette.
sns.set(color_codes=True)

## 2. Download the Data

In [3]:
import os
from zipfile import ZipFile
from six.moves import urllib

# Base URL of the site that hosts the dataset archive.
DOWNLOAD_ROOT = "https://bbdc.csl.uni-bremen.de/"
# Used twice: as the URL path below DOWNLOAD_ROOT, and as the default local
# directory (relative to the working directory) for the downloaded and
# extracted files.
BBDC_PATH = "images/2019"
# Full URL of the zip archive downloaded by fetch_actvty_recogn_data().
BBDC_URL = DOWNLOAD_ROOT + BBDC_PATH + "/bbdc_2019_Bewegungsdaten_mit_referenz.zip"

def fetch_actvty_recogn_data(bbdc_url=BBDC_URL, bbdc_path=BBDC_PATH):
    """Download the dataset archive and extract it into ``bbdc_path``.

    Parameters
    ----------
    bbdc_url : str
        URL of the zip archive to download.
    bbdc_path : str
        Local directory that receives both the downloaded zip file and its
        extracted contents. It is created if it does not exist yet.

    Notes
    -----
    The archive is downloaded on every call, overwriting any previous copy
    of the zip file in ``bbdc_path``.
    """
    # exist_ok=True replaces the separate isdir() check and is a no-op when
    # the directory is already there.
    os.makedirs(bbdc_path, exist_ok=True)
    zip_path = os.path.join(bbdc_path, "bbdc_2019_Bewegungsdaten_mit_referenz.zip")
    urllib.request.urlretrieve(bbdc_url, zip_path)
    # The context manager closes the archive even when extraction fails,
    # so the file handle is never leaked.
    with ZipFile(zip_path) as activity_zip:
        activity_zip.extractall(path=bbdc_path)

In [4]:
# Download the archive and extract it into BBDC_PATH (the function's default
# target). The function downloads unconditionally, so re-running this cell
# fetches the zip file again.
fetch_actvty_recogn_data()

### Load the Training Data

In [26]:
def load_activity_data(bbdc_path=BBDC_PATH, filename="train.csv"):
    """Read a CSV file from the dataset directory into a DataFrame.

    Parameters
    ----------
    bbdc_path : str
        Directory that contains the CSV file.
    filename : str
        Name of the CSV file inside ``bbdc_path``. Defaults to
        ``"train.csv"``, so a call without arguments loads the training
        file exactly as before.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the CSV file.
    """
    csv_path = os.path.join(bbdc_path, filename)
    return pd.read_csv(csv_path)

### Take a quick look at the Data Structure

In [6]:
# Load the training CSV and preview its first five rows.
# NOTE: load_activity_data is redefined later in this notebook to read
# challenge.csv by default. If that later cell has already been executed,
# re-run the training definition above before running this cell.
train_set = load_activity_data()
train_set.head()

Unnamed: 0,Subject,Datafile,Label
0,Subject02,Subject02/Subject02_Aufnahme000.csv,curve-left-step
1,Subject02,Subject02/Subject02_Aufnahme001.csv,curve-left-step
2,Subject02,Subject02/Subject02_Aufnahme002.csv,stand-to-sit
3,Subject02,Subject02/Subject02_Aufnahme003.csv,curve-right-spin-Rfirst
4,Subject02,Subject02/Subject02_Aufnahme004.csv,jump-one-leg


In [8]:
# Dimensions of the training table as a (rows, columns) tuple.
train_set.shape

(6401, 3)

In [9]:
# Select rows that exactly repeat an earlier row; with the default settings
# duplicated() leaves the first occurrence unmarked.
duplicate_rows_train_set = train_set[train_set.duplicated()]
# Despite the label, this prints the (rows, columns) shape of the duplicate
# subset, not a single count; the first element is the number of duplicates.
print("number of duplicate rows: ", duplicate_rows_train_set.shape)

number of duplicate rows:  (0, 3)


In [10]:
# Non-null value count per column. No rows were dropped above (no duplicates
# were found), so this is only a sanity check against the row count.
train_set.count()

Subject     6401
Datafile    6401
Label       6401
dtype: int64

In [11]:
# Number of rows per subject, sorted from most to fewest.
train_set["Subject"].value_counts()

Subject06    455
Subject03    440
Subject17    440
Subject19    440
Subject02    440
Subject07    440
Subject13    440
Subject12    440
Subject09    439
Subject04    438
Subject05    438
Subject18    436
Subject11    432
Subject08    428
Subject16    255
Name: Subject, dtype: int64

In [12]:
# Count the missing (null) values in each column.
print(train_set.isnull().sum())

Subject     0
Datafile    0
Label       0
dtype: int64


In [13]:
# Preview the last five rows of the training table.
train_set.tail()

Unnamed: 0,Subject,Datafile,Label
6396,Subject19,Subject19/Subject19_Aufnahme435.csv,curve-left-spin-Rfirst
6397,Subject19,Subject19/Subject19_Aufnahme436.csv,jump-one-leg
6398,Subject19,Subject19/Subject19_Aufnahme437.csv,jump-two-leg
6399,Subject19,Subject19/Subject19_Aufnahme438.csv,curve-right-step
6400,Subject19,Subject19/Subject19_Aufnahme439.csv,curve-right-spin-Rfirst


In [14]:
# Structural summary: index range, per-column non-null counts and dtypes,
# and memory usage.
train_set.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6401 entries, 0 to 6400
Data columns (total 3 columns):
Subject     6401 non-null object
Datafile    6401 non-null object
Label       6401 non-null object
dtypes: object(3)
memory usage: 150.1+ KB


In [15]:
# All three columns are of object dtype, so describe() reports count, number
# of unique values, most frequent value (top) and its frequency (freq)
# instead of numeric statistics.
train_set.describe()

Unnamed: 0,Subject,Datafile,Label
count,6401,6401,6401
unique,15,6401,23
top,Subject06,Subject13/Subject13_Aufnahme358.csv,curve-left-spin-Rfirst
freq,455,1,320


In [16]:
# Data type of each column (the same dtypes are also listed by info()).
train_set.dtypes

Subject     object
Datafile    object
Label       object
dtype: object

### Load the Test Data

In [24]:
import pandas as pd 

def load_activity_data(bbdc_path=BBDC_PATH, filename="challenge.csv"):
    """Read the test (challenge) CSV from the dataset directory.

    NOTE: this reuses the name of the loader defined in the training section
    of this notebook. Once this cell has run, ``load_activity_data()`` reads
    ``challenge.csv`` by default instead of ``train.csv``.

    Parameters
    ----------
    bbdc_path : str
        Directory that contains the CSV file.
    filename : str
        Name of the CSV file inside ``bbdc_path``. Defaults to
        ``"challenge.csv"``, so a call without arguments behaves as before.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the CSV file.
    """
    csv_path = os.path.join(bbdc_path, filename)
    return pd.read_csv(csv_path)

In [25]:
# Load the challenge (test) CSV and preview its first five rows. This relies
# on the load_activity_data definition in the cell directly above, which
# reads challenge.csv rather than train.csv.
test_set = load_activity_data()
test_set.head()

Unnamed: 0,Subject,Datafile,Label
0,Subject01,Subject01/Subject01_Aufnahme000.csv,X
1,Subject01,Subject01/Subject01_Aufnahme001.csv,X
2,Subject01,Subject01/Subject01_Aufnahme002.csv,X
3,Subject01,Subject01/Subject01_Aufnahme003.csv,X
4,Subject01,Subject01/Subject01_Aufnahme004.csv,X
