## Importing Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
%matplotlib inline

In [2]:
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

print(__version__) # requires version >= 1.9.0
import cufflinks as cf
init_notebook_mode(connected=True)
cf.go_offline()

4.14.3


## Reading csv file into dataframe

In [3]:
df = pd.read_csv('smartphone.csv')

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1528218 entries, 0 to 1528217
Data columns (total 4 columns):
index        1528218 non-null int64
source       1528218 non-null object
timestamp    1528218 non-null object
values       1528218 non-null object
dtypes: int64(1), object(3)
memory usage: 46.6+ MB


In [5]:
df.head()

Unnamed: 0,index,source,timestamp,values
0,1316828,step_detector,2017-06-29 07:45:16.506,['1.0']
1,1316829,battery,2017-06-29 07:45:16.514,['100']
2,1316830,activity,2017-06-29 07:45:16.579,['STILL: 100']
3,1316831,audio,2017-06-29 07:45:17.407,"['0.0', '0.0', '0.0', '32767.0']"
4,1316832,audio,2017-06-29 07:45:18.407,"['227.0', '113.5', '227.0', '227.0']"


In [6]:
df['source'].value_counts()

audio                  501293
step_detector          116671
orientation             99877
accelerometer           99867
gyroscope               99866
magnetometer            99762
gravity                 99750
linear_acceleration     99750
rotationVector          99747
pressure                98649
wifi                    63541
activity                16461
bluetooth               10127
light                    8999
step_counter             8899
battery                  4873
proximity                  86
Name: source, dtype: int64

## Analysis

In [7]:
df.head()

Unnamed: 0,index,source,timestamp,values
0,1316828,step_detector,2017-06-29 07:45:16.506,['1.0']
1,1316829,battery,2017-06-29 07:45:16.514,['100']
2,1316830,activity,2017-06-29 07:45:16.579,['STILL: 100']
3,1316831,audio,2017-06-29 07:45:17.407,"['0.0', '0.0', '0.0', '32767.0']"
4,1316832,audio,2017-06-29 07:45:18.407,"['227.0', '113.5', '227.0', '227.0']"


In [8]:
df['source'].value_counts()

audio                  501293
step_detector          116671
orientation             99877
accelerometer           99867
gyroscope               99866
magnetometer            99762
gravity                 99750
linear_acceleration     99750
rotationVector          99747
pressure                98649
wifi                    63541
activity                16461
bluetooth               10127
light                    8999
step_counter             8899
battery                  4873
proximity                  86
Name: source, dtype: int64

In [9]:
df.head(100).iplot(kind='bar',x='source',y='values')

## Analysing the TimeStamp

In [10]:
df['timestamp'].iloc[0]

'2017-06-29 07:45:16.506'

In [11]:
type(df['timestamp'].iloc[0])

str

In [12]:
df['timestamp'] = pd.to_datetime(df['timestamp'])

In [13]:
df['Hour'] = df['timestamp'].apply(lambda time: time.hour)
df['Month'] = df['timestamp'].apply(lambda time: time.month)
df['Day of Week'] = df['timestamp'].apply(lambda time: time.dayofweek)

In [14]:
dmap = {0:'Mon',1:'Tue',2:'Wed',3:'Thu',4:'Fri',5:'Sat',6:'Sun'}
df['Day of Week'] = df['Day of Week'].map(dmap)

In [15]:
def daytime(num):
    if(5<= num<12):
        return 'Morning'
    elif(12<= num < 17):
        return 'Afternoon'
    elif(17<= num <24 or 0<= num <5):
        return 'Evening'


In [16]:
df['DayTime'] = df['Hour'].apply(daytime)

In [17]:
df.head()

Unnamed: 0,index,source,timestamp,values,Hour,Month,Day of Week,DayTime
0,1316828,step_detector,2017-06-29 07:45:16.506,['1.0'],7,6,Thu,Morning
1,1316829,battery,2017-06-29 07:45:16.514,['100'],7,6,Thu,Morning
2,1316830,activity,2017-06-29 07:45:16.579,['STILL: 100'],7,6,Thu,Morning
3,1316831,audio,2017-06-29 07:45:17.407,"['0.0', '0.0', '0.0', '32767.0']",7,6,Thu,Morning
4,1316832,audio,2017-06-29 07:45:18.407,"['227.0', '113.5', '227.0', '227.0']",7,6,Thu,Morning


In [23]:
df.head(5000).iplot(kind='bar',x='DayTime',y='source')

## Converting 'Values' from list of strings to list of float

In [20]:
def myfunc(mystr):
    try:
        newlst = mystr[1:-1].split(', ')
        for i in range(0,len(newlst)):
            if ':' in newlst[i]:
                newlst[i] = float(newlst[i].split(': ')[1][1:-1])
            elif newlst[i][0] == "'":
                newlst[i] = float(newlst[i][1:-1])
            else:
                newlst[i] = float(newlst[i])
        return newlst
    except:
        return np.nan

In [21]:
myfunc(df['values'][1528216])

[7207.0, 5500.0, 8953.0, 4838.0, 3544.0, 3564.0]

In [22]:
df['values'] = df['values'].apply(myfunc)

In [23]:
df.head()

Unnamed: 0,index,source,timestamp,values,Hour,Month,Day of Week,DayTime
0,1316828,step_detector,2017-06-29 07:45:16.506,[1.0],7,6,Thu,Morning
1,1316829,battery,2017-06-29 07:45:16.514,[100.0],7,6,Thu,Morning
2,1316830,activity,2017-06-29 07:45:16.579,[0.0],7,6,Thu,Morning
3,1316831,audio,2017-06-29 07:45:17.407,"[0.0, 0.0, 0.0, 32767.0]",7,6,Thu,Morning
4,1316832,audio,2017-06-29 07:45:18.407,"[227.0, 113.5, 227.0, 227.0]",7,6,Thu,Morning


In [24]:
newdf = df.drop(['index', 'timestamp'],axis=1)

In [25]:
newdf.head()

Unnamed: 0,source,values,Hour,Month,Day of Week,DayTime
0,step_detector,[1.0],7,6,Thu,Morning
1,battery,[100.0],7,6,Thu,Morning
2,activity,[0.0],7,6,Thu,Morning
3,audio,"[0.0, 0.0, 0.0, 32767.0]",7,6,Thu,Morning
4,audio,"[227.0, 113.5, 227.0, 227.0]",7,6,Thu,Morning


In [26]:
newdf.dropna()

Unnamed: 0,source,values,Hour,Month,Day of Week,DayTime
0,step_detector,[1.0],7,6,Thu,Morning
1,battery,[100.0],7,6,Thu,Morning
2,activity,[0.0],7,6,Thu,Morning
3,audio,"[0.0, 0.0, 0.0, 32767.0]",7,6,Thu,Morning
4,audio,"[227.0, 113.5, 227.0, 227.0]",7,6,Thu,Morning
5,activity,[0.0],7,6,Thu,Morning
6,activity,[0.0],7,6,Thu,Morning
7,audio,"[590.0, 272.33334, 590.0, 227.0]",7,6,Thu,Morning
8,audio,"[1724.0, 635.25, 1724.0, 227.0]",7,6,Thu,Morning
9,audio,"[946.0, 697.4, 1724.0, 227.0]",7,6,Thu,Morning


In [27]:
def mysumfunc(mylist):
    try:
        mysum = 0
        for i in (mylist):
            mysum = mysum+i
        return mysum
    except:
        return np.nan
        

In [28]:
df['Final Values'] = df['values'].apply(mysumfunc)

In [29]:
df['Final Values'][0]

1.0

In [30]:
newdf.head()

Unnamed: 0,source,values,Hour,Month,Day of Week,DayTime
0,step_detector,[1.0],7,6,Thu,Morning
1,battery,[100.0],7,6,Thu,Morning
2,activity,[0.0],7,6,Thu,Morning
3,audio,"[0.0, 0.0, 0.0, 32767.0]",7,6,Thu,Morning
4,audio,"[227.0, 113.5, 227.0, 227.0]",7,6,Thu,Morning


In [28]:
by_source = newdf.groupby("source")

In [29]:
pvmatrix = df.pivot_table(values='Final Values',index=['source'],columns=['DayTime'])

In [30]:
pvmatrix


DayTime,Afternoon,Evening,Morning
source,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
accelerometer,-2.126475,-2.628085,-3.645166
activity,0.0,0.0,0.0
audio,44662.151021,41801.812986,42881.264111
battery,55.562573,26.741393,84.214785
bluetooth,1085.10851,1052.477106,1031.417162
gravity,-2.338013,-2.787422,-3.850559
gyroscope,-0.026714,-0.03012,-0.012648
light,217.872094,98.516968,13.243398
linear_acceleration,0.219094,0.156232,0.199768
magnetometer,-11.333289,5.455325,-9.267959
