# Activity Analysis
In this notebook we'll do some analysis of the Google "ActivitySegments" using pandas. We'll plot our results using Matplotlib.

In [None]:
%matplotlib inline
import pandas as pd

In [None]:
# Location of the activities CSV file -- change this to point at your own file.
ACTIVITIES_PATH = 'activities.csv'

# Load the file, asking pandas to parse both timestamp columns as dates
# instead of leaving them as plain strings.
activities = pd.read_csv(
    ACTIVITIES_PATH,
    encoding='utf-8',
    parse_dates=['start_timestamp', 'end_timestamp'],
)

# Quick sanity check: overall size first, then the per-column summary.
print(f'Data has {activities.shape[0]} rows and {activities.shape[1]} columns')
activities.info()

In [None]:
# Preview the first five rows of the loaded data
activities.head(n=5)

In [None]:
# Count how many records there are for each activity type in the data
num_records = activities['activity_type'].value_counts()
print('Number of records per activity type:')
num_records

In [None]:
# Calculate total travel distance by activity type *in kilometers*:
# sum travel_distance_meters per activity type, divide by 1000, and sort the
# totals in ascending order. The sort is chained (not in place) so that
# `distance` is built by a single expression.
print('Total distance traveled by activity type:')
distance = (
    activities.groupby('activity_type')['travel_distance_meters']
    .sum()
    .div(1000)
    .sort_values()
)
distance

In [None]:
# Plot travel distance, by activity type, as a horizontal bar chart
ax = distance.plot.barh()
ax.set_title('Distance traveled by activity type')
ax.set_xlabel('Number of Kilometers')
# Leave the y-axis label blank. The trailing semicolon keeps the notebook from
# echoing the Text object returned by set_ylabel() as the cell's output.
ax.set_ylabel('');

In [None]:
# Calculate total travel time by activity type *in hours*
print('Total time spent traveling by activity type:')

# Duration of each record, stored as a new 'duration' column on `activities`.
activities['duration'] = activities['end_timestamp'] - activities['start_timestamp']

# Sum the durations per activity type, convert the totals from seconds to
# hours (3600 seconds per hour), and sort them in ascending order. The sort is
# chained (not in place) so that `time_spent` is built by a single expression.
time_spent = (
    activities.groupby('activity_type')['duration']
    .sum()
    .dt.total_seconds()
    .div(3600)
    .sort_values()
)
time_spent

In [None]:
# Plot travel time, by activity type, as a horizontal bar chart
ax = time_spent.plot.barh()
ax.set_title('Time spent by activity type')
ax.set_xlabel('Number of Hours')
# Leave the y-axis label blank. The trailing semicolon keeps the notebook from
# echoing the Text object returned by set_ylabel() as the cell's output.
ax.set_ylabel('');