In [22]:
import pandas as pd
import numpy as np


In [23]:

# Pandas Series creation and indexing 

step_data = [360,7891, 9761, 3907, 4338, 5373]

step_counts = pd.Series(step_data, name="steps")

print(step_counts)

0     360
1    7891
2    9761
3    3907
4    4338
5    5373
Name: steps, dtype: int64


In [24]:
# add date range to a series 

step_counts.index = pd.date_range('20150329', periods=6)

print(step_counts)

2015-03-29     360
2015-03-30    7891
2015-03-31    9761
2015-04-01    3907
2015-04-02    4338
2015-04-03    5373
Freq: D, Name: steps, dtype: int64


In [25]:
# select data by index values 
# just like a dict

print(step_counts['2015-04-01'])

# or by indexing position like in arrays 
print(step_counts[3])

# select all of april
print(step_counts['2015-04'])

3907
3907
2015-04-01    3907
2015-04-02    4338
2015-04-03    5373
Freq: D, Name: steps, dtype: int64


In [26]:
# Data types viewing and converting

#view a data type
print(step_counts.dtypes)

# convert to a float 
step_counts = step_counts.astype(np.float)

int64


AttributeError: module 'pandas' has no attribute 'float'

In [28]:
# DataFrames can be created from lists, dicts, pandas series 

# clycling distance 
cycling_data = [10.7, 0, None , 2.4, 15.3, 10.9, None]

# creating a tuple of data
joined_data = list(zip(step_data, cycling_data))

# the data frame 
activity_df = pd.DataFrame(joined_data)

print(activity_df)

      0     1
0   360  10.7
1  7891   0.0
2  9761   NaN
3  3907   2.4
4  4338  15.3
5  5373  10.9


In [30]:
# label columns and an index can be added

# add a column name to dataframe
activity_df = pd.DataFrame(
    joined_data,
    index = pd.date_range('20150329', periods=6),
    columns=['Walking', 'Cycling']
)

print(activity_df)

            Walking  Cycling
2015-03-29      360     10.7
2015-03-30     7891      0.0
2015-03-31     9761      NaN
2015-04-01     3907      2.4
2015-04-02     4338     15.3
2015-04-03     5373     10.9


In [31]:
# Dataframe rows can be indexted by row using 'loc' and 'iloc' methods 
print(activity_df.loc['2015-04-01'])

Walking    3907.0
Cycling       2.4
Name: 2015-04-01 00:00:00, dtype: float64


In [32]:
# select row of data by integer position
print(activity_df.iloc[-3])

Walking    3907.0
Cycling       2.4
Name: 2015-04-01 00:00:00, dtype: float64


In [33]:
# DataFrame columns can be indexed by name 
print(activity_df['Walking'])

2015-03-29     360
2015-03-30    7891
2015-03-31    9761
2015-04-01    3907
2015-04-02    4338
2015-04-03    5373
Freq: D, Name: Walking, dtype: int64


In [34]:
# dataframes columns can also be indexed as properties
print(activity_df.Walking)

2015-03-29     360
2015-03-30    7891
2015-03-31    9761
2015-04-01    3907
2015-04-02    4338
2015-04-03    5373
Freq: D, Name: Walking, dtype: int64


In [35]:
#data frames can be indexed by integer
print(activity_df.iloc[:,0])

2015-03-29     360
2015-03-30    7891
2015-03-31    9761
2015-04-01    3907
2015-04-02    4338
2015-04-03    5373
Freq: D, Name: Walking, dtype: int64
