## Introduction to Pandas
- Library for computation with tabular data

- Mixed types of data allowed in a single table

- Columns and rows of data can be named

- Advanced data aggregation and statistical functions

In [1]:
# Pandas Series creation and indexing
# (the values come from a step tracking application)

import pandas as pd

# Raw step totals as a plain Python list
step_data = [3620, 7891, 9761, 3907, 4338, 5373]

# Wrap the list in a Series called 'steps'; no index is supplied here,
# so pandas labels the entries with their integer positions
step_counts = pd.Series(data=step_data, name='steps')
print(step_counts)

0    3620
1    7891
2    9761
3    3907
4    4338
5    5373
Name: steps, dtype: int64


In [2]:
# Build a Pandas Series from the step tracking application's data
import pandas as pd

# Step totals, kept as an ordinary list so they can be reused later
step_data = [
    3620, 7891, 9761,
    3907, 4338, 5373,
]

# The Series carries the name 'steps' and a default integer index
step_counts = pd.Series(data=step_data, name='steps')
print(step_counts)

0    3620
1    7891
2    9761
3    3907
4    4338
5    5373
Name: steps, dtype: int64


In [3]:
# Relabel the six entries with consecutive calendar days starting on
# 2015-03-29 (daily frequency is date_range's default, spelled out here)
step_counts.index = pd.date_range(start='20150329', periods=6, freq='D')
print(step_counts)

2015-03-29    3620
2015-03-30    7891
2015-03-31    9761
2015-04-01    3907
2015-04-02    4338
2015-04-03    5373
Freq: D, Name: steps, dtype: int64


In [4]:
# Look up a single value by its index label,
# using the same bracket syntax as a dictionary
steps_on_date = step_counts['2015-04-01']
print(steps_on_date)

3907


In [5]:
# Two ways to pick a single value out of the Series

# 1. By index label, with dictionary-style brackets
steps_by_label = step_counts['2015-04-01']

# 2. By integer position, like an array (position 3 is the fourth entry)
steps_by_position = step_counts.iloc[3]

# Both lookups refer to the same entry, so the same value is printed twice
print(steps_by_label)
print(steps_by_position)

3907
3907


In [6]:
# The type of the stored values can be inspected (and converted);
# for a Series, `dtype` and `dtypes` return the same thing
print(step_counts.dtype)

int64


In [7]:
# Data types can be viewed and converted
import numpy as np

# Type of the values before the conversion
print(step_counts.dtype)

# astype returns a new Series rather than changing the existing one,
# so the name is re-bound to the converted result
step_counts = step_counts.astype(np.float64)

# Values and type after the conversion
print(step_counts)
print(step_counts.dtype)

int64
2015-03-29    3620.0
2015-03-30    7891.0
2015-03-31    9761.0
2015-04-01    3907.0
2015-04-02    4338.0
2015-04-03    5373.0
Freq: D, Name: steps, dtype: float64
float64


In [8]:
# Invalid data points can be easily filled with values

# Create invalid data: overwrite the entries at positions 1 and 2 with NaN
step_counts.iloc[1:3] = np.nan

# Replace every NaN with zero; fillna returns a new Series, so the name
# is re-bound to the filled result
# (equivalently, step_counts.fillna(0., inplace=True))
step_counts = step_counts.fillna(value=0.0)

# Show the two entries that were just filled
print(step_counts.iloc[1:3])

2015-03-30    0.0
2015-03-31    0.0
Freq: D, Name: steps, dtype: float64


In [9]:
# Cycling distance (None marks a missing reading)
cycling_data = [10.7, 0, None, 2.4, 15.3]

# Pair each step count with a cycling distance, giving a list of tuples.
# zip stops at the end of the shorter input, so only as many pairs are
# produced as there are cycling entries (five)
joined_data = [
    (steps, distance)
    for steps, distance in zip(step_data, cycling_data)
]

# The dataframe: one row per tuple; no column names are given here
activity_df = pd.DataFrame(data=joined_data)

print(activity_df)

      0     1
0  3620  10.7
1  7891   0.0
2  9761   NaN
3  3907   2.4
4  4338  15.3


In [10]:
import pandas as pd

# Sample data: one list of six values per activity
joined_data = dict(
    Walking=[1000, 1500, 1200, 1300, 1400, 1600],
    Cycling=[200, 300, 250, 400, 350, 450],
)

# Build the dataframe with explicit column names and a row index of
# six consecutive dates starting on 2015-03-29
activity_df = pd.DataFrame(
    data=joined_data,
    columns=['Walking', 'Cycling'],
    index=pd.date_range(start='2015-03-29', periods=6),
)

print(activity_df)

            Walking  Cycling
2015-03-29     1000      200
2015-03-30     1500      300
2015-03-31     1200      250
2015-04-01     1300      400
2015-04-02     1400      350
2015-04-03     1600      450


In [11]:
# DataFrame rows can be selected with the 'loc' (by label) and
# 'iloc' (by position) indexers

# Here: the row whose index label is 2015-04-01
row_by_label = activity_df.loc['2015-04-01']
print(row_by_label)

Walking    1300
Cycling     400
Name: 2015-04-01 00:00:00, dtype: int64


In [12]:
# Select a row of data by integer position; a negative position counts
# back from the end, so -3 is the third row from the last
row_by_position = activity_df.iloc[-3]
print(row_by_position)

Walking    1300
Cycling     400
Name: 2015-04-01 00:00:00, dtype: int64


In [13]:
# Select a column by putting its name in brackets
walking_by_name = activity_df['Walking']
print(walking_by_name)

2015-03-29    1000
2015-03-30    1500
2015-03-31    1200
2015-04-01    1300
2015-04-02    1400
2015-04-03    1600
Freq: D, Name: Walking, dtype: int64


In [15]:
# Object-oriented approach: the column is read as an attribute
# of the dataframe
walking_by_attribute = activity_df.Walking
print(walking_by_attribute)

2015-03-29    1000
2015-03-30    1500
2015-03-31    1200
2015-04-01    1300
2015-04-02    1400
2015-04-03    1600
Freq: D, Name: Walking, dtype: int64


In [16]:
# DataFrame columns can be indexed by integer position:
# take every row (:) of the column at position 0
first_column = activity_df.iloc[:, 0]
print(first_column)

2015-03-29    1000
2015-03-30    1500
2015-03-31    1200
2015-04-01    1300
2015-04-02    1400
2015-04-03    1600
Freq: D, Name: Walking, dtype: int64


In [None]:
# The location of the data file
filepath = 'data/Iris_Data'