# Pandas Examples

---
## Example 1 - Product Demand

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Two parallel lists: demands[i] is the demand for products[i]
products = [
    'Product 1',
    'Product 2',
    'Product 4',
    'Product 6',
]
demands = [
    29,
    17,
    42,
    21,
]

In [None]:
# Pair every product with its demand -> list of (product, demand) tuples
productDataset = [(name, qty) for name, qty in zip(products, demands)]
productDataset

In [None]:
# Build a two-column DataFrame from the (product, demand) tuples
df = pd.DataFrame(
    productDataset,
    columns=['Product Name', 'Demand'],
)
df

#### 1) Print just the column of product names:

#### 2) Print the demand column first, and the product name column second:

#### 3) Print only the first 2 rows of data:

#### 4) Print only the first 2 rows, and only the demand column:

#### 5) Print only the last 2 rows of data.  Print the demand column before the product name column.

#### 6) Sort the products according to decreasing demand:

#### 7)  Find the product with the maximum demand
**NOTE:  Save both the product name and the corresponding maximum demand.  Do not use "for" loops.**

#### 8)  Add a new column of data, named 'Sales Price'

**Assign the values as numpy NaN for now**

#### 9) Assign the following prices to your new column

In [None]:
# Sales prices for exercise 9 -- presumably one per product row, in row order (confirm)
prices = [1.99, 2.50, 0.99, 3.75]

#### 10)  Add another column named 'Cost', with the following values

In [None]:
# Costs for the 'Cost' column of exercise 10 -- presumably one per product row, in row order (confirm)
costs = [1.0, 2.0, 0.5, 2.25]

#### 11) Add yet another column named 'Profit'
**Profit = (sales price) - (cost)**

#### 12)  Add a row of data
**Product 9 has a demand of 99, a sales price of 9.99, a cost of 8.88, and a profit of 1.11**

#### 13) Delete the Sales Price column

#### 14) Make Product Name an index

#### 15)  Filter just for Product 4

#### 16) Filter for products 1 through 4

---

## Example 2 -- Daily Show Guests

In [None]:
# Import the Daily Show guest list
# The path is relative, so it is resolved against the notebook's working directory.
csvFile = 'daily_show_guests.csv'
# NOTE: this rebinds `df`; the product DataFrame from Example 1 is no longer
# reachable under that name.
df = pd.read_csv(csvFile)
df

In [None]:
# Show the dtype pandas assigned to each column of the loaded frame
df.dtypes

In [None]:
# Parse the 'Show' strings into pandas Timestamps, stored in a new 'date' column.
# No `infer_datetime_format` argument: it is deprecated since pandas 2.0, where
# format inference is the default and passing the flag only emits a warning.
# NOTE(review): 'Show' was described here as d/m/YY, but to_datetime reads
# ambiguous dates month-first unless dayfirst=True is passed -- confirm the
# actual layout against the CSV before relying on the parsed dates.
df['date'] = pd.to_datetime(df['Show'])
df.head()

In [None]:
# Split the parsed date into calendar components, one new column each
df['Year'] = df['date'].dt.year
df['Month'] = df['date'].dt.month
df['Day'] = df['date'].dt.day
df['Weekday'] = df['date'].dt.weekday  # numeric weekday
df.head()

In [None]:
# Make the date an index:
# Only the index is replaced; the 'date' column itself stays in the frame.
df.index = df['date']
# Weekday number of every index entry
df.index.weekday

In [None]:
# Position in this list == pandas weekday number: 0 is Monday ... 6 is Sunday
weekdayList = [
    'Monday',
    'Tuesday',
    'Wednesday',
    'Thursday',
    'Friday',
    'Saturday',
    'Sunday',
]

In [None]:
# Get some general information about our dataframe:
# (summary statistics of the 'Group' column only)
df['Group'].describe()

In [None]:
# Create groups:
# size() gives the number of rows for each distinct 'Group' value
group = df.groupby('Group').size()
group

In [None]:
# Normalise the lower-case 'media' label so those rows are grouped with 'Media'.
# One .loc call selects the rows and the column together, so the assignment is
# written into df itself. The chained form df['Group'][mask] = ... may assign to
# a temporary copy (SettingWithCopyWarning) and has no effect under pandas
# Copy-on-Write.
df.loc[df['Group'] == 'media', 'Group'] = 'Media'
df

In [None]:
# Recompute the per-group row counts so they reflect the current 'Group' labels
group = df.groupby('Group').size()
group

In [None]:
# Per-group row counts as a one-column DataFrame; the column is named 'counts'
counts_df = df.groupby('Group').size().to_frame('counts')
counts_df

In [None]:
# Bar chart of the group counts, largest group first
import matplotlib.pyplot as plt

Sorted = counts_df.sort_values(by='counts', ascending=False)
Sorted.plot.bar()
plt.show()

In [None]:
# Rows whose 'Group' is missing (null); only the 'Group' column is shown
df.loc[df['Group'].isnull(), ['Group']]

In [None]:
# Let's find the total number of guests:
# describe() summarises the column. NOTE(review): for a text column the summary
# should be count / unique / top / freq -- confirm the dtype of 'Raw_Guest_List'.
df['Raw_Guest_List'].describe()

In [None]:
# Guest names for the rows whose 'Group' is 'Musician'
df.loc[df['Group'] == 'Musician', ['Raw_Guest_List']]

In [None]:
# Summary of the musician guest names; describe() is what reports how many are unique
df.loc[df['Group'] == 'Musician', ['Raw_Guest_List']].describe()

In [None]:
# Distinct guest names among the 'Musician' rows
uniqueMusicians = df.loc[df['Group'] == 'Musician', 'Raw_Guest_List'].unique()
uniqueMusicians

In [None]:
# Guests that appeared in 2010
# Option 1: filter on the 'Year' column
df.loc[df['Year'] == 2010, ['Raw_Guest_List']]

In [None]:
# Option 2 (use the date index):
# Label-slice the date index with date strings; both end points are included.
# print(...) is written as a function call so the cell runs on Python 3
# (a single parenthesised argument prints the same on Python 2).
print(df[['Raw_Guest_List', 'Group']].loc['2010-1-1':'2010-12-31'])

In [None]:
# Guests in the 'Acting' group whose show date falls in 2010:
# first keep the 'Acting' rows, then slice the date index
(
    df.loc[df['Group'] == 'Acting', ['Raw_Guest_List', 'Group']]
      .loc['2010-1-1':'2010-12-31']
)

In [None]:
# Summary statistics for the 2010 'Acting' guests
(
    df.loc[df['Group'] == 'Acting', ['Raw_Guest_List', 'Group']]
      .loc['2010-1-1':'2010-12-31']
      .describe()
)

In [None]:
# Guests in the 'Politician' group who appeared in month 11 (November)
df.loc[df['Month'].eq(11) & df['Group'].eq('Politician'), ['Raw_Guest_List']]

In [None]:
# Rows per weekday: count() tallies the non-null entries of every column for
# each 'Weekday' value; only the 'Year' tally is displayed
weekday_counts = df.groupby(by='Weekday').count()
weekday_counts.loc[:, ['Year']]


---

## Example 3 -- Scraping HTML Tables

*This material comes from Wes McKinney's textbook*

In [None]:
# Fetch the page and parse its HTML tables; `tables` is used below as a
# sequence of DataFrames (len(tables), tables[0]).
# NOTE(review): needs network access, and the URL may have moved -- confirm it still resolves.
tables = pd.read_html('https://www.fdic.gov/bank/individual/failed/banklist.html')

In [None]:
# How many tables did we find on this webpage?
# (one entry in `tables` per table that was parsed from the page)
len(tables)

In [None]:
# Our "failure" data will be in the first (and only) table:
# NOTE(review): "only" relies on len(tables) being 1 -- confirm before trusting it.
failures = tables[0]

In [None]:
# Preview the first rows to see which columns the scraped table provides
failures.head()

In [None]:
# How many bank failures were there per year?
# Parse the 'Closing Date' values into datetimes so the year can be extracted
close_timestamps = pd.to_datetime(failures['Closing Date'])
# Count the rows for each closing year
close_timestamps.dt.year.value_counts()

In [None]:
# All rows whose state code 'ST' is 'NY'
failures.loc[failures['ST'] == 'NY']

In [None]:
# Number of rows whose state code 'ST' is 'NY'
failures.loc[failures['ST'] == 'NY', 'ST'].count()


---

## Further Studying

Check out https://realpython.com/python-data-cleaning-numpy-pandas/ for some more examples.