In [None]:
import pandas as pd

### Pandas Series

In [None]:
# Signature reminder: pd.Series(data, index).
# The labels and the (mixed-type) values are kept in separate lists so the
# pairing between them is easy to read.
grocery_labels = ['eggs', 'apples', 'milk', 'bread']
grocery_values = [30, 6, 'Yes', 'No']
groceries = pd.Series(data=grocery_values, index=grocery_labels)
groceries

In [None]:
# Size of each dimension, as a tuple.
groceries.shape

In [None]:
# Number of dimensions of the Series.
groceries.ndim

In [None]:
# Total number of elements.
groceries.size

In [None]:
# The underlying data, without the index labels.
groceries.values

In [None]:
# The index labels.
groceries.index

In [None]:
# `in` on a Series tests membership among the index labels, not the values.
x = 'bananas' in groceries
x

In [None]:
# Same membership test, this time for the label 'bread'.
y = 'bread' in groceries
y

In [None]:
# Look up a single element by its index label.
groceries['eggs']

In [None]:
# Passing a list of labels returns a Series holding just those elements.
groceries[['milk', 'bread']]

In [None]:
# .loc makes it explicit that the lookup is by label.
groceries.loc[['eggs', 'apples']]

In [None]:
 groceries[[0, 1]]

In [None]:
# Select the last element by (negative) integer position.
# .iloc is used because plain [] with integer keys on a label-indexed Series
# is deprecated in pandas 2.x: the integers will be treated as labels, not
# positions.
groceries.iloc[[-1]]

In [None]:
# Select the first element by integer position.
# .iloc is used because plain [] with an integer key on a label-indexed Series
# is deprecated in pandas 2.x: the integer will be treated as a label, not a
# position.
groceries.iloc[0]

In [None]:
# .iloc makes it explicit that the lookup is by integer position.
groceries.iloc[[2, 3]]

In [None]:
# Display the Series to inspect its current contents.
groceries

In [None]:
# Series are mutable: assign a new value to an existing label.
groceries['eggs'] = 2

In [None]:
# Display the Series again to see the effect of the assignment.
groceries

In [None]:
# We can also delete items from a Pandas Series by using the .drop() method.
# By default drop() returns a new Series and leaves `groceries` untouched.
groceries.drop('apples')

In [None]:
# `groceries` itself is not changed by a drop() call without inplace=True.
groceries

In [None]:
# inplace=True removes the label from `groceries` itself and returns None.
# NOTE: not idempotent - running this cell a second time raises KeyError
# because 'apples' is no longer present.
groceries.drop('apples', inplace=True)

In [None]:
# Display the Series after the in-place drop.
groceries

In [None]:
# Reference - Series reindexing / selection / label manipulation:
# https://pandas.pydata.org/pandas-docs/stable/reference/series.html#reindexing-selection-label-manipulation

In [None]:
# An all-integer Series, labelled by fruit name.
fruit_names = ['apples', 'oranges', 'bananas']
fruit_counts = [10, 6, 3]
fruits = pd.Series(data=fruit_counts, index=fruit_names)
fruits

In [None]:
# Arithmetic with a scalar is applied to every element: add 2 to each value.
fruits + 2

In [None]:
# Subtract 2 from each value.
fruits - 2

In [None]:
# Multiply each value by 2.
fruits * 2

In [None]:
# Divide each value by 2 (true division, so the result holds floats).
fruits / 2

In [None]:
import numpy as np

In [None]:
# NumPy functions are applied element-wise to a Series: e raised to each value.
np.exp(fruits)

In [None]:
# Element-wise square root.
np.sqrt(fruits)

In [None]:
# Element-wise power: each value squared.
np.power(fruits, 2)

In [None]:
# Arithmetic on a single element selected by label.
x = fruits['bananas'] + 2
x

In [None]:
# Arithmetic on a single element selected by integer position.
fruits.iloc[0] - 2

In [None]:
# Arithmetic on a subset selected with a list of labels.
fruits[['apples', 'oranges']] * 2

In [None]:
# Same kind of subset arithmetic, using .loc for the label-based selection.
fruits.loc[['apples', 'oranges']] / 2

In [None]:
# `groceries` holds mixed types, so * 2 is applied per element:
# numbers are doubled while strings are repeated.
groceries * 2

In [None]:
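# Reference - Series indexing and iteration:
# https://pandas.pydata.org/pandas-docs/stable/reference/series.html#indexing-iteration
# Reference - Series reindexing / selection / label manipulation:
# https://pandas.pydata.org/pandas-docs/stable/reference/series.html#reindexing-selection-label-manipulation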

### Data Frames

In [None]:
# One Series per shopper, labelled by item name. The two carts only partly
# overlap: 'bike' and 'pants' appear in both.
bob_cart = pd.Series(data=[245, 25, 55], index=['bike', 'pants', 'watch'])
alice_cart = pd.Series(data=[40, 110, 500, 45],
                       index=['book', 'glasses', 'bike', 'pants'])
items = {'Bob': bob_cart, 'Alice': alice_cart}


In [None]:
# Build a DataFrame from the dict of Series: the dict keys become columns and
# the Series are aligned on their index labels (missing entries become NaN).
shopping_carts = pd.DataFrame(items)
shopping_carts

In [None]:
# Series created without explicit labels get the default integer index, and
# that is what the DataFrame rows are aligned on.
bob_values = pd.Series([245, 25, 55])
alice_values = pd.Series([40, 110, 500, 45])
data = {'Bob': bob_values, 'Alice': alice_values}

df = pd.DataFrame(data)
df

In [None]:
# Display the label-indexed DataFrame again before inspecting its attributes.
shopping_carts

In [None]:
# (number of rows, number of columns)
shopping_carts.shape

In [None]:
# Number of dimensions of the DataFrame.
shopping_carts.ndim

In [None]:
# Total number of cells (rows * columns), NaN cells included.
shopping_carts.size

In [None]:
# The underlying data as a 2-D NumPy array, without row or column labels.
shopping_carts.values

In [None]:
# The row labels.
shopping_carts.index

In [None]:
# The column labels.
shopping_carts.columns

In [None]:
# Display the dict of Series that the following cells build DataFrames from.
items

In [None]:
# columns= restricts the DataFrame to the listed dict keys.
bob_shopping_cart = pd.DataFrame(items, columns=['Bob'])
bob_shopping_cart

In [None]:
# index= restricts the DataFrame to the listed row labels.
sel_shopping_cart = pd.DataFrame(items, index=['pants', 'book'])
sel_shopping_cart

In [None]:
# index= and columns= can be combined to select specific rows and columns.
alice_sel_shopping_cart = pd.DataFrame(items, index=['glasses', 'bike'], columns=['Alice'])
alice_sel_shopping_cart

In [None]:
# A dict of equal-length lists: each key becomes a column and the rows get
# the default integer index.
data = dict(Integers=[1, 2, 3], Floats=[4.5, 8.2, 9.6])
df = pd.DataFrame(data)
df

In [None]:
# The same dict of lists, this time with explicit row labels passed via index=.
row_labels = ['label 1', 'label 2', 'label 3']
data = dict(Integers=[1, 2, 3], Floats=[4.5, 8.2, 9.6])
df = pd.DataFrame(data, index=row_labels)
df

In [None]:
# A list of dicts builds the frame row by row: each dict is one row and its
# keys are column labels. Keys missing from a dict become NaN in that row.
first_store = {'bikes': 20, 'pants': 30, 'watches': 35}
second_store = {'watches': 10, 'glasses': 50, 'bikes': 15, 'pants': 5}
items2 = [first_store, second_store]
store_items = pd.DataFrame(items2)
store_items

In [None]:
# The same list of dicts, with explicit row labels passed via index=.
first_store = {'bikes': 20, 'pants': 30, 'watches': 35}
second_store = {'watches': 10, 'glasses': 50, 'bikes': 15, 'pants': 5}
items2 = [first_store, second_store]
store_labels = ['store 1', 'store 2']
store_items = pd.DataFrame(items2, index=store_labels)
store_items

In [None]:
# Reference - introduction to pandas data structures:
# https://pandas.pydata.org/pandas-docs/stable/user_guide/dsintro.html#intro-to-data-structures
# Reference - DataFrame API:
# https://pandas.pydata.org/pandas-docs/stable/reference/frame.html#dataframe

In [None]:
# Access elements using labels.
# A list of column labels returns a DataFrame (here with a single column).
store_items[['bikes']]

In [None]:
# Select several columns at once with a list of column labels.
store_items[['bikes', 'pants']]

In [None]:
# .loc with a list of row labels returns a DataFrame containing those rows.
x = store_items.loc[['store 1']]
x

In [None]:
# Read a single cell by (row label, column label).
# One .loc call is used instead of chained [column][row] indexing: it does
# the lookup in a single step and is the form that also works for assignment.
x = store_items.loc['store 2', 'bikes']
x

In [None]:
# Add a column to an existing DataFrame.
# The list supplies one value per existing row, in row order.
store_items['shirts'] = [15, 2]
store_items

In [None]:
# Derive a new column from element-wise arithmetic on two existing columns.
store_items['suits'] = store_items['pants'] + store_items['shirts']
store_items

In [None]:
# Build a one-row DataFrame, labelled 'store 3', to be added to the DataFrame.
store_3_stock = {'bikes': 20, 'pants': 30, 'watches': 35, 'glasses': 4}
new_items = [store_3_stock]
new_store = pd.DataFrame(new_items, index=['store 3'])
new_store

In [None]:
# Append the new row. Columns are aligned by name; columns that new_store
# does not have are filled with NaN for the new row.
store_items = pd.concat([store_items, new_store])
store_items

In [None]:
# Add new column that has data from the existing columns.
# [1:] skips the first row; the assignment aligns on the row labels, so the
# skipped row receives NaN in the new column.
store_items['new watches'] = store_items['watches'][1:]
store_items

In [None]:
# Add new column at a specific location.
# insert(loc, column, value) modifies the DataFrame in place; loc=4 is the
# zero-based column position.
# NOTE: not idempotent - re-running this cell raises ValueError because the
# 'shoes' column already exists.
store_items.insert(4, 'shoes', [8, 5, 0])
store_items

In [None]:
# Delete one column from a DataFrame.
# pop() removes the column in place and returns it (the returned Series is
# not used here).
# NOTE: not idempotent - re-running this cell raises KeyError.
store_items.pop('pants')
store_items

In [None]:
# Delete multiple columns from a DataFrame.
# axis=1 targets columns; drop() returns a new DataFrame, which is re-bound
# to the same name here.
store_items = store_items.drop(['watches', 'shoes'], axis=1)
store_items

In [None]:
# Delete rows from a DataFrame.
# axis=0 targets rows, identified by their index labels.
store_items = store_items.drop(['store 2', 'store 1'], axis=0)
store_items

In [None]:
# Rename a column label: the mapping is {old label: new label}.
store_items = store_items.rename(columns={'bikes': 'hats'})
store_items

In [None]:
# Rename a row label: the mapping is {old label: new label}.
store_items = store_items.rename(index={'store 3': 'last store'})
store_items

In [None]:
# Use the values of the 'hats' column as the row index. With the default
# arguments the column is moved out of the regular columns.
store_items = store_items.set_index('hats')
store_items

### Dealing with NaN

In [None]:
# Fresh three-store DataFrame for the NaN examples. Not every store lists
# every item, so the missing entries show up as NaN.
store_1 = {'bikes': 20, 'pants': 30, 'watches': 35, 'shirts': 15, 'shoes': 8, 'suits': 45}
store_2 = {'watches': 10, 'glasses': 50, 'bikes': 15, 'pants': 5, 'shirts': 2, 'shoes': 5, 'suits': 7}
store_3 = {'bikes': 20, 'pants': 30, 'watches': 35, 'glasses': 4, 'shoes': 10}
items2 = [store_1, store_2, store_3]
store_labels = ['store 1', 'store 2', 'store 3']
store_items = pd.DataFrame(items2, index=store_labels)
store_items

In [None]:
# Count the total NaN values.
# isnull() gives a boolean mask, the first sum() counts the NaNs per column
# and the second sum() adds the per-column counts together.
x = store_items.isnull().sum().sum()
x

In [None]:
# Count the non-NaN values in each column.
# count() returns one count per column (a Series), not a single total.
y = store_items.count()
y

In [None]:
# df.dropna(axis=0) drops every row that contains a NaN value; df.dropna(axis=1) drops every column that contains a NaN value.

In [None]:
# Tip: remember that axis=0 can be read as "down" and axis=1 as "across" a NumPy ndarray or pandas DataFrame.

In [None]:
# Drop rows having NaN values.
# inplace=False (the default) returns a new DataFrame; store_items itself is
# left unchanged.
store_items.dropna(axis=0, inplace=False)

In [None]:
# Drop columns having NaN values.
# inplace=False (the default) returns a new DataFrame; store_items itself is
# left unchanged.
store_items.dropna(axis=1, inplace=False)

### Substituting NaN Values

In [None]:
# Replace every NaN with 0 (returns a new DataFrame).
store_items.fillna(0)

In [None]:
# Forward-fill down each column (axis=0): a NaN takes the value from the row
# above it. A NaN in the first row has nothing above it and stays NaN.
# DataFrame.ffill() replaces fillna(method='ffill'), whose `method` argument
# is deprecated in pandas 2.x.
store_items.ffill(axis=0)

In [None]:
# Forward-fill across each row (axis=1): a NaN takes the value from the
# column to its left.
# DataFrame.ffill() replaces fillna(method='ffill'), whose `method` argument
# is deprecated in pandas 2.x.
store_items.ffill(axis=1)

In [None]:
# Backward-fill up each column (axis=0): a NaN takes the value from the row
# below it. A NaN in the last row has nothing below it and stays NaN.
# DataFrame.bfill() replaces fillna(method='backfill'), whose `method`
# argument is deprecated in pandas 2.x.
store_items.bfill(axis=0)

In [None]:
# Same backward fill with inplace=False spelled out: a new DataFrame is
# returned and store_items itself is not modified.
# DataFrame.bfill() replaces fillna(method='backfill'), whose `method`
# argument is deprecated in pandas 2.x.
store_items.bfill(axis=0, inplace=False)

In [None]:
# Estimate NaN values by linear interpolation down each column (axis=0).
store_items.interpolate(method='linear', axis=0)

In [None]:
# Estimate NaN values by linear interpolation across each row (axis=1).
store_items.interpolate(method='linear', axis=1)

### Loading Data into a pandas DataFrame

In [None]:
# Load the CSV file into a DataFrame. The path is relative, so it is resolved
# against the notebook's working directory.
google_stock = pd.read_csv('../data/goog.csv')
# (number of rows, number of columns)
google_stock.shape

In [None]:
# Display the loaded DataFrame.
google_stock

In [None]:
# First 3 rows.
google_stock.head(3)

In [None]:
# Last 4 rows.
google_stock.tail(4)

In [None]:
# For each column, whether it contains at least one NaN value.
google_stock.isnull().any()

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max) per column.
google_stock.describe()

In [None]:
# Descriptive statistics for a single column.
google_stock['Adj Close'].describe()

In [None]:
# Maximum value of each column.
google_stock.max()

In [None]:
# Minimum value of the 'Close' column.
google_stock['Close'].min()

In [None]:
# Remove the 'Date' column before averaging (presumably it holds date strings,
# which cannot be averaged - confirm against the CSV).
# The name is re-bound to the result of drop() instead of calling pop(), which
# mutates in place; together with errors='ignore' this keeps the cell safe to
# re-run once 'Date' is already gone.
google_stock = google_stock.drop(columns='Date', errors='ignore')
# Mean of each remaining column.
google_stock.mean()

In [None]:

# Pairwise correlation between the columns.
# NOTE(review): presumably relies on the 'Date' column having been removed by
# an earlier cell, so that only numeric columns remain - confirm.
google_stock.corr()

In [None]:
# Display the DataFrame in its current state.
google_stock

In [None]:
# Load the CSV file into a DataFrame (path relative to the working directory).
# NOTE: this re-binds `data`, which earlier cells used for plain dicts.
data = pd.read_csv('../data/fake_company.csv')
data

In [None]:
# Sum of 'Salary' for each distinct 'Year'.
data.groupby(['Year'])['Salary'].sum()

In [None]:
# Mean of 'Salary' for each distinct 'Year'.
data.groupby(['Year'])['Salary'].mean()

In [None]:
# Sum of 'Salary' for each distinct 'Name'.
data.groupby(['Name'])['Salary'].sum()

In [None]:
# Sum of 'Salary' for each distinct ('Year', 'Department') combination.
data.groupby(['Year', 'Department'])['Salary'].sum()

In [None]:
# Further reading - "10 minutes to pandas":
# https://pandas.pydata.org/pandas-docs/stable/user_guide/10min.html

In [1]:
# Further reading - pandas cheat sheet (PDF):
# https://pandas.pydata.org/Pandas_Cheat_Sheet.pdf

In [2]:
# UNPIVOT data (reshape from wide to long format):
# pd.melt()