# 01-Ins_Welcome

In [None]:
### Import Libraries and Dependencies

# Import libraries and dependencies
import pandas as pd
from pathlib import Path

### Read in CSV as Pandas DataFrame

# Read in CSV data, parsing the `Date` column into the index.
# `infer_datetime_format` is not passed: it is deprecated since pandas 2.0,
# where consistent format inference is already the default.
csv_path = Path('../Resources/fb_google_finance.csv')
fb_ticker_data = pd.read_csv(csv_path, index_col='Date', parse_dates=True)
fb_ticker_data.head()

### Slice data for Feb 2019

# Slice data. Label slices on a DatetimeIndex include BOTH endpoints, so the
# slice stops at the last day of February instead of running into 2019-03-01.
fb_slice = fb_ticker_data.loc['2019-02-01':'2019-02-28']

### Calculate Daily Returns

# Calculate daily returns as the percent change between consecutive rows
# (the first row has no predecessor and is therefore NaN)
fb_quarter_returns = fb_slice.pct_change()
fb_quarter_returns

# 02-Ins_Sorting

In [None]:
### Import Libraries and Dependencies

import pandas as pd

### Create DataFrame

# Build the frame column by column; each list position describes one painting
painting_df = pd.DataFrame(
    {
        "Painting": [
            "Mona Lisa (Knockoff)",
            "Van Gogh (Knockoff)",
            "Starving Artist",
            "Toddler Drawing",
        ],
        "Price": [25, 20, 10, 1],
        "Popularity": ["Very Popular", "Popular", "Average", "Not Popular"],
    }
)
painting_df

### Sort Data

# Cheapest first (ascending is the default order)
painting_df.sort_values(by="Price")

# Most expensive first
painting_df.sort_values(by="Price", ascending=False)

### Sorting the index

# Reverse the default integer index
painting_df.sort_index(ascending=False)

# Promote the Price column to the index
painting_df = painting_df.set_index(keys="Price")
painting_df

# Order the Price index from highest to lowest
painting_df.sort_index(ascending=False)

# 05-Ins_Groupby

In [None]:
### Import Libraries and Dependencies

import pandas as pd
from pathlib import Path
%matplotlib inline

### Read in File and Clean Data

# Read CSV, parsing the `data_date` column into the index.
# `infer_datetime_format` is not passed: it is deprecated since pandas 2.0,
# where consistent format inference is already the default.
csv_path = Path('../Resources/crypto_data.csv')
crypto_data = pd.read_csv(csv_path, index_col='data_date', parse_dates=True)
crypto_data

# Drop the data_time and timestamp columns
crypto_data = crypto_data.drop(columns=['data_time','timestamp'])

# Sort the dates in ascending order
crypto_data = crypto_data.sort_index()

# Drop rows with missing values
crypto_data = crypto_data.dropna()
crypto_data.head()

### Group DataFrame and perform `count` aggregation

# Count the entries in each column, per cryptocurrency
crypto_data_grp = crypto_data.groupby(by='cryptocurrency').count()
crypto_data_grp

### Group DataFrame without aggregate function

# Without an aggregation the result is the GroupBy object itself
crypto_data_grp = crypto_data.groupby(by='cryptocurrency')
crypto_data_grp

### Group DataFrame by `cryptocurrency` and calculate the average `data_priceUsd`

# Average the remaining columns for each cryptocurrency
crypto_data_mean = crypto_data.groupby(by='cryptocurrency').mean()
crypto_data_mean

### Group by more than one column and calculate count

# Count how often each exact price occurs within each cryptocurrency
price_group_keys = ['cryptocurrency', 'data_priceUsd']
multi_group = crypto_data.groupby(by=price_group_keys)['data_priceUsd'].count()
multi_group

### Group by more than one column, round price to two decimal places, and calculate count

# Round prices to two decimals first so that near-identical prices share a group
rounded_crypto_data = crypto_data.round(decimals={'data_priceUsd': 2})

multi_group = rounded_crypto_data.groupby(by=price_group_keys)['data_priceUsd'].count()
multi_group

### Compare single column grouping to multicolumn grouping

# Price count per cryptocurrency only, for comparison with the two-key grouping
single_group = crypto_data.groupby(by='cryptocurrency')['data_priceUsd'].count()
single_group

### Plot grouped data to generate more than one line on the same chart

# Plot data_priceUsd for each cryptocurrency group, with a legend
price_by_crypto = crypto_data.groupby(by='cryptocurrency')['data_priceUsd']
grouped_cryptos = price_by_crypto.plot(legend=True)
grouped_cryptos

# 08-Ins_Multi_Indexing

In [None]:
### Import Libraries and Dependencies

import pandas as pd
from pathlib import Path

### Read in CSV as Pandas DataFrame and Set the Index

# Read in data, parsing the `Date` column into the index.
# `infer_datetime_format` is not passed: it is deprecated since pandas 2.0,
# where consistent format inference is already the default.
csv_path = Path("../Resources/twtr_google_finance.csv")
ticker_data = pd.read_csv(csv_path, parse_dates=True, index_col='Date')
ticker_data.head()

### Display DataFrame Index

ticker_data.index

### Create Multiple Indices by Grouping By DatetimeIndex `year`, `month`, and `day` with `first` Function

# Calendar components of the DatetimeIndex, reused as grouping keys below
index_years = ticker_data.index.year
index_months = ticker_data.index.month
index_days = ticker_data.index.day

# First entry of every (year, month, day) group
ticker_data_grp = ticker_data.groupby([index_years, index_months, index_days]).first()
ticker_data_grp

### Create Multiple Indices by Grouping By DatetimeIndex `year` and `month` with `first` Function

# First entry of every (year, month) group
ticker_data_grp_2 = ticker_data.groupby([index_years, index_months]).first()
ticker_data_grp_2

### Create Multiple Indices by Grouping By DatetimeIndex `year` and `month` with `last` Function

# Last entry of every (year, month) group
ticker_data_grp_3 = ticker_data.groupby([index_years, index_months]).last()
ticker_data_grp_3

### Create Multiple Indices by Grouping By DatetimeIndex `year` and `month` with `mean` Function

# Average of every (year, month) group
ticker_data_grp_4 = ticker_data.groupby([index_years, index_months]).mean()
ticker_data_grp_4

### Slice Data for 4/12/2019

# Look up the full (year, month, day) key in the daily grouping
april_12_key = (2019, 4, 12)
ticker_data_slice = ticker_data_grp.loc[april_12_key]
ticker_data_slice

### Slice Data For All Days in April 2019

# A partial (year, month) key selects every day of April 2019 from the daily grouping
april_key = (2019, 4)
ticker_data_slice = ticker_data_grp.loc[april_key]
ticker_data_slice

# 12-Ins_Concat_DataFrame

In [None]:
### Import Libraries and Dependencies

import pandas as pd
from pathlib import Path

### Read in files

# Each dataset is located and loaded in turn; every frame is indexed by CustomerID

# France products
france_data_path = Path('../Resources/france_products.csv')
france_data = pd.read_csv(france_data_path, index_col='CustomerID')

# UK products
uk_data_path = Path('../Resources/uk_products.csv')
uk_data = pd.read_csv(uk_data_path, index_col='CustomerID')

# Netherlands products
netherlands_data_path = Path('../Resources/netherlands_products.csv')
netherlands_data = pd.read_csv(netherlands_data_path, index_col='CustomerID')

# Customer info
customer_data_path = Path('../Resources/customer_info.csv')
customer_data = pd.read_csv(customer_data_path, index_col='CustomerID')

# Products
products_data_path = Path('../Resources/products.csv')
products_data = pd.read_csv(products_data_path, index_col='CustomerID')

### Output sample of data

# Show sample of France data
france_data.head()

# Show sample of UK data
uk_data.head()

# Show sample of Netherlands data
netherlands_data.head()

### Concatenate data by rows using `concat` function and `inner` join

# Stack the France, UK, and Netherlands datasets on top of each other, keeping
# only the columns all three share. "index" is the documented name for axis 0
# ("rows" is an undocumented alias for the same axis).
joined_data_rows = pd.concat([france_data, uk_data, netherlands_data], axis="index", join="inner")
joined_data_rows

### Concatenate data by column using `concat` function and `inner` join

# Show sample of customer data
customer_data.head()

# Show sample of product data
products_data.head()

# Place customer and product columns side by side, keeping only the
# CustomerID index labels present in both frames
joined_data_cols = pd.concat([customer_data, products_data], axis='columns', join='inner')
joined_data_cols.head()

# 15-Ins_Std_Dev_Risk

In [None]:
### Import Libraries and Dependencies

import pandas as pd
import numpy as np
from pathlib import Path
%matplotlib inline

### Read data

# Load the CSV, parsing the `Date` column into the index
file_path = Path("../Resources/tech_stocks_closing_value_2018.csv")
tech_stocks = pd.read_csv(file_path, parse_dates=True, index_col='Date')
tech_stocks.head()

### Calculate Daily Returns

# Percent change from each row to the next
daily_returns = tech_stocks.pct_change()
daily_returns.head()

### Calculate standard deviation using `std` function

# Standard deviation of each column's daily returns
daily_std = daily_returns.std()
daily_std.head()

### Sort standard deviation in desc order

# Largest standard deviation (most risk) first
daily_std = daily_std.sort_values(ascending=False)
daily_std.head()

### Calculate the annualized standard deviation

# Scale the daily figure by the square root of 252 trading days
annualization_factor = np.sqrt(252)
annualized_std = daily_std * annualization_factor
annualized_std.head()

### Plot standard deviation for 3 different portfolios to determine which has the most risk

# Draw equally sized normal samples per portfolio; only the scale differs
sample_size = 10000
portfolio_a_std = np.random.normal(scale=0.5, size=sample_size)
portfolio_b_std = np.random.normal(scale=1.0, size=sample_size)
portfolio_c_std = np.random.normal(scale=1.5, size=sample_size)

# One column per portfolio, labelled with the scale it was drawn from
samples_by_scale = {
    "0.5": portfolio_a_std,
    "1.0": portfolio_b_std,
    "1.5": portfolio_c_std,
}
portfolio_std = pd.DataFrame(samples_by_scale)

# Stacked histogram of the three sample sets, using 100 bins
portfolio_std.plot(kind="hist", stacked=True, bins=100)

### Plot standard deviation using box plot

# Box plot with one box per portfolio column
portfolio_std.plot(kind="box")

# 16-Ins_Sharpe_Ratios

In [None]:
### Import Libraries and Dependencies

import pandas as pd
import numpy as np
from pathlib import Path
#!pip install quandl
import quandl
%matplotlib inline

### Read data

portfolio_a_path = Path("../Resources/tech_stocks_2018_a.csv")
portfolio_b_path = Path("../Resources/tech_stocks_2018_b.csv")
risk_free_rate_path= Path("../Resources/risk_free_rate.csv")

# Every file is indexed by its parsed `Date` column.
# `infer_datetime_format` is not passed: it is deprecated since pandas 2.0,
# where consistent format inference is already the default.
portfolio_a = pd.read_csv(portfolio_a_path, index_col='Date', parse_dates=True)
portfolio_b = pd.read_csv(portfolio_b_path, index_col='Date', parse_dates=True)
risk_free_rate = pd.read_csv(risk_free_rate_path, index_col='Date', parse_dates=True)

portfolio_a.head()

### Calculate Sharpe Ratios

# Daily percent returns; rows containing NaN (such as the first) are dropped
portfolio_a_returns = portfolio_a.pct_change().dropna()
portfolio_b_returns = portfolio_b.pct_change().dropna()

# Put both portfolios and the risk-free rate side by side on their shared dates
all_portfolios_returns = pd.concat(
    [portfolio_a_returns, portfolio_b_returns, risk_free_rate],
    axis='columns',
    join='inner',
)
all_portfolios_returns.head()

# Sharpe ratio: annualized mean return in excess of the mean rf_rate, divided
# by annualized standard deviation (252 trading days).
# NOTE: rf_rate is itself a column of the frame, so it receives an entry too.
mean_rf_rate = all_portfolios_returns['rf_rate'].mean()
annualized_excess_return = (all_portfolios_returns.mean() - mean_rf_rate) * 252
annualized_volatility = all_portfolios_returns.std() * np.sqrt(252)
sharpe_ratios = annualized_excess_return / annualized_volatility
sharpe_ratios

### Plot sharpe ratios

# One bar per column
sharpe_ratios.plot.bar(title="Sharpe Ratios")