- S. Jansen Datacamp

##  select sub-periods from a time series.

In [None]:
# Build one single-column frame per calendar year. reset_index(drop=True)
# replaces the date index with row positions so the years line up side by
# side, and rename labels each column with its year.
yearly_prices = [
    yahoo.loc[year, ["price"]]
    .reset_index(drop=True)
    .rename(columns={"price": year})
    for year in ["2013", "2014", "2015"]
]

# Combine all years in a single concat call instead of growing a frame
# inside the loop, which copies the accumulated data on every iteration.
prices = pd.concat(yearly_prices, axis=1)

# Inspect and plot prices, one subplot per year
prices.info()
prices.plot(subplots=True)
plt.show()

## Shifting stock prices across time


In [None]:
# Load the Google price file, using its parsed "Date" column as the index
google = pd.read_csv("google.csv", index_col="Date", parse_dates=["Date"])
google.head()

# Conform the index to business-day frequency
google = google.asfreq(freq="B")

# Add two offset copies of Close: 'lagged' is shifted by -90 periods and
# 'shifted' by +90 periods
google = google.assign(
    lagged=google["Close"].shift(periods=-90),
    shifted=google["Close"].shift(periods=90),
)

# Draw every column in its own subplot
google.plot(subplots=True)
plt.show()

## Absolute changes between current and shifted prices

In [None]:

# Price moved 30 periods forward along the index
yahoo["shifted_30"] = yahoo["price"].shift(periods=30)

# Change relative to the shifted price, computed by explicit subtraction
yahoo["change_30"] = yahoo["price"] - yahoo["shifted_30"]

# The same 30-period change, computed with the built-in diff
yahoo["diff_30"] = yahoo["price"].diff(30)

# Show the last five rows, including the three new columns
print(yahoo.tail())

# Count the distinct values of change_30 minus diff_30
print((yahoo["change_30"] - yahoo["diff_30"]).value_counts())

# Plotting multi-period returns


In [None]:
# Percentage change of Close over 1 period, expressed in percent
google["daily_return"] = google["Close"].pct_change().mul(100)

# Percentage change of Close over 30 periods, expressed in percent
google["monthly_return"] = google["Close"].pct_change(30).mul(100)

# Percentage change of Close over 360 periods, expressed in percent
google["annual_return"] = google["Close"].pct_change(360).mul(100)

# Draw every column in its own subplot
google.plot(subplots=True)
plt.show()

## easily compare several time series by normalizing their starting points to 100

In [None]:
# Import data here, indexed by the parsed DATE column
prices = pd.read_csv("asset_classes.csv", parse_dates=["DATE"], index_col="DATE")

# Inspect prices here. info() prints its own summary and returns None, so it
# is called directly rather than wrapped in print().
prices.info()

# Select first prices (the first row, one value per column)
first_prices = prices.iloc[0]

# Create normalized: every column rescaled so that its first value is 100
normalized = prices.div(first_prices).mul(100)

# Plot normalized
normalized.plot()
plt.show()

# Chapter 2, 'Basic Time Series Metrics & Resampling'.

## compare the performance of various stocks against a benchmark.

In [None]:
# Import stock prices and index here, both indexed by the parsed date column
stocks = pd.read_csv("nyse.csv", parse_dates=["date"], index_col="date")
dow_jones = pd.read_csv("dow_jones.csv", parse_dates=["date"], index_col="date")

# Concatenate the two frames column-wise and inspect the result. info()
# prints its own summary and returns None, so it is not wrapped in print().
data = pd.concat([stocks, dow_jones], axis=1)
data.info()

# Normalize every column to 100 at the first row and plot
data.div(data.iloc[0]).mul(100).plot()
plt.show()

## Plot performance difference vs benchmark index


In [None]:
# Columns holding the two stocks to compare against the index
tickers = ["MSFT", "AAPL"]

# Load the stock prices, indexed by the parsed date column
stocks = pd.read_csv("msft_aapl.csv", index_col="date", parse_dates=["date"])

# Load the index, indexed the same way
sp500 = pd.read_csv("sp500.csv", index_col="date", parse_dates=["date"])

# Put stocks and index side by side and keep only rows without missing values
data = pd.concat([stocks, sp500], axis="columns").dropna()

# Rescale every column so that its first row equals 100
normalized = data / data.iloc[0] * 100

# Plot each normalized stock minus the normalized index, matched row by row
normalized[tickers].sub(normalized["SP500"], axis="index").plot()
plt.show()

## Convert monthly to weekly data


In [None]:
# Set start and end dates
start = '2016-1-1'
end = '2016-2-29'

# Create monthly_dates here. The MonthEnd offset object is used instead of
# the 'M' string alias, which pandas deprecated in 2.2; the offset object
# yields the same month-end dates on every pandas version.
monthly_dates = pd.date_range(start=start, end=end, freq=pd.offsets.MonthEnd())

# Create monthly here: one value per month-end date
monthly = pd.Series(data=[1, 2], index=monthly_dates)
print(monthly)

# Create weekly_dates here
weekly_dates = pd.date_range(start=start, end=end, freq='W')

# Print monthly, reindexed using weekly_dates: first without filling (dates
# that are not in the monthly index become NaN), then back- and forward-filled
print(monthly.reindex(weekly_dates))
print(monthly.reindex(weekly_dates, method='bfill'))
print(monthly.reindex(weekly_dates, method='ffill'))

## Create weekly from monthly unemployment data

In [None]:
# Load the unemployment file, indexed by its parsed date column
data = pd.read_csv('unemployment.csv', index_col='date', parse_dates=['date'])

# Weekly frequency without any fill method: first five rows
print(data.asfreq(freq='W').head(5))

# Weekly frequency, gaps back-filled: first five rows
print(data.asfreq(freq='W', method='bfill').head(5))

# Weekly frequency, gaps forward-filled; kept for plotting below
weekly_ffill = data.asfreq(freq='W', method='ffill')
print(weekly_ffill.head(5))

# Plot the forward-filled weekly series from 2015 onwards
weekly_ffill.loc['2015':].plot()
plt.show()


## Use interpolation to create weekly employment data


In [None]:
# Inspect data here. info() prints its own summary and returns None, so it
# is called directly rather than wrapped in print().
# NOTE(review): the `.UNRATE` access below needs `monthly` to be a DataFrame
# with an UNRATE column, but the only assignment to `monthly` visible in this
# notebook builds a two-element Series - confirm where the monthly
# unemployment frame is loaded before running this cell.
monthly.info()

# Create weekly dates spanning the first to the last monthly observation
weekly_dates = pd.date_range(monthly.index.min(), monthly.index.max(), freq='W')

# Reindex monthly to weekly data
weekly = monthly.reindex(weekly_dates)

# Create ffill and interpolated columns from the reindexed UNRATE column
weekly['ffill'] = weekly.UNRATE.ffill()
weekly['interpolated'] = weekly.UNRATE.interpolate()

# Plot weekly
weekly.plot()
plt.show()

## Interpolate debt/GDP and compare to unemployment


In [None]:
# Import & inspect data here. info() prints its own summary and returns
# None, so it is called directly rather than wrapped in print().
data = pd.read_csv('debt_unemployment.csv', parse_dates=["date"], index_col="date")
data.info()

# Interpolate missing values and inspect the result
interpolated = data.interpolate()
interpolated.info()

# Plot interpolated data here, with Unemployment on a secondary y-axis
interpolated.plot(secondary_y="Unemployment")
plt.show()

## Compare weekly, monthly and annual ozone trends for NYC & LA


In [None]:
# Import and inspect data here. info() prints its own summary and returns
# None, so it is called directly rather than wrapped in print().
ozone = pd.read_csv('ozone.csv', parse_dates=["date"], index_col="date")
ozone.info()

# Calculate and plot the weekly average ozone trend
ozone.resample("W").mean().plot()
plt.show()

# Calculate and plot the monthly average ozone trend. The MonthEnd offset
# replaces the "M" string alias, which pandas deprecated in 2.2; the bins
# are the same month-end bins.
ozone.resample(pd.offsets.MonthEnd()).mean().plot()
plt.show()

# Calculate and plot the annual average ozone trend. YearEnd likewise
# replaces the deprecated "A" alias.
ozone.resample(pd.offsets.YearEnd()).mean().plot()
plt.show()

### Compare monthly average stock prices for Facebook and Google


In [None]:
# Import and inspect data here. info() prints its own summary and returns
# None, so it is called directly rather than wrapped in print().
stocks = pd.read_csv('stocks.csv', parse_dates=["date"], index_col="date")
stocks.info()

# Calculate and plot the monthly averages. The MonthEnd offset replaces the
# "M" string alias, which pandas deprecated in 2.2; the bins are unchanged.
monthly_average = stocks.resample(pd.offsets.MonthEnd()).mean()
monthly_average.plot(subplots=True)
plt.show()

## Compare quarterly GDP growth rate and stock returns


In [None]:
# Import and inspect gdp_growth here. info() prints its own summary and
# returns None, so it is called directly rather than wrapped in print().
gdp_growth = pd.read_csv('gdp_growth.csv', parse_dates=["date"], index_col="date")
gdp_growth.info()

# Import and inspect djia here
djia = pd.read_csv("djia.csv", parse_dates=["date"], index_col="date")
djia.info()

# Calculate djia quarterly returns here: take the first observation of each
# quarter-start ("QS") bin, then the percent change between quarters
djia_quarterly = djia.resample("QS").first()
djia_quarterly_return = djia_quarterly.pct_change().mul(100)

# Concatenate, rename and plot djia_quarterly_return and gdp_growth here
data = pd.concat([gdp_growth, djia_quarterly_return], axis=1)
data.columns = ["gdp", "djia"]
data.plot()
plt.show()

## Visualize monthly mean, median and standard deviation of S&P500 returns


In [None]:
# Import data here
sp500 = pd.read_csv("sp500.csv", parse_dates=["date"], index_col="date")

# Calculate daily returns here. squeeze() collapses a single-column frame
# into a Series (presumably sp500.csv holds one price column - confirm).
daily_returns = sp500.squeeze().pct_change()

# Resample to month-end bins and calculate statistics. The MonthEnd offset
# replaces the "M" string alias, which pandas deprecated in 2.2.
stats = daily_returns.resample(pd.offsets.MonthEnd()).agg(["mean", "median", "std"])

# Plot the monthly statistics: the section heading promises a visualization,
# but the cell previously computed stats without showing them.
stats.plot()
plt.show()

# Chapter 3: Window Functions: Rolling & Expanding Metrics

- Rolling average air quality since 2010 for new york city


In [None]:
# Import and inspect ozone data here. info() prints its own summary and
# returns None, so it is called directly rather than wrapped in print().
data = pd.read_csv("ozone.csv", parse_dates=["date"], index_col=["date"])
data.info()

# Calculate the rolling mean of Ozone over 90-day and 360-day time windows
data['90D'] = data.Ozone.rolling("90D").mean()
data['360D'] = data.Ozone.rolling("360D").mean()

# Plot data from 2010 onwards
data.loc["2010":].plot()
plt.title("New York City")
plt.show()

- Rolling 360-day median & std. deviation for nyc ozone data since 2000


In [None]:
# Import and inspect ozone data here, dropping rows with missing values.
# info() prints its own summary and returns None, so it is called directly
# rather than wrapped in print().
data = pd.read_csv("ozone.csv", parse_dates=["date"], index_col="date").dropna()
data.info()

# Calculate the rolling mean and std here over a window of 360 observations
rolling_stats = data.Ozone.rolling(360).agg(["mean", "std"])

# Join rolling_stats with ozone data
stats = data.join(rolling_stats)

# Plot stats, one subplot per column
stats.plot(subplots=True)
plt.show()

- Rolling quantiles for daily air quality in nyc

Rolling quantiles describe changes in the dispersion of a time series over time in a way that is less sensitive to outliers than using the mean and standard deviation.

In [None]:
# Resample to daily frequency, interpolate the gaps and inspect the result.
# info() prints its own summary and returns None, so it is called directly.
# NOTE(review): this overwrites `data` with its own resampled version, so
# re-running the cell operates on already-processed data.
data = data.resample("D").interpolate()
data.info()

# Create the rolling window over 360 observations of Ozone
rolling = data.Ozone.rolling(360)

# Add the rolling 10%, 50% and 90% quantiles as columns. quantile() already
# returns a Series aligned with data, so no to_frame() conversion is needed.
data['q10'] = rolling.quantile(.1)
data['q50'] = rolling.quantile(.5)
data['q90'] = rolling.quantile(.9)

# Plot the series together with its rolling quantiles
data.plot()
plt.show()

### Expanding window functions with pandas
- Cumulative sum vs .diff()

In [None]:
# Calculate differences between consecutive rows; dropna() removes the
# leading row, which has no predecessor
differences = data.diff().dropna()

# Select start price: the first row, kept as a one-row frame so it can be
# concatenated. iloc[:1] replaces the deprecated data.first("D") and always
# yields exactly one row, which is what the reconstruction below requires.
start_price = data.iloc[:1]

# Calculate cumulative sum. pd.concat replaces DataFrame.append, which was
# removed in pandas 2.0.
cumulative_sum = pd.concat([start_price, differences]).cumsum()

# Validate cumulative sum equals data
print(data.equals(cumulative_sum))

- Cumulative return on $1,000 invested in google vs apple I


In [None]:
# Define your investment
investment = 1000

# Calculate the daily returns here
returns = data.pct_change()

# Calculate the cumulative returns here: the running product of (1 + return)
returns_plus_one = returns + 1
cumulative_return = returns_plus_one.cumprod()

# Calculate and plot the investment return here
cumulative_return.mul(investment).plot()
plt.show()

- Rolling yearly returns on $1,000 invested in google vs apple II


In [None]:
# Import numpy
import numpy as np

# Compound a run of per-period returns into one multi-period return
def multi_period_return(period_returns):
    """Return the product of (1 + r) over all period returns, minus 1."""
    growth_factors = 1 + period_returns
    compounded = np.prod(growth_factors)
    return compounded - 1
    
# Period-over-period percentage change of every column in data
daily_returns = data.pct_change(periods=1)

# Compound the returns inside each trailing 360-day window and express the
# result in percent
rolling_annual_returns = (
    daily_returns
    .rolling(window="360D")
    .apply(multi_period_return)
    * 100
)

# Show the rolling returns
rolling_annual_returns.plot()
plt.show()

- Create a random walk of returns by drawing from a normal distribution, then use this random sample to create a random stock price path.

In [None]:
# Set seed here. The previous `seed = 42` only bound a variable and never
# seeded anything, so every run drew a different path.
# (assumes `normal` is numpy.random.normal, which draws from NumPy's global
# generator - TODO confirm against the import)
np.random.seed(42)

# Create random_walk: 2500 draws with mean 0.001 and standard deviation 0.01
random_walk = normal(loc=0.001, scale=0.01, size=2500)

# Convert random_walk to pd.series
random_walk = pd.Series(random_walk)

# Create random_prices: the running product of (1 + return)
random_prices = random_walk.add(1).cumprod()

# Plot random_prices here, scaled by 1000
random_prices.mul(1000).plot()
plt.show()

- Build a random walk using historical returns from Facebook's stock price from its IPO through May 31, 2017. The next exercise uses it to simulate an alternative random price path.

In [None]:
# Set seed here. The previous `seed = 42` only bound a variable and never
# seeded anything, so every run drew a different sample.
# (assumes `choice` is numpy.random.choice, which draws from NumPy's global
# generator - TODO confirm against the import)
np.random.seed(42)

# Calculate daily_returns here
# NOTE(review): count() and choice() below need a one-dimensional input, so
# `fb` is presumably a Series here - confirm how it is loaded.
daily_returns = fb.pct_change().dropna()

# Get n_obs: the number of non-missing returns
n_obs = daily_returns.count()

# Create random_walk by sampling n_obs values from the observed returns
random_walk = choice(daily_returns, size=n_obs)

# Convert random_walk to pd.series, reusing the dates of daily_returns so the
# sample can later be aligned with fb when the simulated path is inserted
random_walk = pd.Series(random_walk, index=daily_returns.index)

# Plot random_walk distribution
sns.distplot(random_walk)
plt.show()

- Take a random sample of returns like the one generated in the last exercise and use it to create a random stock price path.

In [None]:
# Select fb start price here: the first row, kept as a one-element Series so
# it can be concatenated. iloc[:1] replaces the deprecated .first('D').
start = fb.price.iloc[:1]

# Add 1 to random walk and put the start price in front of it. pd.concat
# replaces Series.append, which was removed in pandas 2.0. random_walk itself
# is left untouched, so re-running this cell does not add 1 a second time.
random_price = pd.concat([start, random_walk.add(1)])

# Calculate cumulative product here
random_price = random_price.cumprod()

# Insert into fb and plot
# NOTE(review): column assignment aligns on index labels, so random_walk must
# carry fb's dates for the values to land on the right rows - confirm.
fb['random'] = random_price
fb.plot()
plt.show()


### Relationships between time series: correlation
- Relationships between time series: correlation

- Annual return correlations among several stocks


In [None]:
# Inspect data here. info() prints its own summary and returns None, so it
# is called directly rather than wrapped in print().
data.info()

# Calculate year-end prices here: the last observation in each calendar
# year. The YearEnd offset replaces the "A" string alias, which pandas
# deprecated in 2.2; the bins are unchanged.
annual_prices = data.resample(pd.offsets.YearEnd()).last()

# Calculate annual returns here
annual_returns = annual_prices.pct_change()

# Calculate and print the correlation matrix here
correlations = annual_returns.corr()
print(correlations)

# Visualize the correlations as heatmap here
sns.heatmap(correlations, annot=True)
plt.show()


------------------------- END OF TAUGHT COURSE -------------------------------