# <span style="color:maroon">**NRP Stock Simulation Software**</span>

###### <span style="color:green">**Import modules and define utility functions**</span>

In [1]:
import pandas as pd
import numpy as np
import math
import glob
from matplotlib import pyplot as plt
%matplotlib inline

In [8]:
# Utility Functions
def combine_dataframes(pattern, names, index_col, na_values, header=1, parse_dates=True, ignore_index=True):
    """Read every CSV file matching a glob pattern and stack them into one dataframe.

    Parameters
    ----------
    pattern : str
        Glob pattern selecting the CSV files to load.
    names, index_col, na_values, header, parse_dates
        Forwarded unchanged to ``pd.read_csv`` for every matching file.
    ignore_index : bool, default True
        Forwarded to ``pd.concat``. When True the combined frame gets a fresh
        0..n-1 index; when False each file's own index is kept.

    Returns
    -------
    pandas.DataFrame
        The per-file dataframes concatenated row-wise, in sorted path order.

    Raises
    ------
    ValueError
        If no file matches ``pattern``.
    """
    # glob.glob makes no promise about ordering; sort so the row order of the
    # combined frame is the same on every run and every machine.
    files = sorted(glob.glob(pattern))

    # Fail with a message that names the pattern instead of letting pd.concat
    # raise its generic "No objects to concatenate" ValueError.
    if not files:
        raise ValueError("No files match pattern: {!r}".format(pattern))

    # load each file into its own dataframe
    frames = [
        pd.read_csv(f, names=names, index_col=index_col, na_values=na_values,
                    header=header, parse_dates=parse_dates)
        for f in files
    ]

    # concatenate all dataframes in frames into a single dataframe
    return pd.concat(frames, ignore_index=ignore_index)

def fillgaps(df, mode="fill_zero"):
    """Fill the interior gaps of every column, leaving leading/trailing NaNs alone.

    For each column, only the rows between its first and last non-missing
    value (inclusive) are filled. Rows outside that span stay NaN, and rows
    that fall outside the span of *every* column are dropped from the result.
    A column with no valid values at all has no span, so it stays all-NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are filled independently.
    mode : {"fill_zero", "fill_forward"}, default "fill_zero"
        "fill_zero" replaces interior gaps with 0; "fill_forward" carries the
        last valid value forward.

    Returns
    -------
    pandas.DataFrame
        Same columns as ``df``, rows in the same order as ``df``.

    Raises
    ------
    ValueError
        If ``mode`` is not one of the supported values.
    """
    if mode == "fill_zero":
        filled = df.fillna(0)
    elif mode == "fill_forward":
        filled = df.ffill()
    else:
        # An unknown mode used to return an empty frame silently.
        raise ValueError(
            "mode must be 'fill_zero' or 'fill_forward', got {!r}".format(mode)
        )

    present = df.notna().values
    # True from each column's first valid row downwards ...
    started = np.logical_or.accumulate(present, axis=0)
    # ... and True from each column's last valid row upwards.
    not_ended = np.logical_or.accumulate(present[::-1], axis=0)[::-1]
    # A cell is "inside" when it lies between its column's first and last valid rows.
    inside = started & not_ended

    # Inside each span the first value is valid, so a whole-column fill equals
    # a fill restricted to the span; mask everything outside back to NaN.
    trimmed = filled.where(inside)

    # Keep only rows covered by at least one column's span, in original order.
    return trimmed.loc[inside.any(axis=1)]

def df_eda(df):
    """Print a quick exploratory summary of a dataframe to stdout.

    Prints, in order: the first rows, the last rows, the shape, the column
    index, the ``info()`` report, the dtypes and the ``describe()`` statistics.

    Parameters
    ----------
    df : pandas.DataFrame
        The frame to summarise.

    Returns
    -------
    None
    """
    print(df.head(), "\n")
    print(df.tail(), "\n")
    print(df.shape, "\n")
    print(df.columns, "\n")
    # DataFrame.info() writes its report to stdout itself and returns None, so
    # wrapping it in print() would emit a stray "None" after the report.
    df.info()
    print()
    print(df.dtypes, "\n")
    print(df.describe())

###### <span style="color:green">**Load raw stock data files**</span>

In [3]:
# Column labels handed to the CSV reader for every raw file.
cols = ["ticker", "date", "close", "cap", "volume"]

# Read all matching raw files into one long dataframe: stocks
# (ignore_index=False keeps the "date" index instead of renumbering rows)
stocks = combine_dataframes(
    "./data/stock_data_*.csv",
    names=cols,
    index_col="date",
    na_values=" #N/A N/A ",
    ignore_index=False,
)

###### <span style="color:green">**Pivot stock data columns into wide date-by-ticker dataframes**</span>

In [19]:
# Reshape the long `stocks` table into one date-by-ticker table per measure,
# then fill the gaps inside each ticker's own date range:
# close and cap are forward-filled, volume gaps are filled with zero.
close = (
    stocks
    .pivot_table(index="date", columns="ticker", values="close")
    .pipe(fillgaps, "fill_forward")
)

cap = (
    stocks
    .pivot_table(index="date", columns="ticker", values="cap")
    .pipe(fillgaps, "fill_forward")
)

volume = (
    stocks
    .pivot_table(index="date", columns="ticker", values="volume")
    .pipe(fillgaps, "fill_zero")
)

##### <span style="color:violet">**Examining data in the daily stock data dataframe**</span>

In [None]:
# Summarise the daily stock dataframe loaded from the raw files.
# (`prices` was never defined in this notebook, so the call raised NameError on a fresh kernel.)
df_eda(stocks)

In [None]:
# Shapes of the three pivoted tables on one line, followed by a blank line.
print(*(table.shape for table in (close, cap, volume)), "\n")

# Last rows of each pivoted table, separated by a blank line.
print(close.tail(), cap.tail(), volume.tail(), sep=" \n\n")

In [None]:
# plot histogram of volume - note the use of logarithmic transforms of both the x and y axis
# Uses `stocks`, the long dataframe that holds the "volume" column; the bare name `df`
# was never defined at notebook level. The trailing ";" hides the Axes repr.
stocks.volume.plot(kind="hist", rot=70, logx=True, logy=True, figsize=(12, 6));

In [None]:
# Boxplots are great when you have a numeric column that you want to compare across different categories.
# Uses `stocks`, the long dataframe with both "volume" and "ticker" columns; the bare
# name `df` was never defined at notebook level. The trailing ";" hides the Axes repr.
stocks.boxplot(column="volume", by="ticker", rot=90, figsize=(12,6));

In [None]:
# When you want to visualize two numeric columns, scatter plots are ideal.
# Notice the fan-shaped pattern - why is that the case?
# Uses `stocks`, the long dataframe with "ticker", "close" and "cap" columns; the bare
# name `df` was never defined at notebook level. The trailing ";" hides the Axes repr.
stocks[stocks["ticker"]=="MU"].plot(kind="scatter", x="close", y="cap", rot=90, figsize=(12,6));