# <span style="color:maroon">**NRP Stock Simulation Software**</span>

#### <span style="color:green">**Pull in daily stock data**</span>

In [None]:
import pandas as pd
import glob

In [None]:
# Utility Functions
def combine_dataframes(pattern, names, index_col, na_values, header=1, parse_dates=True, ignore_index=True):
    """Load every CSV file matching ``pattern`` and stack them into one dataframe.

    Args:
        pattern: Glob pattern selecting the CSV files to read.
        names: Column names passed to ``pd.read_csv``.
        index_col: Column (name or position) used as the index of each frame.
        na_values: Extra string(s) that ``pd.read_csv`` should treat as NaN.
        header: Accepted for backward compatibility; currently NOT forwarded
            to ``pd.read_csv``.
        parse_dates: Accepted for backward compatibility; currently NOT
            forwarded to ``pd.read_csv``.
        ignore_index: Passed to ``pd.concat``; when True the combined frame
            gets a fresh 0..n-1 index instead of keeping each file's index.

    Returns:
        A single dataframe with the rows of all matching files, concatenated
        in sorted file-path order.

    Raises:
        ValueError: If no file matches ``pattern``.

    NOTE(review): ``header`` and ``parse_dates`` have never been applied by
    this function. Forwarding ``header=1`` together with ``names`` would make
    pandas discard leading rows of every file, so they are left unapplied
    here -- confirm the intended behavior before wiring them through.
    """
    # glob.glob returns paths in arbitrary order; sort so that the row order
    # of the combined frame is reproducible across runs and machines.
    files = sorted(glob.glob(pattern))
    if not files:
        # pd.concat([]) would fail with a message that does not mention the
        # pattern; report the actual cause instead.
        raise ValueError("No files match pattern: {!r}".format(pattern))

    # load each matching file into its own dataframe
    frames = [
        pd.read_csv(path, names=names, index_col=index_col, na_values=na_values)
        for path in files
    ]

    # concatenate all dataframes in frames into a single dataframe
    return pd.concat(frames, ignore_index=ignore_index)

def basic_eda(df):
    """Print a quick exploratory summary of ``df`` to standard output.

    Prints, in order: the first rows, the last rows, the shape, the column
    index, the ``info()`` report and the ``describe()`` statistics.

    Args:
        df: The dataframe to summarize.

    Returns:
        None. All output goes to standard output.
    """
    print(df.head(), "\n")
    print(df.tail(), "\n")
    print(df.shape, "\n")
    print(df.columns, "\n")
    # DataFrame.info() writes its report itself and returns None, so wrapping
    # it in print() would emit a stray "None" line after the report.
    df.info()
    print("\n")
    print(df.describe())

In [None]:
# Read every ./data/stock_data_*.csv file into one dataframe (stocks),
# indexed by date, treating " #N/A N/A " cells as missing values.
col_names = ["ticker", "date", "close", "cap", "volume"]
stocks = combine_dataframes(
    "./data/stock_data_*.csv",
    names=col_names,
    index_col="date",
    na_values=" #N/A N/A ",
    ignore_index=False,
)

##### <span style="color:violet">**Examining data in the daily stock data dataframe**</span>

In [None]:
# Print head, tail, shape, columns, info and summary statistics of the raw stock data
basic_eda(stocks)

In [None]:
# The combined stock data is bound to `stocks`; no `df` variable exists at
# notebook scope, so these examples must reference `stocks`.
# Count rows per ticker (NaN tickers included), using both column-access styles.
print(stocks.ticker.value_counts(dropna=False), "\n")
print(stocks["ticker"].value_counts(dropna=False), "\n")     # same as previous line
# Show the rows whose volume exceeds one billion, again in both styles.
print(stocks[stocks.volume > 1000000000], "\n")              # basic filtering - version 1
print(stocks[stocks["volume"] > 1000000000])                 # basic filtering - version 2

##### <span style="color:violet">**Visualizing data in the daily stock data dataframe**</span>

In [None]:
import matplotlib.pyplot as mpl
%matplotlib inline

# plot histogram of volume - note the use of logarithmic transforms of both the x and y axis
# (the combined stock data is bound to `stocks`; no `df` exists at notebook scope)
stocks.volume.plot(kind="hist", rot=70, logx=True, logy=True, figsize=(12, 6))

In [None]:
# Boxplots are great when you have a numeric column that you want to compare across different categories.
# (the combined stock data is bound to `stocks`; no `df` exists at notebook scope)
stocks.boxplot(column="volume", by="ticker", rot=90, figsize=(12,6))

In [None]:
# When you want to visualize two numeric columns, scatter plots are ideal.
# Notice the fan-shaped pattern - why is that the case?
# (the combined stock data is bound to `stocks`; no `df` exists at notebook scope)
stocks[stocks["ticker"]=="MU"].plot(kind="scatter", x="close", y="cap", rot=90, figsize=(12,6))

#### <span style="color:green">**Pivot daily stock data into three separate dataframes**</span>

In [None]:
# Reshape the long-format stock data (bound to `stocks`; no `df` exists at
# notebook scope) into one wide table per measure: one row per date, one
# column per ticker.
prices = pd.pivot_table(stocks, values="close", index="date", columns="ticker")
caps = pd.pivot_table(stocks, values="cap", index="date", columns="ticker")
volumes = pd.pivot_table(stocks, values="volume", index="date", columns="ticker")

In [None]:
# Summarize the pivoted close-price table
basic_eda(prices)

In [None]:
# Summarize the pivoted cap table
basic_eda(caps)

In [None]:
# Summarize the pivoted volume table
basic_eda(volumes)

In [None]:
def fillgaps(data, mode="forwardfill"):
    """Fill the missing values inside each column's valid range.

    Every column is first trimmed to the span between its first and last
    non-missing value, so leading and trailing gaps are never filled. The
    gaps that remain inside that span are then filled according to ``mode``.

    Args:
        data: Dataframe whose columns are filled independently.
        mode: ``"forwardfill"`` carries the previous value forward;
            ``"fillzeros"`` replaces missing values with 0.

    Returns:
        A new dataframe with the same column labels, built from the trimmed
        and filled columns. Positions outside a column's valid span are NaN.

    Raises:
        ValueError: If ``mode`` is not one of the supported values.

    NOTE(review): the row order of the result comes from pandas combining
    the per-column indexes; confirm it stays in the expected order when
    columns start on different rows.
    """
    if mode not in ("forwardfill", "fillzeros"):
        # Previously an unrecognized mode silently produced an empty frame.
        raise ValueError("Unknown mode: {!r}".format(mode))

    filled = []
    # Iterate the columns directly instead of abusing DataFrame.apply for its
    # side effects (apply is meant to collect return values, not to loop).
    for _, col in data.items():
        trimmed = col.loc[col.first_valid_index():col.last_valid_index()]
        if mode == "forwardfill":
            filled.append(trimmed.ffill())
        else:
            filled.append(trimmed.fillna(0))

    # Each Series becomes a row here, so transpose to get columns back.
    return pd.DataFrame(filled).transpose()

In [None]:
# Prices and caps carry the last known value forward across gaps ...
prices = fillgaps(prices, mode="forwardfill")
caps = fillgaps(caps, mode="forwardfill")
# ... while gaps in volume are filled with zeros.
volumes = fillgaps(volumes, mode="fillzeros")

In [None]:
# Summarize the price table again after gap filling
basic_eda(prices)

In [None]:
# Summarize the cap table again after gap filling
basic_eda(caps)

In [None]:
# Summarize the volume table again after gap filling
basic_eda(volumes)