In [9]:
# Importing the data
import pandas as pd
import numpy as np
import yfinance as yf # Previously installed
# Cut-off date in year-month-day order (defined here; not used inside this cell).
cutt_off_date = '2012-01-01'

# Load the Wells Fargo price history from the local CSV export.
# Data source: https://www.kaggle.com/datasets/varpit94/wells-fargo-stock-data-updated-till-30jun2021
# index_col='Date' makes the dates the row index; parse_dates=[0] asks pandas to
# parse the first column as datetimes (presumably the Date column -- verify
# against the CSV header).
# NOTE(review): this is an absolute, machine-specific path, so the cell only
# runs on the author's machine; consider a path relative to a data directory.
df = pd.read_csv(
    r'C:\Users\IanCH\OneDrive\Documents\WF_Panda_Data.csv',
    index_col='Date',
    parse_dates=[0],
)

# Keep only the rows whose opening price is greater than zero.
# The boolean mask is applied directly so that df stays a DataFrame; wrapping
# the result in [...] would turn df into a one-element list and break every
# later df.<column> access.
df = df[df.Open > 0]

# Open should always be greater than zero unless the stock has dropped off the market.

In [10]:
# Fetch the same Wells Fargo price history from Yahoo Finance.
# NOTE: this rebinds df, replacing whatever the previous cell stored in it.
wfc_symbol = 'WFC'
df = yf.download(wfc_symbol, start='1972-06-01', end='2022-03-24')

# Ticker handle for the same symbol.
df_ticker = yf.Ticker(wfc_symbol)

[*********************100%***********************]  1 of 1 completed


In [11]:
# Reversing the dataframe so the most recent rows come first.
# Slicing the index with [::-1] (start, stop, step) walks it backwards;
# reindex then lays the rows out in that order.
newest_first_index = df.index[::-1]
Reverseddf = df.reindex(index=newest_first_index)
Reverseddf

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-03-23,52.759998,53.049999,51.099998,51.119999,49.466431,27988700
2022-03-22,52.410000,54.200001,52.299999,53.389999,51.663006,28625600
2022-03-21,51.660000,51.799999,50.610001,51.139999,49.485786,23411500
2022-03-18,50.849998,51.540001,50.040001,51.419998,49.756725,60963400
2022-03-17,51.020000,51.490002,50.230000,51.480000,49.814785,29930300
...,...,...,...,...,...,...
1972-06-07,0.895833,0.895833,0.888021,0.888021,0.148650,48000
1972-06-06,0.895833,0.911458,0.893229,0.895833,0.149958,196800
1972-06-05,0.882813,0.895833,0.880208,0.895833,0.149958,302400
1972-06-02,0.882813,0.890625,0.882813,0.882813,0.147779,172800


In [12]:
# Print a structural summary of df: index range, columns with their non-null
# counts and dtypes, and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 12562 entries, 1972-06-01 to 2022-03-23
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Open       12562 non-null  float64
 1   High       12562 non-null  float64
 2   Low        12562 non-null  float64
 3   Close      12562 non-null  float64
 4   Adj Close  12562 non-null  float64
 5   Volume     12562 non-null  int64  
dtypes: float64(5), int64(1)
memory usage: 945.0 KB


In [17]:
# Move Date out of the index into a regular column so it can be used in the
# dictionary comprehension.
# The guard keeps this cell idempotent: without it every re-run would reset an
# already-reset index again and change the frame's columns (the original note
# warned that the line could only be run once).
if 'Date' not in df.columns:
    df = df.reset_index()

# Convert each column to a plain Python list for the dictionary comprehension.
# NOTE: for the datetime column, .values.tolist() yields integer nanoseconds
# since the Unix epoch (e.g. 76291200000000000), not Timestamp objects.
Date = df.Date.values.tolist()
Open = df.Open.values.tolist()
High = df.High.values.tolist()
Low = df.Low.values.tolist()
Close = df.Close.values.tolist()
Volume = df.Volume.values.tolist()


# Positional keys 0..n-1, sized from the data itself rather than a hard-coded
# row count, so the cell keeps working when the download range changes.
index_column = range(len(Date))

# Dictionary comprehension: one entry per row, keyed by position, holding
# [Date, Open, High, Low, Close, Volume] for that row.
# The loop variables have their own names so they cannot be confused with the
# column lists they iterate over.
NewDict = {
    row_key: [row_date, row_open, row_high, row_low, row_close, row_volume]
    for row_key, row_date, row_open, row_high, row_low, row_close, row_volume in zip(
        index_column, Date, Open, High, Low, Close, Volume)
}

# Print only a handful of entries; dumping the whole dictionary can crash
# Jupyter Notebook.
# NOTE(review): the preview starts at key 1, so the first row (key 0) is not
# shown -- confirm this is intended.
print('Output from the dictionary comprehension:')
for y in range(1, min(15, len(NewDict))):  # y is the dictionary key.
    print(NewDict[y])
print('...')

# Same idea as dict(zip(...)): the columns are walked in step, the date of
# each row becomes the dictionary key and the prices become its value.
# Each value is a tuple (Open, Close, (High, Low)); the nested tuples are the
# downside of this approach because they are awkward to work with.

# Overwrites the NewDict built by the example above.
NewDict = {
    trade_date: (open_price, close_price, (high_price, low_price))
    for trade_date, open_price, close_price, high_price, low_price in zip(
        df.Date, df.Open, df.Close, df.High, df.Low)
}

# A simple method for creating a dictionary: walk the Date and Open lists
# (created from the series above) in step and map each date to the open price
# of the same row.
# A loop over Open nested inside the loop over Date would visit every
# date/price combination, and indexing Open with int(price) would use a price
# as a list position -- every date would end up with the same unrelated value.
Test_Dict = {}
for x, y in zip(Date, Open):
    Test_Dict[x] = y

Output from the dictionary comprehension:
[76291200000000000, 0.8828129768371582, 0.890625, 0.8828129768371582, 0.8828129768371582, 172800]
[76550400000000000, 0.8828129768371582, 0.8958330154418945, 0.8802080154418945, 0.8958330154418945, 302400]
[76636800000000000, 0.8958330154418945, 0.9114580154418945, 0.8932290077209473, 0.8958330154418945, 196800]
[76723200000000000, 0.8958330154418945, 0.8958330154418945, 0.8880209922790527, 0.8880209922790527, 48000]
[76809600000000000, 0.8880209922790527, 0.8958330154418945, 0.8854169845581055, 0.890625, 144000]
[76896000000000000, 0.890625, 0.8958330154418945, 0.8854169845581055, 0.890625, 43200]
[77155200000000000, 0.8958330154418945, 0.9010419845581055, 0.8958330154418945, 0.9010419845581055, 24000]
[77241600000000000, 0.8984379768371582, 0.8984379768371582, 0.8854169845581055, 0.8854169845581055, 76800]
[77328000000000000, 0.8854169845581055, 0.890625, 0.8854169845581055, 0.8854169845581055, 244800]
[77414400000000000, 0.890625, 0.89322900

In [29]:
# Creating a custom function for the descriptive statistics.

def Summarize_Values(listvalues):
    '''
    Print and return summary statistics for a list of numerical values.

    Args:
        listvalues (list of float): Values to summarize. Must not be empty.

    Returns:
        list: [Average, Var, Std], in that order, where
            Average is the sum of the values divided by their count,
            Var is the variance computed by np.var (population variance,
                numpy's default ddof=0),
            Std is the square root of Var.

    Raises:
        ValueError: If listvalues is empty.

    Side effects:
        Prints how many values are greater than the average, the set of
        unique values, and a header line naming the returned statistics.
    '''
    # Fail early with a clear message instead of a bare ZeroDivisionError.
    if len(listvalues) == 0:
        raise ValueError('Summarize_Values requires at least one value')

    Average = sum(listvalues) / len(listvalues)
    values = np.array(listvalues)
    Var = np.var(values)
    Std = np.sqrt(Var)  # reuse Var rather than computing the variance twice

    # Element-wise comparison gives a boolean array; counting the non-zero
    # entries counts the values above the average.
    above_average_count = np.count_nonzero(values > Average)

    print(f'{above_average_count} value(s) are greater than the average')
    print(set(listvalues))  # unique values, printed only (not returned)
    print('Average - variance - Standard Deviation')
    return [Average, Var, Std]

# Pull the open and close prices out of the zip-built dictionary.
# The keys are the dates; each value is a tuple whose item [0] is the open and
# item [1] (the second item) is the close. Both are rounded to one decimal.
DictOpenList = [round(prices[0], 1) for prices in NewDict.values()]
DictCloseList = [round(prices[1], 1) for prices in NewDict.values()]

# Run the statistics function on the open list, then on the close list.
Average, Var, Std = Summarize_Values(DictOpenList)
print(f'{Average} / {Var} / {Std}')
print("-----")
Average1, Var1, Std1 = Summarize_Values(DictCloseList)
print(f'{Average1} / {Var1} / {Std1}')

5916 value(s) are greater than the average
{0.0, 1.0, 0.5, 1.5, 2.0, 2.5, 5.0, 6.0, 5.1, 6.1, 3.0, 3.5, 9.0, 10.0, 11.0, 14.1, 15.0, 16.0, 18.1, 19.0, 20.0, 4.0, 4.5, 21.1, 24.1, 24.2, 5.5, 25.0, 26.4, 29.0, 30.2, 6.5, 31.1, 30.4, 25.6, 26.0, 7.0, 7.5, 29.1, 30.1, 31.0, 8.0, 8.5, 34.4, 35.5, 36.0, 9.5, 44.3, 39.2, 40.3, 41.3, 10.5, 43.0, 44.4, 45.1, 46.0, 11.5, 48.3, 49.8, 50.0, 51.0, 12.0, 12.5, 54.2, 55.0, 56.0, 13.0, 13.5, 59.1, 60.3, 61.2, 14.5, 14.0, 64.2, 65.7, 15.5, 16.5, 17.0, 17.5, 18.5, 18.0, 19.5, 20.5, 21.5, 21.0, 22.5, 22.0, 23.5, 23.0, 24.0, 24.5, 25.5, 26.5, 27.0, 27.5, 28.0, 28.5, 29.5, 30.0, 30.5, 31.5, 32.0, 32.5, 33.5, 33.0, 34.0, 34.5, 35.0, 36.5, 37.0, 37.5, 38.0, 38.5, 39.5, 39.0, 40.5, 40.0, 41.5, 41.0, 42.5, 42.0, 43.5, 44.5, 44.0, 45.5, 45.0, 46.5, 47.5, 47.0, 48.5, 48.0, 49.5, 49.0, 50.5, 51.5, 52.0, 52.5, 53.0, 53.5, 54.0, 54.5, 55.5, 56.5, 57.0, 57.5, 58.0, 58.5, 59.0, 59.5, 60.0, 61.5, 61.0, 62.0, 62.5, 64.0, 64.9, 65.9, 65.4, 4.1, 4.6, 5.6, 6.6, 7.1, 7.6, 