In [28]:
import pandas as pd

# Read the three per-ticker CSVs; the first column of each file becomes the row index.
csv_paths = [
    'data/google_june_2020.csv',
    'data/msft_june_2020.csv',
    'data/amzn_june_2020.csv',
]
google, msft, amzn = (pd.read_csv(path, index_col=0) for path in csv_paths)


In [None]:
# Call the method: a bare `google.info` only echoes the bound-method repr
# instead of printing the column/dtype summary.
google.info()

In [None]:
google.head(2)

### Concatenation of Tidy Data Source

In [29]:
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0.
# pd.concat stacks the frames row-wise in the same way and, like append,
# keeps each frame's original row labels.
all_df = pd.concat([google, msft, amzn])

In [30]:
all_df.info()
all_df.head()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 66 entries, 0 to 21
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Date    66 non-null     object 
 1   Close   66 non-null     float64
 2   Volume  66 non-null     int64  
 3   Symbol  66 non-null     object 
dtypes: float64(1), int64(1), object(2)
memory usage: 2.6+ KB


Unnamed: 0,Date,Close,Volume,Symbol
0,2020-06-01,1434.869995,1258100,GOOGL
1,2020-06-02,1442.310059,1172100,GOOGL
2,2020-06-03,1439.25,1386600,GOOGL
3,2020-06-04,1414.300049,1349100,GOOGL
4,2020-06-05,1440.02002,2132100,GOOGL


In [None]:
# Discard the repeated per-file row labels and renumber the rows 0..n-1.
all_df = all_df.reset_index(drop=True)

### Concatenation of Tidy Data Source - pd.concat()

In [None]:
# ignore_index=True renumbers the stacked rows 0..n-1 as part of the concat.
all_df = pd.concat([google, msft, amzn], ignore_index=True)
all_df.head()

In [None]:
all_df.shape

### Lecture 4 - Multi-level Indexing

In [None]:
# Re-read the source files so this section starts from freshly loaded frames.
csv_paths = [
    'data/google_june_2020.csv',
    'data/msft_june_2020.csv',
    'data/amzn_june_2020.csv',
]
google, msft, amzn = (pd.read_csv(path, index_col=0) for path in csv_paths)

# Stack the three frames into one long table with a clean 0..n-1 row index.
all_df = pd.concat([google, msft, amzn], ignore_index=True)
all_df.head()

In [None]:
# Build a two-level row index: Symbol is the outer level, Date the inner one.
all_df = all_df.set_index(['Symbol', 'Date'])

In [None]:
all_df.head(2)

In [None]:
# Sort lexicographically by (Symbol, Date) before doing label-based lookups.
all_df = all_df.sort_index()
all_df.head()

In [None]:
all_df.loc[('AMZN', '2020-06-01'),:]

In [None]:
all_df.loc[('AMZN', '2020-06-01'),'Close']

In [None]:
all_df.index.levels

In [None]:
all_df.index.levels[0]

In [None]:
all_df.index.levels[1]

In [None]:
# A list in the first tuple position selects several Symbol labels at once;
# the second position still pins a single Date. Only the Close column is returned.
all_df.loc[(['AMZN','GOOGL'], '2020-06-01'),'Close']

### Lecture 5 - Merging Tidy Data

In [None]:
# Start this section from freshly loaded frames.
csv_paths = [
    'data/google_june_2020.csv',
    'data/msft_june_2020.csv',
    'data/amzn_june_2020.csv',
]
google, msft, amzn = (pd.read_csv(path, index_col=0) for path in csv_paths)

# Long-format table: all rows of the three tickers, renumbered 0..n-1.
all_df = pd.concat([google, msft, amzn]).reset_index(drop=True)
all_df.head()

In [None]:
# axis=1 places the frames side by side, aligning rows on the index.
# The three frames share the same column names, so those names repeat in the result.
all_df_columns = pd.concat([google, msft, amzn], axis = 1)

In [None]:
all_df_columns.head()

In [None]:
# verify_integrity=True makes pandas check the concatenation axis for duplicate
# labels. With axis=1 that axis is the columns, and the three frames share their
# column names, so the call raises ValueError. Catch and report it so that
# "Run All" continues past this demonstration instead of stopping here.
try:
    checked_wide = pd.concat([google, msft, amzn], axis=1, verify_integrity=True)
except ValueError as err:
    checked_wide = None
    print(f'verify_integrity rejected the concat: {err}')

checked_wide

In [None]:
def new_agg_df(df):
    """Return a Date-indexed copy of ``df`` with symbol-prefixed column names.

    The ticker is taken from the first value of the ``Symbol`` column. That
    column is then dropped, ``Date`` becomes the index, and every remaining
    column is renamed to ``<symbol>_<column>`` (e.g. ``AMZN_Close``).

    The input frame is left untouched, so the function can safely be called
    again on the same frame (re-running the calling cell no longer fails with
    a ``KeyError`` on the already-removed ``Symbol`` column).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing at least the columns ``Symbol`` and ``Date``.

    Returns
    -------
    pandas.DataFrame
        New frame indexed by ``Date`` with prefixed column names.

    Raises
    ------
    KeyError
        If ``Symbol`` or ``Date`` is missing from ``df``.
    IndexError
        If ``df`` has no rows (there is no symbol to read).
    """
    # First value of the Symbol column == first unique symbol.
    symbol = df['Symbol'].iloc[0]

    # drop / set_index without inplace=True return new objects, so the
    # caller's frame is never modified.
    result = df.drop(columns=['Symbol']).set_index('Date')
    result.columns = [f'{symbol}_{column}' for column in result.columns]

    return result

In [None]:
# One Date-indexed frame per ticker, with columns prefixed by the ticker symbol.
amzn_agg = new_agg_df(amzn)
msft_agg = new_agg_df(msft)
google_agg = new_agg_df(google)

In [None]:
amzn_agg.head()

In [None]:
# Turn the Date index back into a column on both sides, then join on it
# (merge defaults to an inner join).
amzn_agg.reset_index().merge(msft_agg.reset_index(), on = 'Date').head()

In [None]:
# Same join spelled out: left_on/right_on name the key column on each side
# (useful when the names differ), and how='inner' is stated explicitly.
amzn_agg.reset_index().merge(msft_agg.reset_index(), left_on = 'Date', right_on = 'Date', how='inner').head()

In [None]:
# Join the three per-ticker frames directly on their row index (no key column).
shared_index = {'left_index': True, 'right_index': True}
all_df_agg = (
    amzn_agg
    .merge(msft_agg, **shared_index)
    .merge(google_agg, **shared_index)
)

all_df_agg.head()

### Lecture 6 - Transformation of a Dataset

In [None]:
# Fresh load for the reshaping examples.
csv_paths = [
    'data/google_june_2020.csv',
    'data/msft_june_2020.csv',
    'data/amzn_june_2020.csv',
]
google, msft, amzn = (pd.read_csv(path, index_col=0) for path in csv_paths)

# Long-format table with rows renumbered 0..n-1.
all_df = pd.concat([google, msft, amzn], ignore_index=True)
all_df.head()

In [None]:
# Long -> wide: one row per Date, one column per Symbol, cells hold Close.
# pivot raises if an (index, columns) pair occurs more than once.
pivot_clone = all_df.pivot(
    index = 'Date',
    columns = 'Symbol',
    values = 'Close'
).sort_index()

In [None]:
pivot_clone.head()

In [None]:
# No `values=` is passed, so every remaining column is pivoted and the result
# gets two-level columns (original column name, Symbol).
# NOTE(review): the variable name suggests a Volume-only pivot — confirm
# whether values='Volume' was intended here.
pivot_clone_volume = all_df.pivot(
    index = 'Date',
    columns = 'Symbol',

).sort_index()
pivot_clone_volume.head()

In [None]:
# Index by (Symbol, Date), then move the Symbol level from the rows into the
# columns; set_index without inplace leaves all_df itself unchanged.
another_df = all_df.set_index(['Symbol','Date'])
unstacked_df = another_df.unstack(level = 'Symbol')
unstacked_df.head()

In [None]:
unstacked_df.stack(level='Symbol').head()

In [None]:
# stack moves the Symbol level from the columns back to the rows; swaplevel
# then exchanges the two row-index levels without re-sorting the rows.
stacked_df = unstacked_df.stack(level = 'Symbol')
swapped_df = stacked_df.swaplevel('Symbol', 'Date')
swapped_df.head()

In [None]:
swapped_df.sort_index().head()

### Melting with Pandas

In [None]:
# Wide -> long: Symbol and Date stay as identifier columns; every other column
# is folded into (variable, value) pairs, one row per original cell.
all_df.melt(id_vars=['Symbol','Date'])

### Aggregating Results with Pandas

In [None]:
# Long-hand route to the average close per symbol: pivot to one row per Symbol
# and one column per Date, then average across each row.
pivot_close = (
    all_df
    .pivot(index='Symbol', columns='Date', values='Close')
    .sort_index()
    .reset_index()
)

# Assemble the summary table in one go from the symbols and their row means.
mean_pivot = pd.DataFrame({
    'Symbol': pivot_close['Symbol'].to_list(),
    'average_price': (
        pivot_close
        .set_index('Symbol')
        .apply(lambda closes: closes.mean(), axis=1)
        .to_list()
    ),
})

mean_pivot.set_index('Symbol')


In [None]:
# Same per-symbol average of Close, obtained with a single pivot_table call
# instead of a manual pivot followed by a row-wise mean.

all_df.pivot_table(
    index='Symbol',
    values='Close',
    aggfunc='mean',
)

In [None]:
# Same pivot_table call, but with a list of aggregations: one output column
# per statistic of Close (mean, median, min, max, std) for each Symbol.

all_df.pivot_table(
    index='Symbol',
    values='Close',
    aggfunc=['mean','median','min','max','std']
)

### The groupby in Pandas

In [None]:
# Split rows by Symbol, select the Close column, and average each group.
# The result is a Series indexed by Symbol.
grouped_stocks = all_df.groupby('Symbol')['Close'].mean()
grouped_stocks.head()

In [None]:
grouped_stocks.to_frame()

In [None]:
grouped_stocks = all_df.groupby('Symbol')['Close'].agg(['mean','median', 'max'])
grouped_stocks

In [None]:
# A dict maps each column to its own list of aggregations; the result has
# two-level columns of the form (column, statistic).
grouped_stocks = all_df.groupby('Symbol').agg({'Close':['median','mean'],'Volume': ['max']})
grouped_stocks

### Lecture 7 - Plotting Results with Pandas

In [None]:
import matplotlib.pyplot as plt
import warnings
# NOTE(review): with no `category` argument this hides every warning, including
# deprecation notices raised by pandas — consider narrowing the filter.
warnings.simplefilter(action='ignore')

In [None]:
import pandas as pd 

# Fresh load for the plotting section.
csv_paths = [
    'data/google_june_2020.csv',
    'data/msft_june_2020.csv',
    'data/amzn_june_2020.csv',
]
google, msft, amzn = (pd.read_csv(path, index_col=0) for path in csv_paths)
all_df = pd.concat([google, msft, amzn], ignore_index=True)

# Wide table of closing prices: one row per Date, one column per Symbol.
pivot_close = all_df.pivot(index='Date', columns='Symbol', values='Close').sort_index()

In [None]:
# One line per Symbol column. Title and axis labels let the figure stand on
# its own; the trailing semicolon suppresses the echoed return value.
ax = pivot_close.plot(title='Closing price by symbol')
ax.set_xlabel('Date')
ax.set_ylabel('Close');

In [None]:
def daily_change(row):
    """Scale a series by its first value.

    Every element is divided by the first element, so the result starts at
    1.0 and each later value is the ratio to that starting point.

    Despite the parameter name, the notebook passes this to
    ``DataFrame.apply`` with the default axis, so it receives one *column*
    (one symbol's values over time) per call.

    Parameters
    ----------
    row : pandas.Series
        Values to normalise; must contain at least one element.

    Returns
    -------
    pandas.Series
        ``row`` divided element-wise by its first value. If that first value
        is zero the result contains ``inf``/``NaN``, following pandas'
        division rules.
    """
    # Use positional access: `row[0]` is a label lookup on integer-indexed
    # Series and only falls back to position (with a deprecation warning) on
    # other index types such as the Date strings used in this notebook.
    return row / row.iloc[0]

In [None]:
# apply calls daily_change once per column (i.e. once per Symbol).
new_df = pivot_close.apply(daily_change)
new_df.head()

In [None]:
# Every line starts at 1.0, which makes the symbols directly comparable.
# Title and labels added; the trailing semicolon hides the echoed return value.
ax = new_df.plot(title='Close relative to the first day')
ax.set_xlabel('Date')
ax.set_ylabel('Close / first close');

In [None]:
def daily_returns(row):
    """Return the fractional change of each element versus the previous one.

    The first element has no predecessor, so its result is NaN.
    """
    change_vs_previous = row.pct_change(periods=1)
    return change_vs_previous

In [None]:
# apply calls daily_returns once per column (i.e. once per Symbol).
daily_df = pivot_close.apply(daily_returns)
daily_df.head()

In [None]:
# Day-over-day change per symbol (fractions, not percentages).
# Title and labels added; the trailing semicolon hides the echoed return value.
ax = daily_df.plot(title='Day-over-day change in close')
ax.set_xlabel('Date')
ax.set_ylabel('Fractional change vs. previous day');

In [None]:
# Wide table of traded volume: one row per Date, one column per Symbol.
pivot_volume = all_df.pivot(
    index = 'Date',
    columns= 'Symbol',
    values = 'Volume'
).sort_index()

In [None]:
# Area plot of volume per symbol (pandas stacks the areas by default).
# Title and labels added; the trailing semicolon hides the echoed return value.
ax = pivot_volume.plot.area(title='Volume by symbol (stacked area)')
ax.set_xlabel('Date')
ax.set_ylabel('Volume');

In [None]:
# One bar per date, split into one segment per symbol.
# Title and labels added; the trailing semicolon hides the echoed return value.
ax = pivot_volume.plot.bar(stacked=True, title='Volume by symbol (stacked bars)')
ax.set_xlabel('Date')
ax.set_ylabel('Volume');