In [None]:
import pandas as pd

# Load the June 2020 data for each company; column 0 of every CSV becomes the index.
google, msft, amzn = [
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        'data/google_june_2020.csv',
        'data/msft_june_2020.csv',
        'data/amzn_june_2020.csv',
    )
]


In [None]:
# info is a method: without the call parentheses the cell only shows the bound-method
# repr instead of the column/dtype/null-count summary.
google.info()

In [None]:
# Preview the first two rows of the Google frame.
google.head(2)

### Concatenation of Tidy Data Source

In [None]:
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, where it raises
# AttributeError. pd.concat stacks the three frames row-wise, which is what appending
# them to an empty frame produced.
all_df = pd.concat([google, msft, amzn])

In [None]:
# info() prints the structural summary; head() is the cell's displayed value.
all_df.info()
all_df.head()

In [None]:
# Replace the index carried over from the source frames with a fresh 0..n-1 range;
# drop=True discards the old index instead of keeping it as a column.
all_df = all_df.reset_index(drop=True)

### Concatenation of Tidy Data Source - pd.concat()

In [None]:
# Stack the three frames row-wise, then renumber the rows 0..n-1 (old index discarded).
all_df = pd.concat([google, msft, amzn]).reset_index(drop=True)
all_df.head()

In [None]:
# (rows, columns) of the combined frame.
all_df.shape

### Lecture 4 - Multi-level Indexing

In [None]:
# Start the section from freshly loaded data: read the three CSVs (column 0 as index),
# stack them row-wise and renumber the rows 0..n-1.
google, msft, amzn = [
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        'data/google_june_2020.csv',
        'data/msft_june_2020.csv',
        'data/amzn_june_2020.csv',
    )
]
all_df = pd.concat([google, msft, amzn]).reset_index(drop=True)
all_df.head()

In [None]:
# Build a two-level row index: Symbol is the outer level, Date the inner level.
all_df = all_df.set_index(['Symbol', 'Date'])

In [None]:
# Preview the first two rows under the (Symbol, Date) index.
all_df.head(2)

In [None]:
# Sort rows by the (Symbol, Date) index before the label-based lookups that follow.
all_df = all_df.sort_index()
all_df.head()

In [None]:
# A (Symbol, Date) tuple addresses one row of the MultiIndex; ':' keeps every column.
all_df.loc[('AMZN', '2020-06-01'),:]

In [None]:
# Same row key, restricted to the 'Close' column.
all_df.loc[('AMZN', '2020-06-01'),'Close']

In [None]:
# Unique labels of each index level (level 0 = Symbol, level 1 = Date).
all_df.index.levels

In [None]:
# Labels of the outer level (Symbol).
all_df.index.levels[0]

In [None]:
# Labels of the inner level (Date).
all_df.index.levels[1]

In [None]:
# A list in the first position of the key selects several symbols for the same date.
all_df.loc[(['AMZN','GOOGL'], '2020-06-01'),'Close']

### Lecture 5 - Merging Tidy Data

In [None]:
# Reload from disk so this section starts from unmodified frames: read the three CSVs
# (column 0 as index), stack them row-wise and renumber the rows 0..n-1.
google, msft, amzn = [
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        'data/google_june_2020.csv',
        'data/msft_june_2020.csv',
        'data/amzn_june_2020.csv',
    )
]
all_df = pd.concat([google, msft, amzn]).reset_index(drop=True)
all_df.head()

In [None]:
# axis=1 places the frames side by side, aligning rows on the index; column names
# that occur in more than one frame are repeated in the result.
all_df_columns = pd.concat([google, msft, amzn], axis = 1)

In [None]:
# Preview the side-by-side result.
all_df_columns.head()

In [None]:
# verify_integrity=True makes concat check the concatenated axis (here the columns)
# for duplicates. The three frames share column names (at least 'Symbol' and 'Date'),
# so this raises ValueError. Catch it so the demonstration shows the message without
# stopping a "Restart & Run All".
try:
    verified_columns = pd.concat([google, msft, amzn], axis=1, verify_integrity=True)
except ValueError as err:
    verified_columns = None
    print(f'verify_integrity rejected the concatenation: {err}')

# Displays the frame if the check passed; shows nothing when it was rejected.
verified_columns

In [None]:
def new_agg_df(df):
    """Return a Date-indexed copy of ``df`` with ticker-prefixed column names.

    The ticker is read from the first value of the ``Symbol`` column. That column
    is dropped, ``Date`` becomes the index, and every remaining column is renamed
    to ``<symbol>_<column>`` (for example ``AMZN_Close``).

    The input frame is left untouched, so the function can be called repeatedly
    on the same frame (an in-place version fails the second time with
    ``KeyError: 'Symbol'`` and silently alters the caller's data).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``Symbol`` and ``Date`` columns. Only the first ``Symbol``
        value is used, so the frame is expected to hold a single ticker.

    Returns
    -------
    pandas.DataFrame
        A new frame indexed by ``Date`` whose columns carry the ticker prefix.

    Raises
    ------
    KeyError
        If ``Symbol`` or ``Date`` is not a column of ``df``.
    IndexError
        If ``df`` has no rows (there is no ticker to read).
    """
    symbol = df['Symbol'].iloc[0]
    # drop()/set_index() without inplace=True return new frames, leaving `df` as it was.
    prefixed = df.drop(columns=['Symbol']).set_index('Date')
    prefixed.columns = [symbol + '_' + col for col in prefixed.columns]
    return prefixed

In [None]:
# Build the ticker-prefixed, Date-indexed frame for each company (AMZN, MSFT, GOOGL order).
amzn_agg, msft_agg, google_agg = [
    new_agg_df(company_df) for company_df in (amzn, msft, google)
]

In [None]:
# Preview the Amazon frame returned by new_agg_df.
amzn_agg.head()

In [None]:
# reset_index() turns the Date index back into a column so both frames can be
# merged on it; merge defaults to an inner join.
amzn_agg.reset_index().merge(msft_agg.reset_index(), on = 'Date').head()

In [None]:
# Same join with the key named separately for each side and the join type stated explicitly.
amzn_agg.reset_index().merge(msft_agg.reset_index(), left_on = 'Date', right_on = 'Date', how='inner').head()

In [None]:
# Join the three frames on their indexes (left_index/right_index), one merge at a time:
# first AMZN with MSFT, then the result with GOOGL.
amzn_msft_agg = amzn_agg.merge(msft_agg, left_index=True, right_index=True)
all_df_agg = amzn_msft_agg.merge(google_agg, left_index=True, right_index=True)

all_df_agg.head()

### Lecture 6 - Transformation of a Dataset

In [None]:
# Reload from disk so this section starts from unmodified frames: read the three CSVs
# (column 0 as index), stack them row-wise and renumber the rows 0..n-1.
google, msft, amzn = [
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        'data/google_june_2020.csv',
        'data/msft_june_2020.csv',
        'data/amzn_june_2020.csv',
    )
]
all_df = pd.concat([google, msft, amzn]).reset_index(drop=True)
all_df.head()

In [None]:
# Long -> wide: one row per Date, one column per Symbol, cells hold the Close value.
pivot_clone = all_df.pivot(index='Date', columns='Symbol', values='Close').sort_index()

In [None]:
# Preview the wide Close table.
pivot_clone.head()

In [None]:
# No `values=` is passed, so every remaining column is pivoted and the result has
# two-level columns (original column name, Symbol).
# NOTE(review): the variable name suggests only the volume was intended — confirm
# whether a `values=` argument was dropped here.
pivot_clone_volume = all_df.pivot(index='Date', columns='Symbol').sort_index()
pivot_clone_volume.head()

In [None]:
# Index by (Symbol, Date), then move the Symbol level from the rows into the columns.
another_df = all_df.set_index(['Symbol', 'Date'])
unstacked_df = another_df.unstack('Symbol')
unstacked_df.head()

In [None]:
# stack() reverses the unstack: the Symbol column level moves back into the row index.
unstacked_df.stack(level='Symbol').head()

In [None]:
# Move Symbol back into the row index, then exchange the positions of the two
# index levels. swaplevel() does not reorder the rows themselves.
stacked_df = unstacked_df.stack(level = 'Symbol')
swapped_df = stacked_df.swaplevel('Symbol', 'Date')
swapped_df.head()

In [None]:
# Sort the rows by the swapped index levels.
swapped_df.sort_index().head()

### Melting with Pandas

In [None]:
# Wide -> long: Symbol and Date stay as identifier columns; every other column is
# unpivoted into 'variable' / 'value' rows.
all_df.melt(id_vars=['Symbol','Date'])

### Aggregating Results with Pandas

In [None]:
# Wide table of closing prices: one row per Date, one column per Symbol.
pivot_close = all_df.pivot(
    index = 'Date',
    columns = 'Symbol',
    values = 'Close'
).sort_index().reset_index()

# After the pivot, 'Symbol' is only the *name* of the columns axis, not a column, so
# pivot_close['Symbol'] (and set_index('Symbol')) raise KeyError. Average each
# ticker's column over all dates instead; the tickers become the 'Symbol' values.
mean_pivot = (
    pivot_close.set_index('Date')   # keep only the per-ticker price columns
    .mean()                         # column-wise mean -> one value per ticker
    .rename_axis('Symbol')
    .rename('average_price')
    .reset_index()
)

mean_pivot.set_index('Symbol')
