## Data merging

In [1]:
import pandas as pd
import numpy as np

In [2]:
def fetch_financial_data(company='AMZN', start=None, end=None):
    """Download stock exchange quotations for one symbol from stooq.pl.

    Parameters
    ----------
    company : str, default 'AMZN'
        Ticker symbol as used on the stooq.pl website.
    start, end : optional
        Bounds of the requested date range, forwarded unchanged to
        ``pandas_datareader``'s ``DataReader``. With both left as ``None``
        the call behaves as before and the reader picks its own range
        (presumably relative to the day the cell is run -- pass explicit
        dates when the notebook outputs must be reproducible).

    Returns
    -------
    The object returned by ``DataReader`` for the ``'stooq'`` data source.
    """
    # Function-local import, kept as in the original definition.
    import pandas_datareader.data as web
    return web.DataReader(name=company, data_source='stooq', start=start, end=end)

In [5]:
# One download per company, in the same order as before.
# The empty argument tuple makes the Amazon call fall back on the
# function's default ticker.
apple, amazon, google, uber = (
    fetch_financial_data(*ticker_args)
    for ticker_args in (('AAPL',), (), ('GOOGL',), ('UBER',))
)

In [6]:
# Summary of the Apple frame: index range, columns, non-null counts, dtypes.
apple.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1257 entries, 2020-09-16 to 2015-09-21
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Open    1257 non-null   float64
 1   High    1257 non-null   float64
 2   Low     1257 non-null   float64
 3   Close   1257 non-null   float64
 4   Volume  1257 non-null   int64  
dtypes: float64(4), int64(1)
memory usage: 58.9 KB


In [7]:
# Same summary for the Uber frame, to compare its row count and date range.
uber.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 342 entries, 2020-09-16 to 2019-05-10
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Open    342 non-null    float64
 1   High    342 non-null    float64
 2   Low     342 non-null    float64
 3   Close   342 non-null    float64
 4   Volume  342 non-null    int64  
dtypes: float64(4), int64(1)
memory usage: 16.0 KB


In [8]:
# Peek at the first rows of the Apple quotes.
apple.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-09-16,115.23,116.0,112.04,112.13,155026675
2020-09-15,118.33,118.829,113.61,115.54,184642039
2020-09-14,114.72,115.93,112.8,115.355,140150087
2020-09-11,114.57,115.23,110.0,112.0,180860325
2020-09-10,120.36,120.5,112.5,113.49,182274391


In [9]:
# Peek at the first rows of the Uber quotes.
uber.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-09-16,37.39,38.52,37.39,37.66,21786591
2020-09-15,37.95,38.48,37.27,37.47,23532584
2020-09-14,37.1,38.0,37.08,37.95,28016041
2020-09-11,36.3,37.145,36.254,36.98,27130921
2020-09-10,35.2,36.78,35.19,35.98,35468315


In [11]:
# Give every column a lower-cased, company-prefixed name so columns from
# different companies stay distinguishable once the frames are combined.
# Columns that already start with the prefix are left untouched, so running
# this cell a second time does not produce names like 'apple_apple_open'.
for _prefix, _frame in (
    ('apple_', apple),
    ('amazon_', amazon),
    ('google_', google),
    ('uber_', uber),
):
    _frame.columns = [
        col if col.startswith(_prefix) else _prefix + col.lower()
        for col in _frame.columns
    ]

In [12]:
# Check the Apple frame again to see its column names after the renaming step.
apple.head()

Unnamed: 0_level_0,apple_open,apple_high,apple_low,apple_close,apple_volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-09-16,115.23,116.0,112.04,112.13,155026675
2020-09-15,118.33,118.829,113.61,115.54,184642039
2020-09-14,114.72,115.93,112.8,115.355,140150087
2020-09-11,114.57,115.23,110.0,112.0,180860325
2020-09-10,120.36,120.5,112.5,113.49,182274391


In [14]:
# Place the four company frames side by side (column-wise concatenation).
df = pd.concat([apple, amazon, google, uber], axis='columns')

In [18]:
def _two_decimals(x):
    """Render a float with exactly two digits after the decimal point."""
    return f'{x:.2f}'


# Show floats with two decimals in every table rendered from here on.
pd.set_option('display.float_format', _two_decimals)

# Descriptive statistics, transposed so that each column becomes a row.
df.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
apple_open,1257.0,45.22,20.74,21.12,27.7,41.58,52.32,137.59
apple_high,1257.0,45.7,21.06,21.51,27.88,41.93,52.91,137.98
apple_low,1257.0,44.77,20.37,21.0,27.61,41.24,51.96,130.53
apple_close,1257.0,45.26,20.73,21.2,27.74,41.51,52.42,134.18
apple_volume,1257.0,139764557.3,64815633.88,46073508.0,95595320.0,122547132.0,165697392.0,565092196.0
amazon_open,1257.0,1408.39,656.86,478.01,809.5,1466.89,1823.12,3547.0
amazon_high,1257.0,1422.92,666.05,493.5,816.02,1501.05,1840.65,3552.25
amazon_low,1257.0,1391.94,646.57,474.0,804.27,1448.45,1807.0,3486.68
amazon_close,1257.0,1408.23,656.42,482.07,810.32,1470.9,1822.49,3531.45
amazon_volume,1257.0,4281444.44,2146768.5,881337.0,2827852.0,3718057.0,5114403.0,16552598.0


In [19]:
# Pairwise correlation between all columns of the merged frame.
df.corr()

Unnamed: 0,apple_open,apple_high,apple_low,apple_close,apple_volume,amazon_open,amazon_high,amazon_low,amazon_close,amazon_volume,google_open,google_high,google_low,google_close,google_volume,uber_open,uber_high,uber_low,uber_close,uber_volume
apple_open,1.0,1.0,1.0,1.0,0.03,0.94,0.94,0.93,0.93,0.12,0.93,0.93,0.92,0.92,0.06,-0.27,-0.27,-0.25,-0.26,0.1
apple_high,1.0,1.0,1.0,1.0,0.04,0.93,0.94,0.93,0.93,0.13,0.92,0.93,0.92,0.92,0.07,-0.28,-0.28,-0.26,-0.26,0.11
apple_low,1.0,1.0,1.0,1.0,0.02,0.94,0.94,0.94,0.94,0.11,0.93,0.93,0.93,0.93,0.06,-0.27,-0.27,-0.25,-0.26,0.1
apple_close,1.0,1.0,1.0,1.0,0.03,0.93,0.94,0.94,0.94,0.12,0.93,0.93,0.92,0.93,0.06,-0.27,-0.28,-0.26,-0.26,0.1
apple_volume,0.03,0.04,0.02,0.03,1.0,-0.02,-0.01,-0.03,-0.02,0.45,-0.08,-0.07,-0.1,-0.09,0.44,-0.34,-0.32,-0.38,-0.36,0.36
amazon_open,0.94,0.93,0.94,0.93,-0.02,1.0,1.0,1.0,1.0,0.19,0.95,0.95,0.94,0.94,0.08,-0.08,-0.08,-0.07,-0.07,-0.03
amazon_high,0.94,0.94,0.94,0.94,-0.01,1.0,1.0,1.0,1.0,0.2,0.94,0.95,0.94,0.94,0.08,-0.09,-0.09,-0.08,-0.08,-0.02
amazon_low,0.93,0.93,0.94,0.94,-0.03,1.0,1.0,1.0,1.0,0.18,0.95,0.95,0.94,0.95,0.07,-0.08,-0.08,-0.06,-0.07,-0.03
amazon_close,0.93,0.93,0.94,0.94,-0.02,1.0,1.0,1.0,1.0,0.19,0.94,0.95,0.94,0.95,0.07,-0.09,-0.09,-0.08,-0.08,-0.02
amazon_volume,0.12,0.13,0.11,0.12,0.45,0.19,0.2,0.18,0.19,1.0,0.14,0.15,0.12,0.13,0.64,-0.24,-0.22,-0.27,-0.25,0.28


In [20]:
# List the column names available in the merged frame.
df.columns

Index(['apple_open', 'apple_high', 'apple_low', 'apple_close', 'apple_volume',
       'amazon_open', 'amazon_high', 'amazon_low', 'amazon_close',
       'amazon_volume', 'google_open', 'google_high', 'google_low',
       'google_close', 'google_volume', 'uber_open', 'uber_high', 'uber_low',
       'uber_close', 'uber_volume'],
      dtype='object')

In [21]:
# Names of the columns whose name ends with 'close'.
closes = df.columns[df.columns.str.endswith('close')].tolist()
closes

['apple_close', 'amazon_close', 'google_close', 'uber_close']

In [22]:
# Keep every row, but only the columns listed in `closes`.
df_closes = df.loc[:, closes]
df_closes

Unnamed: 0_level_0,apple_close,amazon_close,google_close,uber_close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2015-09-21,26.62,548.39,666.98,
2015-09-22,26.20,538.40,653.20,
2015-09-23,26.41,536.07,653.29,
2015-09-24,26.57,533.75,654.91,
2015-09-25,26.50,524.25,640.15,
...,...,...,...,...
2020-09-10,113.49,3175.11,1526.05,35.98
2020-09-11,112.00,3116.22,1515.76,36.98
2020-09-14,115.36,3102.97,1508.83,37.95
2020-09-15,115.54,3156.13,1535.12,37.47


In [23]:
# Correlation matrix restricted to the selected close columns.
df_closes.corr()

Unnamed: 0,apple_close,amazon_close,google_close,uber_close
apple_close,1.0,0.94,0.93,-0.26
amazon_close,0.94,1.0,0.95,-0.08
google_close,0.93,0.95,1.0,-0.16
uber_close,-0.26,-0.08,-0.16,1.0


In [24]:
# str.endswith accepts a tuple of suffixes, so one call covers both endings.
cls_and_vol = [col for col in df.columns if col.endswith(('close', 'volume'))]
df_c_a_v = df.loc[:, cls_and_vol]
df_c_a_v

Unnamed: 0_level_0,apple_close,apple_volume,amazon_close,amazon_volume,google_close,google_volume,uber_close,uber_volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2015-09-21,26.62,217383992,548.39,3283300,666.98,1954600,,
2015-09-22,26.20,217921596,538.40,3841700,653.20,2718000,,
2015-09-23,26.41,154771892,536.07,2237571,653.29,1457995,,
2015-09-24,26.57,217048828,533.75,3499381,654.91,1934252,,
2015-09-25,26.50,243051812,524.25,4030900,640.15,2423900,,
...,...,...,...,...,...,...,...,...
2020-09-10,113.49,182274391,3175.11,5330741,1526.05,1651194,35.98,35468315.00
2020-09-11,112.00,180860325,3116.22,5093982,1515.76,1535337,36.98,27130921.00
2020-09-14,115.36,140150087,3102.97,4529596,1508.83,2133007,37.95,28016041.00
2020-09-15,115.54,184642039,3156.13,4021535,1535.12,1152055,37.47,23532584.00


In [25]:
# Look at the first rows of the Uber frame before slicing it by month.
uber.head()

Unnamed: 0_level_0,uber_open,uber_high,uber_low,uber_close,uber_volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-09-16,37.39,38.52,37.39,37.66,21786591
2020-09-15,37.95,38.48,37.27,37.47,23532584
2020-09-14,37.1,38.0,37.08,37.95,28016041
2020-09-11,36.3,37.15,36.25,36.98,27130921
2020-09-10,35.2,36.78,35.19,35.98,35468315


In [26]:
# Rows whose index month is June; the filter looks at the month only,
# so it is not limited to a single year.
uber_6 = uber.loc[uber.index.month == 6]
uber_6

Unnamed: 0_level_0,uber_open,uber_high,uber_low,uber_close,uber_volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-06-30,30.83,31.25,30.13,31.08,21282810
2020-06-29,29.51,29.75,28.39,29.63,20795402
2020-06-26,30.44,30.44,29.21,29.61,46482331
2020-06-25,30.0,30.86,29.56,30.58,19142789
2020-06-24,32.75,32.93,30.45,30.46,29477229
2020-06-23,32.88,33.21,32.59,33.05,13622126
2020-06-22,32.43,32.85,31.43,32.68,17790276
2020-06-19,33.98,34.02,32.27,32.3,29426285
2020-06-18,33.0,33.44,32.8,33.4,15495838
2020-06-17,33.5,33.59,33.01,33.29,14827742


In [27]:
# Rows whose index month is July; the filter looks at the month only,
# so it is not limited to a single year.
uber_7 = uber.loc[uber.index.month == 7]
uber_7

Unnamed: 0_level_0,uber_open,uber_high,uber_low,uber_close,uber_volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-07-31,30.4,30.42,29.79,30.26,16360999
2020-07-30,30.65,30.68,29.97,30.24,13062912
2020-07-29,30.92,31.26,30.77,31.02,8215892
2020-07-28,30.79,31.24,30.35,30.81,12977145
2020-07-27,31.21,31.6,30.75,30.99,10182302
2020-07-24,31.74,31.8,30.47,31.18,16896059
2020-07-23,32.33,33.02,31.98,32.17,12749770
2020-07-22,32.44,32.88,32.33,32.66,12128382
2020-07-21,34.16,34.23,32.53,32.55,16743293
2020-07-20,32.48,33.92,32.35,33.66,16213609


In [28]:
# Stack the June rows on top of the July rows, keeping the original index.
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4
# and removed in pandas 2.0.
pd.concat([uber_6, uber_7])

Unnamed: 0_level_0,uber_open,uber_high,uber_low,uber_close,uber_volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-06-30,30.83,31.25,30.13,31.08,21282810
2020-06-29,29.51,29.75,28.39,29.63,20795402
2020-06-26,30.44,30.44,29.21,29.61,46482331
2020-06-25,30.00,30.86,29.56,30.58,19142789
2020-06-24,32.75,32.93,30.45,30.46,29477229
...,...,...,...,...,...
2019-07-08,43.59,43.85,42.75,42.95,9304101
2019-07-05,44.31,44.55,43.01,43.53,8239452
2019-07-03,44.00,44.46,43.79,44.23,3380003
2019-07-02,44.55,44.68,43.75,44.00,11881274
