In [14]:
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

In [15]:
apple_df = pd.read_csv('./AAPL_stock_data.csv')
snapchat_df = pd.read_csv('./SNA_stock_data.csv')
twitter_df = pd.read_csv('./TWTR_stock_data.csv')

In [16]:
apple_df['stock_name'] = 'Apple'
snapchat_df['stock_name'] = 'Snapchat'
twitter_df['stock_name'] = 'Twitter'

In [17]:
combined_df = pd.concat([apple_df, snapchat_df, twitter_df], ignore_index=True)

In [21]:
basket = (combined_df.groupby(['date', '1. open', '2. high', '3. low', '4. close', 'stock_name'])['5. volume']
          .sum().unstack().reset_index().fillna(0)
          .set_index('date'))

In [22]:
def encode_units(x):
    if x <= 0:
        return 0
    if x >= 1:
        return 1

In [23]:
basket_sets = basket.applymap(encode_units)

frequent_itemsets = apriori(basket_sets, min_support=0.01, use_colnames=True)



In [24]:
combined_df.to_csv('combined_stock_data.csv', index=False)

In [26]:
print(frequent_itemsets)

    support                                        itemsets
0   1.00000                                       (1. open)
1   1.00000                                       (2. high)
2   1.00000                                        (3. low)
3   1.00000                                      (4. close)
4   0.42099                                         (Apple)
..      ...                                             ...
58  0.42099           (4. close, 3. low, 2. high, Snapchat)
59  0.15795            (4. close, 3. low, 2. high, Twitter)
60  0.42099     (4. close, Apple, 2. high, 3. low, 1. open)
61  0.42099  (4. close, Snapchat, 2. high, 3. low, 1. open)
62  0.15795   (4. close, 2. high, 3. low, 1. open, Twitter)

[63 rows x 2 columns]
