In [1]:
import numpy as np
import tqdm
import pandas as pd

In [2]:
supplychain = pd.read_csv("../assets/global_supplychain.csv")
supplychain["accounting_as_of_date"] = pd.to_datetime(supplychain.accounting_as_of_date)
supplychain["accounting_as_of_date"] = supplychain["accounting_as_of_date"].dt.strftime('%m/%Y')
supplychain.drop_duplicates(subset=["accounting_as_of_date", "supplier_ticker", "customer_ticker"], keep='first', inplace=True)

In [3]:
stock_market = pd.read_pickle("../assets/filtered_us_eod.pkl")
stock_market

Unnamed: 0,Date,ticker,Date.1,adj_close,mom,MACD,vol
0,2010-01-01,A,2010-01-01,20.822542,5.517390,3.395021,0.000097
1,2010-01-04,A,2010-01-04,20.976683,5.344631,3.421450,0.000094
2,2010-01-05,A,2010-01-05,20.748822,4.587213,3.431355,0.000100
3,2010-01-06,A,2010-01-06,20.675102,4.223339,3.434483,0.000098
4,2010-01-07,A,2010-01-07,20.648294,4.011330,3.433929,0.000098
...,...,...,...,...,...,...,...
13171387,2019-10-14,ZIXI,2019-10-14,6.750000,-3.205437,-0.218251,0.000565
13171388,2019-10-15,ZIXI,2019-10-15,6.865000,-2.722324,-0.243142,0.000575
13171389,2019-10-16,ZIXI,2019-10-16,6.650000,-3.055404,-0.278275,0.000600
13171390,2019-10-17,ZIXI,2019-10-17,6.650000,-2.862559,-0.310736,0.000588


In [4]:
stock_market["Date"] = pd.to_datetime(stock_market.Date)
stock_market["Date"] = stock_market["Date"].dt.strftime('%m/%Y')
stock_market.drop_duplicates(subset=["Date", "ticker"], keep='first', inplace=True)

In [5]:
stock_market

Unnamed: 0,Date,ticker,Date.1,adj_close,mom,MACD,vol
0,01/2010,A,2010-01-01,20.822542,5.517390,3.395021,0.000097
21,02/2010,A,2010-02-01,19.522389,0.521171,2.810435,0.000226
41,03/2010,A,2010-03-01,21.506127,2.814703,2.682174,0.000233
64,04/2010,A,2010-04-01,23.154774,2.918440,3.625275,0.000082
86,05/2010,A,2010-05-03,24.642577,3.616656,4.141693,0.000175
...,...,...,...,...,...,...,...
13171294,06/2019,ZIXI,2019-06-03,8.740000,0.634881,2.100240,0.003782
13171314,07/2019,ZIXI,2019-07-01,9.420000,2.370835,1.975709,0.000588
13171336,08/2019,ZIXI,2019-08-01,9.330000,0.976691,1.693943,0.000638
13171358,09/2019,ZIXI,2019-09-03,7.400000,-1.565649,0.421776,0.000733


In [6]:
# check overlapping

companies = list(set(stock_market["ticker"].values.tolist())) # get rid of date
customers = supplychain["customer_ticker"].values.tolist()
suppliers = supplychain["supplier_ticker"].values.tolist()


customer_suppliers = customers + suppliers
customer_suppliers = set(customer_suppliers)
print(len(customer_suppliers))
print(len(set(customers)))
print(len(set(suppliers)))
# customers = np.sort(customers)
# suppliers = np.sort(suppliers)

23854
19313
7974


In [7]:
# calculate overlaps

overlap = [x for x in tqdm.tqdm(companies) if x in customer_suppliers]
print(len(overlap))
set_customers = set(customers)
set_suppliers = set(suppliers)
overlap_customers = [x for x in tqdm.tqdm(companies) if x in set_customers]
print(len(overlap_customers))
overlap_suppliers = [x for x in tqdm.tqdm(companies) if x in set_suppliers]
print(len(overlap_suppliers))

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1790/1790 [00:00<00:00, 602407.46it/s]


1790


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1790/1790 [00:00<00:00, 886504.21it/s]


933


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1790/1790 [00:00<00:00, 905973.71it/s]


1403


In [8]:
# remove non overlapped on stock market
non_overlapping_companies = [x for x in companies if x not in overlap]
# print(len(non_overlapping_companies))

print("Before(Market):", stock_market.shape)
stock_market_updated = stock_market.drop(non_overlapping_companies, axis=1)
print(stock_market_updated.shape)
print("After(Market):", stock_market_updated.shape)

# remove non overlapped on supply chain

print("Before(Supply Chain):", supplychain.shape)
supplychain_updated = supplychain[supplychain.supplier_ticker.isin(overlap_suppliers)]
supplychain_updated = supplychain_updated[supplychain_updated.customer_ticker.isin(overlap_customers)]
print("After(Supply Chain):", supplychain_updated.shape)

Before(Market): (211220, 7)
(211220, 7)
After(Market): (211220, 7)
Before(Supply Chain): (179303, 9)
After(Supply Chain): (18932, 9)


In [9]:
with open('../assets/overlapping_companies.txt', 'w') as f:
    for item in overlap:
        f.write("%s\n" % item)

In [10]:
stock_market_updated

Unnamed: 0,Date,ticker,Date.1,adj_close,mom,MACD,vol
0,01/2010,A,2010-01-01,20.822542,5.517390,3.395021,0.000097
21,02/2010,A,2010-02-01,19.522389,0.521171,2.810435,0.000226
41,03/2010,A,2010-03-01,21.506127,2.814703,2.682174,0.000233
64,04/2010,A,2010-04-01,23.154774,2.918440,3.625275,0.000082
86,05/2010,A,2010-05-03,24.642577,3.616656,4.141693,0.000175
...,...,...,...,...,...,...,...
13171294,06/2019,ZIXI,2019-06-03,8.740000,0.634881,2.100240,0.003782
13171314,07/2019,ZIXI,2019-07-01,9.420000,2.370835,1.975709,0.000588
13171336,08/2019,ZIXI,2019-08-01,9.330000,0.976691,1.693943,0.000638
13171358,09/2019,ZIXI,2019-09-03,7.400000,-1.565649,0.421776,0.000733


In [11]:
supplychain_updated

Unnamed: 0.1,Unnamed: 0,public_domain_date,accounting_as_of_date,supplier_exchange,supplier_ticker,customer_exchange,customer_ticker,revenue_dependency,revenue_dependency_annotation
81,3028490,2003-12-15,09/2003,NasdaqGS,ISSC,NYSE,WRB,0.110000,Actual
121,1376988,2004-02-16,12/2003,NasdaqGS,INTC,NYSE,HPQ,0.150000,Actual
162,3030190,2004-03-01,12/2003,NasdaqGS,UTEK,NasdaqGS,INTC,0.260000,Actual
170,2515506,2004-02-13,12/2003,NYSE,BMY,NYSE,ABC,0.130000,Actual
172,2522801,2004-03-08,12/2003,NYSE,CRR,NYSE,SLB,0.301000,Actual
...,...,...,...,...,...,...,...,...,...
3074563,3031885,2019-09-11,07/2019,NYSE,CIEN,NYSE,VZ,0.085047,Actual
3074566,3048621,2019-09-12,07/2019,NasdaqCM,OPTT,LSE,PMO,0.470000,Actual
3074567,3048614,2019-09-12,07/2019,NasdaqCM,OPTT,NYSE,E,0.140000,Actual
3074584,3032019,2019-08-08,08/2019,NasdaqGS,UEIC,NasdaqGS,DISH,0.103000,Actual


In [19]:
for company in tqdm.tqdm(overlap_suppliers):
    for date in set(stock_market_updated["Date"].unique()):
        if date in supplychain_updated["accounting_as_of_date"].values:
#             print(supplychain_updated.loc[(supplychain_updated['supplier_ticker'] == company) &
#                                    (supplychain_updated['accounting_as_of_date'] == date), "revenue_dependency"])
            stock_market_updated.loc[(stock_market_updated['ticker'] == company) &
                                   (stock_market_updated['Date'] == date), "revenue_dependency"] = \
            sum(supplychain_updated.loc[(supplychain_updated['supplier_ticker'] == company) &
                                   (supplychain_updated['accounting_as_of_date'] == date), "revenue_dependency"])


            
# joined_market_info = stock_market_updated.merge(supplychain_updated, left_on=["Date", "ticker"], right_on=["accounting_as_of_date", "supplier_ticker"])
    
# joined_market_info
    
    


  0%|                                                                                                                                                                                                                                                                                                           | 0/1403 [00:00<?, ?it/s]
  0%|▏                                                                                                                                                                                                                                                                                                | 1/1403 [00:07<2:52:56,  7.40s/it]

KeyboardInterrupt: 

In [16]:
[x for x in stock_market_updated["revenue_dependency"].values if x > -1]

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.158,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.518,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.667,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.856,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.866,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.929,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.9800000000000001,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.8800000000000001,
 0.0,
 0.0