In [1]:
'''
This project has the objective of replicating the data frame returned by the Pipeline module during the Udemy course lecture 
(Quantopian platform was shut down after November 2020). This pipeline returns a list with data frames or a single DF 
for a single company within any given time for the chosen companies.

Classic pandas_datareader module was used to get the data necessary for the pipelines.

List of the top 15 energy companies by market cap. From https://en.wikipedia.org/wiki/List_of_largest_energy_companies

Trading between January 2019 and January 2021

''';

In [2]:
#Imports
import pandas as pd
import pandas_datareader as web
import datetime

In [3]:
#Start and End Dates
start = datetime.datetime(2019,1,1)
end = datetime.datetime(2021,1,1)

In [4]:
#Tickers 
companies = ['XOM','RDS.A','CVX','TOT','BP','PTR','SNP','SLB','EPD','E','COP','EQNR','EOG','PBR']

In [12]:
def make_pipeline(companies):
    df = []
    
    #Taking the information and cleaning it up
    for stocks in companies:
        try:
            info = web.DataReader(stocks, 'yahoo', start, end)
        except KeyError:
            continue
        finally:
            df.append(info)
    
    #Adding the Longs and Shorts to our DF's
    for i in range(0,len(df),1):
        
        mva_10 = df[i]['Close'].rolling(10).mean()
        mva_30 = df[i]['Close'].rolling(30).mean()
        
        percent_diff = (mva_10 - mva_30) / mva_30
        
        shorts = percent_diff < 0
        longs = percent_diff > 0
          
        df[i]['Longs'] = longs
        df[i]['Shorts'] = shorts
        df[i]['Percent Diff'] = percent_diff
        
        df[i] = df[i][['Longs','Shorts','Percent Diff']]
    
    return df

In [13]:
df = make_pipeline(companies)

In [16]:
df

[            Longs  Shorts  Percent Diff
 Date                                   
 2019-01-02  False   False           NaN
 2019-01-03  False   False           NaN
 2019-01-04  False   False           NaN
 2019-01-07  False   False           NaN
 2019-01-08  False   False           NaN
 ...           ...     ...           ...
 2020-12-24   True   False      0.049853
 2020-12-28   True   False      0.039207
 2020-12-29   True   False      0.032492
 2020-12-30   True   False      0.026098
 2020-12-31   True   False      0.017947
 
 [505 rows x 3 columns],
             Longs  Shorts  Percent Diff
 Date                                   
 2019-01-02  False   False           NaN
 2019-01-03  False   False           NaN
 2019-01-04  False   False           NaN
 2019-01-07  False   False           NaN
 2019-01-08  False   False           NaN
 ...           ...     ...           ...
 2020-12-24   True   False      0.049853
 2020-12-28   True   False      0.039207
 2020-12-29   True   False    

In [11]:
True in df[0]['Longs']

False