# SM Prototype 10K Train/Test Split, Vectorization, Linear Regression

In [1]:
import pandas as pd
import numpy as np
import re
import nltk
nltk.download('stopwords')
from utils import clean_text
import edgar

# Expand the max width of how our dataFrames display on screen
pd.options.display.max_colwidth = 500

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\yural\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [2]:
def ten_k_files():
    company_dict = {"FANG": "0001539838", "RSPP":"0001588216", "CXO":" 0001358071", "MDU": "0000067716",
               "LPI": "0001528129", "NFX": "0000912750", "ECA": "0001157806", "GPOR": "0000874499",
               "RRC": "0000315852", "EQT": "0000033213", "SM": "0000893538"}
    docs_df = pd.DataFrame(columns=['Filing_Type','Filing_Date','Company','Text','Label'])
    for comp in company_dict:
        year = 2019
        cik = company_dict[comp]
        company = edgar.Company(comp, cik)
        tree = company.getAllFilings(filingType = "10-K")
        docs = edgar.getDocuments(tree, noOfDocuments=5)

        for doc in docs:
            #print(comp)
            year -= 1
            
            docs_df = docs_df.append({'Filing_Type': '10-K','Filing_Date': year,'Company':comp, 'Text' :doc,'Label' :'1'},ignore_index=True)
    return docs_df



In [3]:
docs_df = ten_k_files()


In [5]:
docs_df['Filing_Date'] = docs_df['Filing_Date'].apply(str)

In [33]:
docs_df['Index'] = docs_df[['Filing_Date', 'Company']].apply(lambda x: ''.join(x), axis=1)
index_df = docs_df['Index']
index_df.to_csv("index.csv")

docs_df = docs_df.set_index('Index')
docs_df

Unnamed: 0_level_0,Filing_Type,Filing_Date,Company,Text,Label
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018FANG,10-K,2018,FANG,"\n10-K/A\n1\ndiamondback201810-ka.htm\nDIAMONDBACK 10-K/A\n\n\n\n\t\n\t\t\n\t\t\n\t\tDocument\n\t\n\t\n UNITED STATESSECURITIES AND EXCHANGE COMMISSIONWashington, D.C. 20549 FORM 10-K/AAmendment No. 1 ýANNUAL REPORT UNDER SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934For the fiscal year ended December 31, 2018 ORoTRANSITION REPORT UNDER SECTION 13 OR 15(d) OF SECURITIES EXCHANGE ACT OF 1934Commission File Number 001-35700 Diamondback Energy, Inc.(Exact Name of Re...",1
2017FANG,10-K,2017,FANG,"\n10-K\n1\ndiamondback201810-k.htm\nDIAMONDBACK 10-K\n\n\n\n\t\n\t\t\n\t\t\n\t\tDocument\n\t\n\t\nTable of Contents UNITED STATESSECURITIES AND EXCHANGE COMMISSIONWashington, D.C. 20549 FORM 10-K ýANNUAL REPORT UNDER SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934For the fiscal year ended December 31, 2018 OR¨TRANSITION REPORT UNDER SECTION 13 OR 15(d) OF SECURITIES EXCHANGE ACT OF 1934Commission File Number 001-35700 Diamondback Energy, Inc.(Exact Name of Registr...",1
2016FANG,10-K,2016,FANG,"\n10-K\n1\ndiamondback201710-k.htm\nDIAMONDBACK 10-K\n\n\n\n\t\n\t\t\n\t\t\n\t\tDocument\n\t\n\t\nTable of Contents UNITED STATESSECURITIES AND EXCHANGE COMMISSIONWashington, D.C. 20549 FORM 10-K ýANNUAL REPORT UNDER SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934For the fiscal year ended December 31, 2017 OR¨TRANSITION REPORT UNDER SECTION 13 OR 15(d) OF SECURITIES EXCHANGE ACT OF 1934Commission File Number 001-35700 Diamondback Energy, Inc.(Exact Name of Registr...",1
2015FANG,10-K,2015,FANG,"\n10-K\n1\ndiamondback201610-k.htm\nDIAMONDBACK 10-K\n\n\n\n\t\n\t\t\n\t\t\n\t\tDocument\n\t\n\t\nTable of Contents UNITED STATESSECURITIES AND EXCHANGE COMMISSIONWashington, D.C. 20549 FORM 10-K ýANNUAL REPORT UNDER SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934For the fiscal year ended December 31, 2016 OR¨TRANSITION REPORT UNDER SECTION 13 OR 15(d) OF SECURITIES EXCHANGE ACT OF 1934Commission File Number 001-35700 Diamondback Energy, Inc.(Exact Name of Registr...",1
2014FANG,10-K,2014,FANG,"\n10-K\n1\ndiamondback201510-k.htm\nDIAMONDBACK 10-K\n\n\n\n\t\n\t\t\n\t\t\n\t\t10-K\n\t\n\t\nTable of Contents UNITED STATESSECURITIES AND EXCHANGE COMMISSIONWashington, D.C. 20549 FORM 10-K ýANNUAL REPORT UNDER SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934For the fiscal year ended December 31, 2015 OR¨TRANSITION REPORT UNDER SECTION 13 OR 15(d) OF SECURITIES EXCHANGE ACT OF 1934Commission File Number 001-35700 Diamondback Energy, Inc.(Exact Name of Registrant ...",1
2018RSPP,10-K,2018,RSPP,"\n10-K/A\n1\nd580186d10ka.htm\nFORM 10-K/A\n\n\nForm 10-K/A\n\n \nTable of Contents\n\n UNITED STATES \nSECURITIES AND EXCHANGE COMMISSION \nWashington, D.C. 20549 \n FORM\n10-K/A (Amendment No. 1) \n \n(Mark one) ☒\n ANNUAL REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934 For the fiscal year\nended December 31, 2017 or \n☐ TRANSITION REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934 \nFor the transition period from\n ...",1
2017RSPP,10-K,2017,RSPP,"\n10-K\n1\na2017123110k.htm\nFORM 10-K\n\n\n\n\t\n\t\t\n\t\t\n\t\tDocument\n\t\n\t\nTable of ContentsUNITED STATESSECURITIES AND EXCHANGE COMMISSIONWashington, D.C. 20549 FORM 10-K (Mark one) ý ANNUAL REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934 For the fiscal year ended December 31, 2017 or o TRANSITION REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934 For the transition period from to Co...",1
2016RSPP,10-K,2016,RSPP,"\n10-K\n1\nrspp2016123110k.htm\n10-K\n\n\n\n\t\n\t\t\n\t\t\n\t\tDocument\n\t\n\t\nTable of ContentsUNITED STATESSECURITIES AND EXCHANGE COMMISSIONWashington, D.C. 20549 FORM 10-K (Mark one) ý ANNUAL REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934 For the fiscal year ended December 31, 2016 or o TRANSITION REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934 For the transition period from to Comm...",1
2015RSPP,10-K,2015,RSPP,"\n10-K\n1\na2015123110k.htm\n10-K\n\n\n\n\t\n\t\t\n\t\t\n\t\t10-K\n\t\n\t\nTable of ContentsUNITED STATESSECURITIES AND EXCHANGE COMMISSIONWashington, D.C. 20549 FORM 10-K (Mark one) ý ANNUAL REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934 For the fiscal year ended December 31, 2015 or o TRANSITION REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934 For the transition period from to Commission ...",1
2014RSPP,10-K,2014,RSPP,"\n10-K\n1\na2014123110k.htm\n10-K\n\n\n\n\t\n\t\t\n\t\t\n\t\t2014.12.31 10K\n\t\n\t\nTable of ContentsUNITED STATESSECURITIES AND EXCHANGE COMMISSIONWashington, D.C. 20549 FORM 10-K (Mark one) ý ANNUAL REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934 For the fiscal year ended December 31, 2014 or o TRANSITION REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934 For the transition period from to C...",1


In [7]:
# Vectorization methods
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

# Classification model
from sklearn.linear_model import LinearRegression

### Train/Test Split

Train will be a 80% split of the corpus of documents comprising of 10K and 10Qs for the companies spanning time.
Test will be the 20% remaining documents.

Documents will be the features, and Labels will be the stock price change label (positive or negative)

There will be 3 main models: one for the stock change at 1 day, one for the stock change at 1 month, one for the stock change at 3 months (before the next quarter)

In [8]:
#first pass with just SM 10-K documents only
#company = edgar.Company("SM Energy", "0000893538")
#tree = company.getAllFilings(filingType = "10-K")
#docs = edgar.getDocuments(tree, noOfDocuments=6) #2014, 2015, 2016, 2017, 2018

In [9]:
#text2018 = docs[0]
#text2017 = docs[1]
#text2016 = docs[2]
#text2015 = docs[3]
#text2014 = docs[4]

In [28]:
# Create dataframe from data
#docs_df = pd.DataFrame(columns=['Filing_Type','Filing_Date','Company','Text','Label'])
#docs_df
#docs_df = docs_df.append({'Filing_Type': '10-K','Filing_Date': 2018,'Company':'SM', 'Text' :text2018,'Label' :'1'},ignore_index=True)
#docs_df = docs_df.append({'Filing_Type': '10-K','Filing_Date': 2017,'Company':'SM', 'Text' :text2017,'Label' :'0'},ignore_index=True)
#docs_df = docs_df.append({'Filing_Type': '10-K','Filing_Date': 2016,'Company':'SM', 'Text' :text2016,'Label' :'1'},ignore_index=True)
#docs_df = docs_df.append({'Filing_Type': '10-K','Filing_Date': 2015,'Company':'SM', 'Text' :text2015,'Label' :'0'},ignore_index=True)
#docs_df = docs_df.append({'Filing_Type': '10-K','Filing_Date': 2014,'Company':'SM', 'Text' :text2014,'Label' :'1'},ignore_index=True)

docs_df


Unnamed: 0_level_0,Filing_Type,Filing_Date,Company,Text,Label
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018FANG,10-K,2018,FANG,"\n10-K/A\n1\ndiamondback201810-ka.htm\nDIAMONDBACK 10-K/A\n\n\n\n\t\n\t\t\n\t\t\n\t\tDocument\n\t\n\t\n UNITED STATESSECURITIES AND EXCHANGE COMMISSIONWashington, D.C. 20549 FORM 10-K/AAmendment No. 1 ýANNUAL REPORT UNDER SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934For the fiscal year ended December 31, 2018 ORoTRANSITION REPORT UNDER SECTION 13 OR 15(d) OF SECURITIES EXCHANGE ACT OF 1934Commission File Number 001-35700 Diamondback Energy, Inc.(Exact Name of Re...",1
2017FANG,10-K,2017,FANG,"\n10-K\n1\ndiamondback201810-k.htm\nDIAMONDBACK 10-K\n\n\n\n\t\n\t\t\n\t\t\n\t\tDocument\n\t\n\t\nTable of Contents UNITED STATESSECURITIES AND EXCHANGE COMMISSIONWashington, D.C. 20549 FORM 10-K ýANNUAL REPORT UNDER SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934For the fiscal year ended December 31, 2018 OR¨TRANSITION REPORT UNDER SECTION 13 OR 15(d) OF SECURITIES EXCHANGE ACT OF 1934Commission File Number 001-35700 Diamondback Energy, Inc.(Exact Name of Registr...",1
2016FANG,10-K,2016,FANG,"\n10-K\n1\ndiamondback201710-k.htm\nDIAMONDBACK 10-K\n\n\n\n\t\n\t\t\n\t\t\n\t\tDocument\n\t\n\t\nTable of Contents UNITED STATESSECURITIES AND EXCHANGE COMMISSIONWashington, D.C. 20549 FORM 10-K ýANNUAL REPORT UNDER SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934For the fiscal year ended December 31, 2017 OR¨TRANSITION REPORT UNDER SECTION 13 OR 15(d) OF SECURITIES EXCHANGE ACT OF 1934Commission File Number 001-35700 Diamondback Energy, Inc.(Exact Name of Registr...",1
2015FANG,10-K,2015,FANG,"\n10-K\n1\ndiamondback201610-k.htm\nDIAMONDBACK 10-K\n\n\n\n\t\n\t\t\n\t\t\n\t\tDocument\n\t\n\t\nTable of Contents UNITED STATESSECURITIES AND EXCHANGE COMMISSIONWashington, D.C. 20549 FORM 10-K ýANNUAL REPORT UNDER SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934For the fiscal year ended December 31, 2016 OR¨TRANSITION REPORT UNDER SECTION 13 OR 15(d) OF SECURITIES EXCHANGE ACT OF 1934Commission File Number 001-35700 Diamondback Energy, Inc.(Exact Name of Registr...",1
2014FANG,10-K,2014,FANG,"\n10-K\n1\ndiamondback201510-k.htm\nDIAMONDBACK 10-K\n\n\n\n\t\n\t\t\n\t\t\n\t\t10-K\n\t\n\t\nTable of Contents UNITED STATESSECURITIES AND EXCHANGE COMMISSIONWashington, D.C. 20549 FORM 10-K ýANNUAL REPORT UNDER SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934For the fiscal year ended December 31, 2015 OR¨TRANSITION REPORT UNDER SECTION 13 OR 15(d) OF SECURITIES EXCHANGE ACT OF 1934Commission File Number 001-35700 Diamondback Energy, Inc.(Exact Name of Registrant ...",1
2018RSPP,10-K,2018,RSPP,"\n10-K/A\n1\nd580186d10ka.htm\nFORM 10-K/A\n\n\nForm 10-K/A\n\n \nTable of Contents\n\n UNITED STATES \nSECURITIES AND EXCHANGE COMMISSION \nWashington, D.C. 20549 \n FORM\n10-K/A (Amendment No. 1) \n \n(Mark one) ☒\n ANNUAL REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934 For the fiscal year\nended December 31, 2017 or \n☐ TRANSITION REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934 \nFor the transition period from\n ...",1
2017RSPP,10-K,2017,RSPP,"\n10-K\n1\na2017123110k.htm\nFORM 10-K\n\n\n\n\t\n\t\t\n\t\t\n\t\tDocument\n\t\n\t\nTable of ContentsUNITED STATESSECURITIES AND EXCHANGE COMMISSIONWashington, D.C. 20549 FORM 10-K (Mark one) ý ANNUAL REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934 For the fiscal year ended December 31, 2017 or o TRANSITION REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934 For the transition period from to Co...",1
2016RSPP,10-K,2016,RSPP,"\n10-K\n1\nrspp2016123110k.htm\n10-K\n\n\n\n\t\n\t\t\n\t\t\n\t\tDocument\n\t\n\t\nTable of ContentsUNITED STATESSECURITIES AND EXCHANGE COMMISSIONWashington, D.C. 20549 FORM 10-K (Mark one) ý ANNUAL REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934 For the fiscal year ended December 31, 2016 or o TRANSITION REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934 For the transition period from to Comm...",1
2015RSPP,10-K,2015,RSPP,"\n10-K\n1\na2015123110k.htm\n10-K\n\n\n\n\t\n\t\t\n\t\t\n\t\t10-K\n\t\n\t\nTable of ContentsUNITED STATESSECURITIES AND EXCHANGE COMMISSIONWashington, D.C. 20549 FORM 10-K (Mark one) ý ANNUAL REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934 For the fiscal year ended December 31, 2015 or o TRANSITION REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934 For the transition period from to Commission ...",1
2014RSPP,10-K,2014,RSPP,"\n10-K\n1\na2014123110k.htm\n10-K\n\n\n\n\t\n\t\t\n\t\t\n\t\t2014.12.31 10K\n\t\n\t\nTable of ContentsUNITED STATESSECURITIES AND EXCHANGE COMMISSIONWashington, D.C. 20549 FORM 10-K (Mark one) ý ANNUAL REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934 For the fiscal year ended December 31, 2014 or o TRANSITION REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934 For the transition period from to C...",1


In [9]:
### Assign X (data) and y (target)
X = docs_df.drop("Label", axis=1)
y = docs_df["Label"]
print(X.shape, y.shape)

(53, 4) (53,)


In [10]:
#train/test split at 80%
from sklearn.model_selection import train_test_split
XTrain, XTest, yTrain, yTest = train_test_split(X, y, test_size = 0.2, random_state = 42)

In [11]:
#look at train/test split
XTrain
yTrain
XTest
yTest

Index
2014MDU     1
2015RRC     1
2014EQT     1
2016CXO     1
2018EQT     1
2018RSPP    1
2016MDU     1
2016SM      1
2015FANG    1
2016ECA     1
2015CXO     1
Name: Label, dtype: object

In [12]:
#Create train/test dataframes
n_docs = 100000 #number of times to run model
train_docs = XTrain
train_labels = yTrain
test_docs = XTest
test_labels = yTest

train_df = pd.DataFrame({"text": train_docs['Text'], "labels": train_labels})
train_df

test_df = pd.DataFrame({"text": test_docs['Text'], "labels": test_labels})
test_df

print(f"Train Shape: {train_df.shape}")
print(f"Test Shape: {test_df.shape}")

Train Shape: (42, 2)
Test Shape: (11, 2)


In [13]:
train_df

Unnamed: 0_level_0,text,labels
Index,Unnamed: 1_level_1,Unnamed: 2_level_1
2015RSPP,"\n10-K\n1\na2015123110k.htm\n10-K\n\n\n\n\t\n\t\t\n\t\t\n\t\t10-K\n\t\n\t\nTable of ContentsUNITED STATESSECURITIES AND EXCHANGE COMMISSIONWashington, D.C. 20549 FORM 10-K (Mark one) ý ANNUAL REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934 For the fiscal year ended December 31, 2015 or o TRANSITION REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934 For the transition period from to Commission ...",1
2017NFX,"\n10-K\n1\nnfx201610-k.htm\n10-K\n\n\n\n\t\n\t\t\n\t\t\n\t\tDocument\n\t\n\t\n UNITED STATES SECURITIES AND EXCHANGE COMMISSIONWashington, D.C. 20549Form 10-K þANNUAL REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934 For the fiscal year ended December 31, 2016or ¨TRANSITION REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934 For the transition period from to .Commission file number: 1-12534Newfield Explora...",1
2017RSPP,"\n10-K\n1\na2017123110k.htm\nFORM 10-K\n\n\n\n\t\n\t\t\n\t\t\n\t\tDocument\n\t\n\t\nTable of ContentsUNITED STATESSECURITIES AND EXCHANGE COMMISSIONWashington, D.C. 20549 FORM 10-K (Mark one) ý ANNUAL REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934 For the fiscal year ended December 31, 2017 or o TRANSITION REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934 For the transition period from to Co...",1
2017GPOR,"\n10-K\n1\ngpor-12312017x10k.htm\n10-K\n\n\n\n\t\n\t\t\n\t\t\n\t\tDocument\n\t\n\t\nTable of ContentsIndex to Financial StatementsUNITED STATESSECURITIES AND EXCHANGE COMMISSIONWashington, D.C. 20549 FORM 10-K (Mark One)ýANNUAL REPORT UNDER SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934 For the fiscal year ended December 31, 2017 OR¨TRANSITION REPORT UNDER SECTION 13 OR 15(d) OF SECURITIES EXCHANGE ACT OF 1934 For the transition period from to ...",1
2014FANG,"\n10-K\n1\ndiamondback201510-k.htm\nDIAMONDBACK 10-K\n\n\n\n\t\n\t\t\n\t\t\n\t\t10-K\n\t\n\t\nTable of Contents UNITED STATESSECURITIES AND EXCHANGE COMMISSIONWashington, D.C. 20549 FORM 10-K ýANNUAL REPORT UNDER SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934For the fiscal year ended December 31, 2015 OR¨TRANSITION REPORT UNDER SECTION 13 OR 15(d) OF SECURITIES EXCHANGE ACT OF 1934Commission File Number 001-35700 Diamondback Energy, Inc.(Exact Name of Registrant ...",1
2014GPOR,"\n10-K\n1\ngpor-12312014x10k.htm\n10-K\n\n\n\n\t\n\t\t\n\t\t\n\t\tGPOR-12.31.2014-10K\n\t\n\t\nTable of ContentsIndex to Financial StatementsUNITED STATESSECURITIES AND EXCHANGE COMMISSIONWashington, D.C. 20549 FORM 10-K (Mark One)ýANNUAL REPORT UNDER SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934 For the fiscal year ended December 31, 2014 OR¨TRANSITION REPORT UNDER SECTION 13 OR 15(d) OF SECURITIES EXCHANGE ACT OF 1934 For the transition period from to ...",1
2014LPI,"\n10-K\n1\na2014form10-k.htm\n10-K\n\n\n\n\t\n\t\t\n\t\t\n\t\t2014 Form 10-K\n\t\n\t\nUNITED STATESSECURITIES AND EXCHANGE COMMISSIONWashington, D.C. 20549FORM 10-Ký ANNUAL REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934For the fiscal year ended December 31, 2014oro TRANSITION REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934Commission file number: 001-35380Laredo Petroleum, Inc.(Exact name of registrant as specified in its charter)De...",1
2017EQT,"\n10-K\n1\neqt1231201810k.htm\n10-K\n\n\n\n\t\n\t\t\n\t\t\n\t\tDocument\n\t\n\t\nUNITED STATESSECURITIES AND EXCHANGE COMMISSIONWashington, D.C. 20549 FORM 10-K [X] ANNUAL REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934FOR THE FISCAL YEAR ENDED DECEMBER 31, 2018 [ ]TRANSITION REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934orFOR THE TRANSITION PERIOD FROM ___________ TO __________ COMMISSION FILE NUMBER 001-03551 EQT CORPORATION(E...",1
2018GPOR,"\n10-K\n1\ngpor-12312018x10k.htm\n10-K\n\n\n\n\t\n\t\t\n\t\t\n\t\tDocument\n\t\n\t\nTable of ContentsIndex to Financial StatementsUNITED STATESSECURITIES AND EXCHANGE COMMISSIONWashington, D.C. 20549 FORM 10-K (Mark One)ýANNUAL REPORT UNDER SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934 For the fiscal year ended December 31, 2018 OR¨TRANSITION REPORT UNDER SECTION 13 OR 15(d) OF SECURITIES EXCHANGE ACT OF 1934 For the transition period from to ...",1
2017SM,"\n10-K\n1\nsm-20171231x10k.htm\n10-K\n\n\n\n\t\n\t\t\n\t\t\n\t\tDocument\n\t\n\t\nUNITED STATESSECURITIES AND EXCHANGE COMMISSIONWashington, D.C. 20549FORM 10-Kþ Annual Report Pursuant to Section 13 or 15(d) of the Securities Exchange Act of 1934For the fiscal year ended December 31, 2017 oro Transition Report Pursuant to Section 13 or 15(d) of the Securities Exchange Act of 1934Commission file number 001-31539SM ENERGY COMPANY(Exact name of registrant as specified in its charter)Delaw...",1


## CountVectorization

Turn the raw text from TrainText and TestText into feature vectors so that we can use them in our model.
We vectorize the text in 2 steps: 
1. First, we `fit`, the training data to our vectorizer to compute the vocabulary (feature set). 
2. Then, we `transform` with our text for both train and test to count the number occurrences for each word in our vocabulary.

The output of the CountVectorizer's `transform` task is a [sparse matrix](https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.csr_matrix.html#scipy.sparse.csr_matrix), which condenses the matrix values to avoid storing an excessive amount of zeros.

In [14]:
vectorizer = CountVectorizer(stop_words='english') ##could limit the words being used in here
vectorizer.fit(train_df['text']) ##fit training data's text to get the feature set
train_vecs = vectorizer.transform(train_df['text']) ##transform with training to count word frequencies
test_vecs = vectorizer.transform(test_df['text']) ##transform with test data to count word frequencies

In [20]:
all_df = pd.DataFrame({"text": docs_df['Text']})

all_vecs = vectorizer.transform(all_df['text']) ##transform with all data to count word frequencies


In [22]:
#full vocabulary size for training data
print(f"Number of documents: {train_vecs.shape[0]}")
print(f"Size of vocabulary: {train_vecs.shape[1]}")

#full vocabulary size for testing data
print(f"Number of documents: {test_vecs.shape[0]}")
print(f"Size of vocabulary: {test_vecs.shape[1]}")

#full vocabulary size for all data
print(f"Number of documents: {all_vecs.shape[0]}")
print(f"Size of vocabulary: {all_vecs.shape[1]}")

Number of documents: 42
Size of vocabulary: 29822
Number of documents: 11
Size of vocabulary: 29822
Number of documents: 53
Size of vocabulary: 29822


In [23]:
#get rid of non-zero values because it's a sparse matrix 
# Train
print(f"Number of TRAINING non-zero features: {train_vecs.nnz}")
print(f"Number of TRAINING zero features: {(train_vecs.shape[0]*train_vecs.shape[1])-train_vecs.nnz}")

# Test
print(f"Number of TEST non-zero features: {test_vecs.nnz}")
print(f"Number of TEST zero features: {(test_vecs.shape[0]*test_vecs.shape[1])-test_vecs.nnz}")

# All
print(f"Number of TEST non-zero features: {all_vecs.nnz}")
print(f"Number of TEST zero features: {(all_vecs.shape[0]*all_vecs.shape[1])-all_vecs.nnz}")

Number of TRAINING non-zero features: 195463
Number of TRAINING zero features: 1057061
Number of TEST non-zero features: 47247
Number of TEST zero features: 280795
Number of TEST non-zero features: 242710
Number of TEST zero features: 1337856


### Display all terms

Displays and exports all_vecs word frequencies to .csv

In [30]:

df_all_counts = pd.DataFrame(all_vecs.toarray(), 
                         columns=vectorizer.get_feature_names())[:100].T
df_all_counts.to_csv("all10kfinal2.csv")


## Term Frequency-Inverse Document Frequency (TF-IDF)

Tf-idf is a statistical representation of how relevant a word is to a particular document within a corpus. _Relevance_, in this scenario, can be defined as how much information a word provides about the context of one document vs all other documents in the corpus. 

In short, tf-idf is calculated by comparing the number of times that a particular terms occurs in a given document vs the number of other documents in the corpus that contain that word. A word that frequently occurs in 1 document, but only occurs in a very small number of other documents will have a high tf-idf score.

The calculation for tf-idf is the product of two smaller calculations:

$$TF_{i,j} = \frac{Number~of~times~word_{i}~occurs~in~document_{j}}{Total~number~of~words~in~document_{j}}$$


$$IDF_{i} = log(\frac{Total~number~of~documents~in~corpus}{Number~of~documents~that~contain~word_{i}})$$

##### Example: 

Let's say we have 10,000 documents about the solar system. If we were to take one single document with 200 terms and see that _Europa_ (one of Jupiter's moons) was mentioned 5 times, then _Europa's_ term frequency (tf) for that document would be: 

$$TF_{Europa, document} = \frac{5}{200}=0.025$$


Now if we were to see that _Europa_ only occurs in 50 of the total 10,000 documents, then the inverse document frequency (idf) would be: 

$$IDF_{Europa} = log(\frac{10,000}{50})=2.3$$

Therefore our tf-idf score for _Europa_ for that given document would be:

$$ 0.025 * 2.3 = 0.575 $$

This might actually be useful for us because we can see if specific terms are especially important to certain documents relative to others -- this only matters because maybe we can see when acquisitions might be on the horizon, for example.

In [19]:
#tf-idf vectorization using calculation
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_vectorizer.fit(train_df['text'])
train_tfidf_vecs = tfidf_vectorizer.transform(train_df['text'])
test_tfidf_vecs = tfidf_vectorizer.transform(test_df['text'])

### Display a few terms and their tf-idf scores for a few documents

This is only meant to be used for demonstration purposes. The cell below has no impact on the actual execution of our task. Also, this cell is only intended for use when the number of documents is small (<100), otherwise it will likely only display a bunch of zeros.

In [20]:
#alice note -- obviously need to clean this up a bit.
df_tfidf = pd.DataFrame(train_tfidf_vecs.toarray(), 
                         columns=tfidf_vectorizer.get_feature_names())[:15].T
df_tfidf.tail(20)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
yuma,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
zane,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
zavala,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
zemljak,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.005898,0.0
zernell,0.0,0.00595,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
zero,0.001428,0.00133,0.000763,0.0,0.002187,0.0,0.003203,0.002091,0.0,0.006806,0.0,0.00148,0.0,0.0,0.0
zip,0.000564,0.000525,0.000603,0.000426,0.000431,0.000433,0.000421,0.000413,0.000423,0.000448,0.023502,0.000584,0.023502,0.0,0.000424
zone,0.003322,0.000619,0.002131,0.00903,0.002543,0.005613,0.004471,0.002918,0.012973,0.001055,0.0,0.00413,0.0,0.0,0.005004
zones,0.011039,0.000734,0.010114,0.004167,0.000604,0.002422,0.015327,0.000577,0.003553,0.0,0.0,0.010619,0.0,0.000944,0.002375
zoning,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000836,0.0,0.000907,0.0,0.0,0.0,0.0,0.0


#### Comparison of the representation of different words vs. straight up word frequency

In [21]:
pd.DataFrame({"TF-IDF of stock":df_tfidf.loc['stock'], "CountVectorizer: Stock":df_counts.loc['stock']})

Unnamed: 0,TF-IDF of stock,CountVectorizer: Stock
0,0.060008,104
1,0.121931,227
2,0.059209,96
3,0.074062,170
4,0.095842,217
5,0.068679,155
6,0.107844,250
7,0.092915,220
8,0.081892,189
9,0.070566,154


In [22]:
pd.DataFrame({"TF-IDF of drilling":df_tfidf.loc['drilling'], "CountVectorizer: Drilling":df_counts.loc['drilling']})

Unnamed: 0,TF-IDF of drilling,CountVectorizer: Drilling
0,0.096863,164
1,0.058831,107
2,0.101011,160
3,0.074027,166
4,0.117545,260
5,0.082093,181
6,0.078156,177
7,0.047554,110
8,0.072294,163
9,0.075516,161


# Modeling -- classifying whether certain terms appearing will result in a positive or negative stock price

Vectorizing our data has converted our text data into a numeric feature set. Using these vectors, we can now begin to develop machine learning models for things like classification.

To further this model, look into better preprocessing, regression regularization, vocabulary pruning for feature selection, and hyperparameter tuning.

In [23]:
#determine regression model
from sklearn.linear_model import LinearRegression
classifier = LinearRegression()
classifier

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [24]:
classifier.fit(train_vecs, train_df['labels'])
predictions = classifier.predict(test_vecs)
print(f"Prediction   {predictions[:10]}")
print(f"Actual labels: {test_df['labels']}")

The exact solution is  x = 0                              
Prediction   [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
Actual labels: 19    1
41    1
47    1
12    1
43    1
5     1
17    1
50    1
3     1
32    1
13    1
Name: labels, dtype: object


In [25]:
#Run a linear regression classification on the count vectors
count_linearReg = LinearRegression()
count_linearReg.fit(train_vecs, train_df['labels']) 
#train_vecs = frequency/ terms (unique features for this document)
#['labels'] is labeling if the change in stock price was positive or negative for the time frame
count_preds = count_linearReg.predict(test_vecs) 
#model.coef_ -> gives you coefficient values for all of terms. then look to see which have high weights / low weights. 
#terms are being weighted off of the change in the stock price. 
#if you see term in high weight (occurrence results in stock going up), (if low / negative weight, stock going down)

# Calculate the percentage of accurate predictions
accuracy = np.mean(count_preds==test_df['labels'])
print(f"LinearReg CountVectorizer accuracy: {accuracy}") ##linear regression, fit with the frequency*weights, 

The exact solution is  x = 0                              
LinearReg CountVectorizer accuracy: 0.0


### Run a logistic regression classification on the TF-IDF vectors

In [26]:
tfidf_linReg = LinearRegression()
tfidf_linReg.fit(train_tfidf_vecs, train_df['labels'])
tfidf_preds = tfidf_linReg.predict(test_tfidf_vecs)

# Calculate the percentage of accurate predictions
accuracy = np.mean(tfidf_preds==test_df['labels'])
print(f"LinReg TF-IDF accuracy: {accuracy}")

The exact solution is  x = 0                              
LinReg TF-IDF accuracy: 0.0


### View the terms with the highest coefficient values for each category

Notice that the terms highly weighted for each category seem to have highly negative weights for other categories. If we were to use more similarly related categories, we may not see such drastic differences.

Ignore the code behind this table. It is poorly written, but demonstrates the correct results.

In [28]:
from utils import getTopCoefs

getTopCoefs(num_terms=5, model=tfidf_linReg, class_labels=train_df['labels'], feature_names=tfidf_vectorizer.get_feature_names())

TypeError: 'numpy.float64' object is not iterable

### View coefficient weights for CountVectorizer features

In [32]:
getTopCoefs(num_terms=5, model=count_linearReg, class_labels=train_df['labels'], feature_names=vectorizer.get_feature_names())

TypeError: 'numpy.float64' object is not iterable

In [27]:
#Predicted inaccurately:
# Expand the max width of how our dataFrames display on screen
pd.options.display.max_colwidth = 1000

# Compile a dataframe with our text, the actual label, and the predicted label
final_df = pd.DataFrame({"text": test_df['text'], "Actual": test_df['labels'], "Prediction": tfidf_preds})

# Display the rows of our dataframe where the actual label and predicted label don't match
final_df.loc[(final_df['Actual'] != final_df['Prediction'])]

Unnamed: 0,text,Actual,Prediction
19,"\n10-K\n1\nmdu-12312014form10xk.htm\nMDU RESOURCES 2014 FORM 10-K\n\n\n\n\t\n\t\t\n\t\t\n\t\tWdesk | MDU-12.31.2014 Form 10-K\n\t\n\t\nUNITED STATES SECURITIES AND EXCHANGE COMMISSIONWASHINGTON, D.C. 20549FORM 10-Ký ANNUAL REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934For the fiscal year ended December 31, 2014ORo TRANSITION REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934For the transition period from _____________ to ______________Commission file number 1-3480MDU RESOURCES GROUP, INC.(Exact name of registrant as specified in its charter)Delaware 41-0423660(State or other jurisdiction ofincorporation or organization) (I.R.S. Employer Identification No.)1200 West Century AvenueP.O. Box 5650Bismarck, North Dakota 58506-5650(Address of principal executive offices)(Zip Code)(701) 530-1000(Registrant's telephone number, including area code)Securities registered pursuant to Section 12(b) of the Act:Title of each class Name of e...",1,1.0
41,"\n10-K\n1\nrrc-10k_20151231.htm\n10-K\n\n\n\n\n\nrrc-10k_20151231.htm\n\n\n\n\n\n \n \nUNITED STATES \nSECURITIES AND EXCHANGE COMMISSION \nWASHINGTON, D.C. 20549 \n \nFORM 10-K \n \n(Mark one) \n\n\n\n\nx\n\nANNUAL REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934 \nFor the fiscal year ended December 31, 2015\nOR \n\n\n\n\n¨\n\nTRANSITION REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934 \nFor the transition period from to \nCommission File Number: 001-12209 \n \nRANGE RESOURCES CORPORATION \n(Exact Name of Registrant as Specified in Its Charter) \n \n \n\n\n\n\nDelaware\n\n \n\n34-1312571\n\n\n\n(State or Other Jurisdiction of Incorporation or Organization)\n\n \n\n(IRS Employer Identification No.)\n\n\n\n \n\n \n\n\n\n100 Throckmorton Street, Suite 1200, Fort Worth, Texas\n\n \n\n76102\n\n\n\n(Address of Principal Executive Offices)\n\n \n\n(Zip Code)\n\n\nRegistrant’s telephone number, including area...",1,1.0
47,"\n10-K\n1\neqt-12312015x10k.htm\n10-K\n\n\n\n\t\n\t\t\n\t\t\n\t\t10-K\n\t\n\t\nTable of ContentsUNITED STATESSECURITIES AND EXCHANGE COMMISSIONWashington, D.C. 20549 FORM 10-K [X] ANNUAL REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934FOR THE FISCAL YEAR ENDED DECEMBER 31, 2015 [ ]TRANSITION REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934orFOR THE TRANSITION PERIOD FROM ___________ TO __________ COMMISSION FILE NUMBER 1-3551 EQT CORPORATION(Exact name of registrant as specified in its charter) PENNSYLVANIA(State or other jurisdiction of incorporation or organization) 25-0464690(IRS Employer Identification No.) 625 Liberty AvenuePittsburgh, Pennsylvania(Address of principal executive offices)15222(Zip Code) Registrant’s telephone number, including area code: (412) 553-5700 Securities registered pursuant to Section 12(b) of the Act: Title of each className of each exchange on which registeredCommon Stock, no par valueNew ...",1,1.0
12,"\n10-K\n1\nForm_10_K.htm\n\n\n\n\n\n\n\n\n\n\n \n\n\n\n\n\n\n \n\n\n\n \n\n\n\n \n\n\n\n\nUNITED STATES SECURITIES AND EXCHANGE\nCOMMISSION \n\nWashington, D.C. 20549 \n\n \n\nFORM 10-K \n\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n\n \n ☑ \n \n \n \n \n \n ANNUAL REPORT PURSUANT TO SECTION 13 OR\n 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934 \n \n \n\n\n\n\n\n\nFor the fiscal year ended December 31, 2016\n\nor \n\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n\n \n o \n \n \n \n \n \n TRANSITION REPORT PURSUANT TO SECTION 13\n OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934 \n \n \n \n\n\n\n\n\n\nFor the transition period from \n \nto \n \n\n \n\nCommission file number: 1-33615 \n\n \n\nConcho Resources Inc. \n\n(Exact name of registrant as specified in its charter) \n\n\n \n \n \n \n \n \n \n \n \n \n \n\n \n Delaware \n \n \n \n \n \n 76-081860...",1,1.0
43,"\n10-K/A\n1\na19-8685_110ka.htm\n10-K/A\n\n\n\n\n\n\n\n\n\n \n\n\nTable of Contents\n \nUNITED STATES\nSECURITIES AND EXCHANGE COMMISSION\nWashington, D.C. 20549\n_________________________________________________\n \nFORM 10-K/A\nAmendment No. 1 to Form 10-K\n_________________________________________________\n \n(Mark One)\n \nx ANNUAL REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934\n \nFOR THE FISCAL YEAR ENDED DECEMBER 31, 2018\n \no TRANSITION REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934\n \nor\n \nFOR THE TRANSITION PERIOD FROM: ______ TO: _______\n \nCOMMISSION FILE NUMBER 001-03551\n \nEQT CORPORATION\n(Exact name of registrant as specified in its charter)\n \n\n\n\n\nPENNSYLVANIA \n\n \n\n25-0464690 \n\n\n(State or other jurisdiction of incorporation or organization) \n\n \n\n(IRS Employer Identification No.) \n\n ...",1,1.0
5,"\n10-K/A\n1\nd580186d10ka.htm\nFORM 10-K/A\n\n\nForm 10-K/A\n\n \nTable of Contents\n\n UNITED STATES \nSECURITIES AND EXCHANGE COMMISSION \nWashington, D.C. 20549 \n FORM\n10-K/A (Amendment No. 1) \n \n(Mark one) ☒\n ANNUAL REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934 For the fiscal year\nended December 31, 2017 or \n☐ TRANSITION REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934 \nFor the transition period from\n to \n Commission File Number: 001-36264 \nRSP Permian, Inc. (Exact\nname of registrant as specified in its charter) \n\n\n\n\n\n\n\n\n\n\nDelaware\n \n90-1022997\n\n (State or other jurisdiction\nof incorporation or organization)\n \n (I.R.S. Employer\nIdentification Number)\n\n 3141 Hood Street, Suite 500\nDallas, Texas\n \n75219\n\n(Address of principal executive offices)\n \n(Zip code)\n (214) 252-2700 \n(Registrants telephone number, including are...",1,1.0
17,"\n10-K\n1\na2016form10-k.htm\nMDU RESOURCES 2016 FORM 10-K\n\n\n\n\t\n\t\t\n\t\t\n\t\tWdesk | Document\n\t\n\t\nUNITED STATES SECURITIES AND EXCHANGE COMMISSIONWASHINGTON, D.C. 20549FORM 10-Ký ANNUAL REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934For the fiscal year ended December 31, 2016 ORo TRANSITION REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934For the transition period from _____________ to ______________Commission file number 1-3480MDU RESOURCES GROUP, INC.(Exact name of registrant as specified in its charter)Delaware 41-0423660(State or other jurisdiction ofincorporation or organization) (I.R.S. Employer Identification No.)1200 West Century AvenueP.O. Box 5650Bismarck, North Dakota 58506-5650(Address of principal executive offices)(Zip Code)(701) 530-1000(Registrant's telephone number, including area code)Securities registered pursuant to Section 12(b) of the Act:Title of each class Name of each exchange on which ...",1,1.0
50,"\n10-K\n1\nsm-20161231x10k.htm\n10-K\n\n\n\n\t\n\t\t\n\t\t\n\t\tDocument\n\t\n\t\nUNITED STATESSECURITIES AND EXCHANGE COMMISSIONWashington, D.C. 20549FORM 10-Kþ Annual Report Pursuant to Section 13 or 15(d) of the Securities Exchange Act of 1934For the fiscal year ended December 31, 2016 oro Transition Report Pursuant to Section 13 or 15(d) of the Securities Exchange Act of 1934Commission file number 001-31539SM ENERGY COMPANY(Exact name of registrant as specified in its charter)Delaware(State or other jurisdictionof incorporation or organization)41-0518430(I.R.S. Employer Identification No.)1775 Sherman Street, Suite 1200, Denver, Colorado(Address of principal executive offices)80203(Zip Code)(303) 861-8140(Registrant’s telephone number, including area code)Securities registered pursuant to Section 12(b) of the Act:Title of each class Name of each exchange on which registeredCommon stock, $.01 par value New York Stock ExchangeSecurities registered pursuant to Section 12(g) ...",1,1.0
3,"\n10-K\n1\ndiamondback201610-k.htm\nDIAMONDBACK 10-K\n\n\n\n\t\n\t\t\n\t\t\n\t\tDocument\n\t\n\t\nTable of Contents UNITED STATESSECURITIES AND EXCHANGE COMMISSIONWashington, D.C. 20549 FORM 10-K ýANNUAL REPORT UNDER SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934For the fiscal year ended December 31, 2016 OR¨TRANSITION REPORT UNDER SECTION 13 OR 15(d) OF SECURITIES EXCHANGE ACT OF 1934Commission File Number 001-35700 Diamondback Energy, Inc.(Exact Name of Registrant As Specified in Its Charter) Delaware 45-4502447(State or Other Jurisdiction ofIncorporation or Organization) (IRS EmployerIdentification Number) 500 West Texas, Suite 1200Midland, Texas 79701(Address of Principal Executive Offices) (Zip Code)(Registrant Telephone Number, Including Area Code): (432) 221-7400 Securities registered pursuant to Section 12(b) of the Act: Title of Each Class Name of Each Exchange on Which Registered Common Stock, par value $0.01 per share The NASDAQ...",1,1.0
32,"\n10-K\n1\nd191939d10k.htm\n10-K\n\n\n10-K\n\n \nTable of Contents\n\n UNITED STATES SECURITIES AND EXCHANGE COMMISSION \nWashington, D.C. 20549 \nFORM 10-K \n(Mark One) \n\n\n[X]\n ANNUAL REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934\n For the fiscal year ended December 31, 2016 \nor \n\n\n[ ]\n TRANSITION REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934\n Commission file number 1-15226 \n \n\n\n\n ENCANA CORPORATION \n(Exact name of registrant as specified in its charter) \n\n\n\n\n\n\n\n\nCanada\n \nNot Applicable\n\n\n\n(State or other jurisdiction of incorporation or organization)\n \n(I.R.S. Employer Identification No.)\n Suite 4400, 500 Centre Street S.E., P.O. Box 2850, Calgary, Alberta, Canada, T2P 2S5 \n(Address of principal executive offices) \nRegistrants telephone number, including area code (403) 645-2000 \nSecurities registered pursuant to Section 12(b) of the Act: \n \n\n\n\n\n\n\n\n\n Title...",1,1.0
