In [1]:
import os
import io
import json
import pandas as pd
from dotenv import load_dotenv
import PyPDF2
import time
from azure.storage.blob import ContainerClient
# from openai import AzureOpenAI

import google.generativeai as genai
import os

from concurrent.futures import ThreadPoolExecutor, as_completed

# Load environment variables
load_dotenv()

# Set up Azure Blob Storage connection
connection_string = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
container_name = os.getenv("CONTAINER_NAME")  # Replace with your container name
container_client = ContainerClient.from_connection_string(
    conn_str=connection_string,
    container_name=container_name
)

# Function to extract text from a PDF file stream
def extract_text_from_pdf(file_stream):
    text = ""
    try:
        pdf_reader = PyPDF2.PdfReader(file_stream)
        for page in pdf_reader.pages:
            page_text = page.extract_text()
            if page_text:
                text += page_text
    except Exception as e:
        print(f"Error extracting text from PDF: {e}")
    return text



def get_json(file_stream, k):

    # genai.configure(api_key=os.getenv(f"GEMINI_API_KEY_{k}"))
    genai.configure(api_key=os.getenv(f"GEMINI_API_KEY_0"))
    
    # Extract text from PDF
    earning_call_transcript = extract_text_from_pdf(file_stream)
 
    model = genai.GenerativeModel(
        model_name="gemini-1.5-flash", 
        system_instruction="Please provide output in a JSON format."
        
    )
    client = model.start_chat()

    with open("prompt.txt", 'r') as prompt:
        prompt = prompt.read()
    

    print(f"*************API - {k} - IN USE***************")
    retries = 1
    while retries <= 3:
        try:
            # print("getting json...")
            response_json = client.send_message(prompt.format(earning_call_transcript=earning_call_transcript))
            print(f"done - {response_json.text}")
            # how_respose = client.send_message(how_content)
            return response_json.text # how_respose.text
        except Exception as e:
            retries += 1
            if retries > 3:
                print("!!!!!Couldn't Resolve!!!!!!")
                return None
            print(f"Error Occoured for API {k}: {e}\n Retrying...{retries}")
            time.sleep(15)

def process_blob(blob, k=1, retries=2,):
    attempt = 0
    while attempt < retries:
        try:
            stream = io.BytesIO()
            print(f"File_NAME = {blob.name}")
            container_client.download_blob(blob).readinto(stream)
            op = get_json(stream, k)
            if op: #and how:
                op = json.loads(op[op.index("{"):len(op)-op[::-1].index("}")])
                if op.get("capex"):  # Check if capex is not empty
                    return op #, how
                else:
                    print(f"Capex is empty. Retrying for blob {blob.name}...")
            attempt += 1
            time.sleep(5)  # Wait before retrying
        except Exception as e:
            print(f"Error processing blob {blob.name}: {e}")
            return None#, None
    print(f"Max retries reached for blob {blob.name}.")
    return None#, None

# function to process blobs in parallel
def get_capex_info(companies):
    final_json = []
    blob_list = list(container_client.list_blobs())[:]  # Limit to the first 20 blobs
    blob_list = [blob for blob in blob_list if blob.name.strip().split('/')[0] in companies]
    print(f"NUMBER OF FILES: {len(blob_list)}")
    completed = 0
    # Using ThreadPoolExecutor for parallel processing
    with ThreadPoolExecutor(max_workers=1) as executor:
        future_to_blob = {executor.submit(process_blob, blob, k%1): (blob, k) for k, blob in enumerate(blob_list)}
        # Iterate over completed futures
        for future in as_completed(future_to_blob):
            blob = future_to_blob[future]
            try:
                result = future.result()
                if result:
                    op = result
                    if op:  # Ensure op is not None
                        completed += 1
                        print(f"{completed} files completed.")
                        print(op, "\n\n") #, how)
                        final_json.append(op)
            except Exception as e:
                print(f"Error processing result for blob {blob.name}: {e}")

    # Ensure final_json contains valid data
    final_json = [item for item in final_json if isinstance(item, dict)]
    # Optional: Save final JSON to a file
    # output_file = "financial_analysis_results_1201.json"
    # with open(output_file, "w") as outfile:
    #     json.dump(final_json, outfile, indent=4)
    # final_json = json.loads(final_json)
    # Convert final_json to DataFrame and save to CSV
    if final_json:  # Check if final_json is not empty
        df = pd.DataFrame(final_json)
        # output_file = "financial_analysis_results10.csv"
        # df.to_csv(output_file, index=False)
        print(f"Total {completed} files completed.")
        print(f"Processing completed. Results saved to df.")
        return df, final_json
        # print(f"Processing completed. Results saved to {output_file}.")
    else:
        # print("No valid data to save to CSV.")
        print("No valid data to save to df.")
        return None, None



  from .autonotebook import tqdm as notebook_tqdm


In [2]:
#

# Read the original CSV file
# input_file = "all_1.csv"
# df = pd.read_csv(input_file)

def clean_df(df):
    df['company'] = df['company'].replace({
    'Amazon': 'Amazon.com',
    'Amazon.com, Inc.': 'Amazon.com',
    'Alphabet Inc':'Alphabet Inc.',
    'Amazon.com Inc.': 'Amazon.com',
    'Goldman Sachs' : 'Goldman Sachs Group, Inc.',
    'The Goldman Sachs Group, Inc.': 'Goldman Sachs Group, Inc.',
    'Goldman Sachs Group':'Goldman Sachs Group, Inc.',
    'Tesla' : 'Tesla Inc.',
    'Tesla, Inc.':'Tesla Inc.',
    'Tesla Motors, Inc.': 'Tesla Inc.',
    'Tesla Motors':'Tesla Inc.',
    'Valero Energy Corporation': 'Valero Energy',
    'Cardinal Health':'Cardinal Health, Inc.',
    'BERKSHIRE HATHAWAY INC.': 'Berkshire Hathaway Inc.',
    'Berkshire Hathaway Inc' : 'Berkshire Hathaway Inc.',
    '"Cardinal Health, Inc."':'Cardinal Health, Inc.',
    'Bank of America Corporation': 'Bank of America',
    'CVS Health Corp': 'CVS Health Corporation',
    'CVS Health':'CVS Health Corporation',
    'Cigna Corporation':'CIGNA', 
    'Cigna Corp.':'CIGNA', 
    'Cigna Corp':'CIGNA',
      'CI':'CIGNA',
      'CIGNA Corporation':'CIGNA',
      'Cigna':'CIGNA', 
      'Cigna Group':'CIGNA',
      'The Cigna Group':'CIGNA', 
      'Citigroup Inc.':'Citi',
      'Citigroup Inc':'Citi',
      'CMCSA':'Comcast',
      'Comcast Corporation':'Comcast',
      'Comcast Corp':'Comcast',
      'WellPoint':'Wellpoint Inc',
      'WellPoint':'Wellpoint Inc.',
      'Anthem Inc': 'Anthem Inc.', 
    'Elevance Health':'Elevance Health Inc', 
    'Exxon Mobil Corporation':'ExxonMobil',
      'ExxonMobil Corporation':'ExxonMobil',
       'Ford Motor Company':'Ford Motor', 
       'Ford Motor Co':'Ford Motor',
       'Ford Motor Co.':'Ford Motor',
        'Humana Inc':'Humana Inc.',
       'JPMorgan Chase & Co':'JPMorgan Chase & Co.',
       'JPMorgan Chase':'JPMorgan Chase & Co.',
       'McKesson Corporation':'McKesson' , 
       'McKESSON':'McKesson',
       'McKESSON CORPORATION':'McKesson',  
       'McKesson Corp.':'McKesson',
       'McKesson Corp':'McKesson',
       'Walgreens Boots Alliance':'Walgreens Boots Alliance Inc.',
       'Walgreens Boots Alliance Inc':'Walgreens Boots Alliance Inc.', 
       'Wal-Mart Stores, Inc.':'Walmart', 
      'WAL-MART STORES, INC':'Walmart',
       'Walmart Inc.':'Walmart', 
       'WAL-MART STORES, INC.':'Walmart',
       'WAL-MART STORES, INC. (NYSE: WMT)':'Walmart', 
       'Walmart, Inc.':'Walmart',
      
})

    df['capex'] = df['capex'].replace({ 'Not provided in the report':'-', 'nan':'-' ,'Not provided': '-', 'not available':'-','To be determined':'-','[Data Not Provided]':'-','Not Provided': '-','Not mentioned in the report':'-','X':'-', 'Not available': '-',  'Not specified': '-', 'n/a': '-', 'nan': '-','X.XX': '-','Not provided in the Report':'-','Not specified in the provided document':'-','Not Provided in the Report':'-','Not available in the provided document':'-','Not Available' :'-','[figures not provided]':'-', 'Not specified in the text provided': '-','Not mentioned' : '-','Not disclosed':'-'})
    df['company'] = df['company'].str.replace('"', ' ', regex=False)
    df['year'] = df['year'].replace({'FY16' :'2016', 'FY21':'2021', 'FY20':'2020', 'FY19':'2019', 'FY23':'2023', 'FY24':'2024', 'FY2024':'2024', 'FY17':'2017', 'Q3 FY15':'2015', 'Q1 FY18' : '2018', 'Q1 FY21':'2021'})
    df['quarter'] = df['quarter'].replace({'First Quarter': 'Q1', '2': 'Q2', 'Second Quarter': 'Q2', 'Third Quarter': 'Q3', '3rd Quarter':'Q3', '1':'Q1', '4th':'Q4', '2nd Quarter':'Q2', '2Q':'Q2', '3Q':'Q3', 'Fourth Quarter' : 'Q4','Fourth':'Q4','fourth':'Q4', 'Second':'Q2','4': 'Q4','First':'Q1', '4Q':'Q4','4':'Q4', 'Third':'Q3', '1Q':'Q1'})
    
    # Save the rearranged DataFrame to a new CSV file
    print(f"cleaned data saved")# to {output_file}.")
    return df

def rearrange_df(df):
    # Convert 'quarter' and 'year' columns into a single 'Quarter-Year' column with an underscore
    df['Quarter-Year'] = df['quarter'] + "_" + df['year'].astype(str)

    # Pivot the DataFrame
    pivot_df = df.pivot_table(index='company', columns='Quarter-Year', values='capex', aggfunc='first')

    # Reset the index to make 'company' a column instead of an index
    pivot_df.reset_index(inplace=True)

    # Define a function to sort columns in the desired order
    def sort_columns(df):
        # Extract the current columns
        columns = df.columns.tolist()

        # Extract the company column and the Quarter-Year columns
        company_col = columns[0]
        quarter_cols = columns[1:]

        # Generate sorted columns list: Start with the most recent quarters
        sorted_quarters = sorted(quarter_cols, key=lambda x: (x.split('_')[1], x.split('_')[0]), reverse=True)

        # Combine sorted columns with the company column
        sorted_columns = [company_col] + sorted_quarters
        return sorted_columns

    # Reorder the columns
    sorted_columns = sort_columns(pivot_df)
    pivot_df = pivot_df[sorted_columns]
    pivot_df['company'].unique()
    pivot_df['company'] = pivot_df['company'].str.replace('"', ' ', regex=False)
    # # Save the rearranged DataFrame to a new CSV file
    # output_file = pivot_df
    # "rearranged_financial_analysis_results1.csv"
    # pivot_df.to_csv(output_file, index=False)

    print(f"Rearranged data saved")# to {output_file}.")
    return pivot_df

def final_process(df):
    # Define a function to compare capex values between consecutive columns
    def compare_columns(row):
        comparisons = []
        for i in range(1, len(row) - 1):  # Skip the 'company' column
            current_value = row[i]
            next_value = row[i + 1]

            if pd.isna(current_value) or pd.isna(next_value):
                comparisons.append("DNA")#("Data not available")
            else:
                try:
                    current_value = float(current_value)
                    next_value = float(next_value)
                    if next_value < current_value:
                        comparisons.append("Increase")
                    elif next_value > current_value:
                        comparisons.append("Decrease")
                    else:
                        comparisons.append("Unchanged")
                except ValueError:
                    comparisons.append("DNA")#("Data not available")

        return comparisons

    # Apply the comparison function to each row
    comparison_results = df.apply(lambda row: compare_columns(row), axis=1)

    # Replace the original values in the DataFrame with the comparison results
    for i, col in enumerate(df.columns[1:-1]):  # Exclude 'company' and the last quarter
        df[col] = comparison_results.apply(lambda x: x[i])

    # # Save the comparison results to a new CSV file
    # output_file = "capex_comparison_results.csv"
    # df.to_csv(output_file, index=False)

    print(f"Capex comparison results saved") #to {output_file}.")
    return df

In [3]:

company_df = []
raw_company_df = []
raw_json = []


In [4]:
all_companies = [
"Alphabet",
"Amazon",
"Berkshire Hathaway",
"Cardinal Health",
"Centene",
"Chevron",
"Comcast",
"ExxonMobil",
"Tesla",
"UnitedHealth Group",
"Valero Energy",
"Walgreens Boots Alliance",
"Wallmart"
]
# all_companies = ["Comcast"]
for company in all_companies:
    print(f"{'#'*10}processing--{company}{'#'*10}")
    csv_df, json_data = get_capex_info([company])
    raw_json.append(json_data)
    cleaned_df = clean_df(csv_df)
    rearranged_df = rearrange_df(cleaned_df)
    rearranged_df.to_csv(f"{company}.csv")
    raw_company_df.append(rearranged_df.copy(deep=True))
    final_df = final_process(rearranged_df)
    final_df.to_csv(f"final_{company}.csv")
    company_df.append(final_df.copy(deep=True))
    print(f"{'$'*10}processed--{company}{'$'*10}")

##########processing--Alphabet##########
NUMBER OF FILES: 14
File_NAME = Alphabet/2021/2021q1-alphabet-earnings-release.pdf
*************API - 0 - IN USE***************
done - ```json
{
  "company": "Alphabet",
  "quarter": "1",
  "year": "2021",
  "capex": "5.942"
}
```
File_NAME = Alphabet/2021/2021q2-alphabet-earnings-release.pdf
1 files completed.
{'company': 'Alphabet', 'quarter': '1', 'year': '2021', 'capex': '5.942'} 


*************API - 0 - IN USE***************
done - ```json
{
  "company": "Alphabet",
  "quarter": "2",
  "year": "2021",
  "capex": "5.496"
}
```
File_NAME = Alphabet/2021/2021q3-alphabet-earnings-release.pdf
2 files completed.
{'company': 'Alphabet', 'quarter': '2', 'year': '2021', 'capex': '5.496'} 


*************API - 0 - IN USE***************
done - ```json
{
  "company": "Alphabet",
  "quarter": "3",
  "year": "2021",
  "capex": "6.819"
}
```
File_NAME = Alphabet/2021/2021q4-alphabet-earnings-release.pdf
3 files completed.
{'company': 'Alphabet', 'quarter

  current_value = row[i]
  next_value = row[i + 1]


NUMBER OF FILES: 18
File_NAME = Amazon/2020/AMZN-Q1-2020-Earnings-Release.pdf
*************API - 0 - IN USE***************
done - ```json
{
  "company": "Amazon.com",
  "quarter": "1",
  "year": "2020",
  "capex": null
}
```
Capex is empty. Retrying for blob Amazon/2020/AMZN-Q1-2020-Earnings-Release.pdf...
File_NAME = Amazon/2020/AMZN-Q1-2020-Earnings-Release.pdf
*************API - 0 - IN USE***************
done - ```json
{
  "company": "Amazon.com",
  "quarter": "1",
  "year": "2020",
  "capex": "6.795"
}
```
File_NAME = Amazon/2020/AMZN-Q3-2020-Earnings-Release.pdf
1 files completed.
{'company': 'Amazon.com', 'quarter': '1', 'year': '2020', 'capex': '6.795'} 


*************API - 0 - IN USE***************
done - ```json
{
  "company": "Amazon.com",
  "quarter": "3",
  "year": "2020",
  "capex": "11.063"
}
```
File_NAME = Amazon/2020/Amazon-Q4-2020-Earnings-Release.pdf
2 files completed.
{'company': 'Amazon.com', 'quarter': '3', 'year': '2020', 'capex': '11.063'} 


*************API -

  current_value = row[i]
  next_value = row[i + 1]


NUMBER OF FILES: 39
File_NAME = Berkshire Hathaway/2014/aug0114.pdf
*************API - 0 - IN USE***************
done - ```json
{
  "company": "Berkshire Hathaway Inc.",
  "quarter": "2",
  "year": "2014",
  "capex": null
}
```
Capex is empty. Retrying for blob Berkshire Hathaway/2014/aug0114.pdf...
File_NAME = Berkshire Hathaway/2014/aug0114.pdf
*************API - 0 - IN USE***************
done - ```json
{
  "company": "Berkshire Hathaway Inc.",
  "quarter": "2",
  "year": "2014",
  "capex": null
}
```
Capex is empty. Retrying for blob Berkshire Hathaway/2014/aug0114.pdf...
Max retries reached for blob Berkshire Hathaway/2014/aug0114.pdf.
File_NAME = Berkshire Hathaway/2014/may0214.pdf
*************API - 0 - IN USE***************
done - ```json
{
  "company": "Berkshire Hathaway Inc.",
  "quarter": "1",
  "year": "2014",
  "capex": null
}
```
Capex is empty. Retrying for blob Berkshire Hathaway/2014/may0214.pdf...
File_NAME = Berkshire Hathaway/2014/may0214.pdf
*************API - 0 - 

  current_value = row[i]
  next_value = row[i + 1]


NUMBER OF FILES: 39
File_NAME = Cardinal Health/2015/FINAL-Q2-FY15-Cardinal-Health-Earnings-Transcript_v001_u60495.pdf
*************API - 0 - IN USE***************
done - ```json
{
  "company": "Cardinal Health",
  "quarter": "2",
  "year": "2015",
  "capex": "0.345"
}
```
File_NAME = Cardinal Health/2015/FINAL-Q3-FY15-Cardinal-Health-Earnings-Transcript.pdf
1 files completed.
{'company': 'Cardinal Health', 'quarter': '2', 'year': '2015', 'capex': '0.345'} 


*************API - 0 - IN USE***************
done - ```json
{
  "company": "Cardinal Health, Inc.",
  "quarter": "3",
  "year": "2015",
  "capex": "0.330"
}
```
File_NAME = Cardinal Health/2015/Q1FY15-Cardinal-Health-Earnings-Transcript_Final-(1).pdf
2 files completed.
{'company': 'Cardinal Health, Inc.', 'quarter': '3', 'year': '2015', 'capex': '0.330'} 


*************API - 0 - IN USE***************
done - ```json
{
  "company": "Cardinal Health",
  "quarter": "1",
  "year": "2015",
  "capex": "null"
}
```
File_NAME = Cardinal H

  current_value = row[i]
  next_value = row[i + 1]


NUMBER OF FILES: 43
File_NAME = Centene/2014/2014-04-22-Centene-Corporation-Reports-2014-First-Quarter-Results-Raises-Full-Year-Guidance.pdf
*************API - 0 - IN USE***************
done - ```json
{
  "company": "Centene Corporation",
  "quarter": "1",
  "year": "2014",
  "capex": "0.018"
}
```
File_NAME = Centene/2014/2014-07-22-Centene-Corporation-Reports-2014-Second-Quarter-Results-Raises-Guidance.pdf
1 files completed.
{'company': 'Centene Corporation', 'quarter': '1', 'year': '2014', 'capex': '0.018'} 


*************API - 0 - IN USE***************
done - ```json
{
  "company": "Centene Corporation",
  "quarter": "2",
  "year": "2014",
  "capex": "0.041568"
}
```
File_NAME = Centene/2014/2014-10-28-Centene-Corporation-Reports-2014-Third-Quarter-Results-Raises-Guidance.pdf
2 files completed.
{'company': 'Centene Corporation', 'quarter': '2', 'year': '2014', 'capex': '0.041568'} 


*************API - 0 - IN USE***************
done - ```json
{
  "company": "Centene Corporation",


  current_value = row[i]
  next_value = row[i + 1]


NUMBER OF FILES: 42
File_NAME = Chevron/2014/2014_1Q_Earnings_Press_Release-.pdf
*************API - 0 - IN USE***************
done - ```json
{
  "company": "Chevron Corporation",
  "quarter": "1",
  "year": "2014",
  "capex": "9.431"
}
```
File_NAME = Chevron/2014/2014_2Q_Earnings_Press_Release.pdf
1 files completed.
{'company': 'Chevron Corporation', 'quarter': '1', 'year': '2014', 'capex': '9.431'} 


*************API - 0 - IN USE***************
done - ```json
{
  "company": "Chevron",
  "quarter": "2",
  "year": "2014",
  "capex": "19.616"
}
```
File_NAME = Chevron/2014/2014_3Q_Earnings_Press_Release.pdf
2 files completed.
{'company': 'Chevron', 'quarter': '2', 'year': '2014', 'capex': '19.616'} 


*************API - 0 - IN USE***************
done - ```json
{
  "company": "Chevron",
  "quarter": "3",
  "year": "2014",
  "capex": "29.026"
}
```
File_NAME = Chevron/2014/2014_4Q_Earnings_Press_Release.pdf
3 files completed.
{'company': 'Chevron', 'quarter': '3', 'year': '2014', 'capex'

  current_value = row[i]
  next_value = row[i + 1]


NUMBER OF FILES: 84
File_NAME = Comcast/2014/1Q14 Earnings Release with Tables.pdf
*************API - 0 - IN USE***************
Error Occoured for API 0: 429 Resource has been exhausted (e.g. check quota).
 Retrying...2
done - ```json
{
  "company": "Comcast Corporation",
  "quarter": "1",
  "year": "2014",
  "capex": "1.448"
}
```
File_NAME = Comcast/2014/2Q14 Earnings Release with Tables.pdf
1 files completed.
{'company': 'Comcast Corporation', 'quarter': '1', 'year': '2014', 'capex': '1.448'} 


*************API - 0 - IN USE***************
done - ```json
{
  "company": "Comcast Corporation",
  "quarter": "2",
  "year": "2014",
  "capex": "1.798"
}
```
File_NAME = Comcast/2014/3Q14 Earnings Release with Tables.pdf
2 files completed.
{'company': 'Comcast Corporation', 'quarter': '2', 'year': '2014', 'capex': '1.798'} 


*************API - 0 - IN USE***************
done - ```json
{
  "company": "Comcast Corporation",
  "quarter": "3",
  "year": "2014",
  "capex": "1.950"
}
```
File_NAM

  current_value = row[i]
  next_value = row[i + 1]


NUMBER OF FILES: 42
File_NAME = ExxonMobil/2014/2014-05-01_Exxon_Mobil_Corporation_Announces_Estimated_First__562.pdf
*************API - 0 - IN USE***************
done - ```json
{
  "company": "Exxon Mobil Corporation",
  "quarter": "1",
  "year": "2014",
  "capex": "8.436"
}
```
File_NAME = ExxonMobil/2014/2014-07-31_Exxon_Mobil_Corporation_Announces_Estimated_549.pdf
1 files completed.
{'company': 'Exxon Mobil Corporation', 'quarter': '1', 'year': '2014', 'capex': '8.436'} 


*************API - 0 - IN USE***************
done - ```json
{
  "company": "Exxon Mobil Corporation",
  "quarter": "2",
  "year": "2014",
  "capex": "9.800"
}
```
File_NAME = ExxonMobil/2014/2014-10-31_Exxon_Mobil_Corporation_Announces_Estimated_Third__536.pdf
2 files completed.
{'company': 'Exxon Mobil Corporation', 'quarter': '2', 'year': '2014', 'capex': '9.800'} 


*************API - 0 - IN USE***************
done - ```json
{
  "company": "Exxon Mobil Corporation",
  "quarter": "3",
  "year": "2014",
  "cape

  current_value = row[i]
  next_value = row[i + 1]


NUMBER OF FILES: 42
File_NAME = Tesla/2014/Q1_14_Shareholder_Letter_final.pdf
*************API - 0 - IN USE***************
done - ```json
{
  "company": "Tesla Motors",
  "quarter": "1",
  "year": "2014",
  "capex": "0.141"
}
```
File_NAME = Tesla/2014/Q314_SHL_Final.pdf
1 files completed.
{'company': 'Tesla Motors', 'quarter': '1', 'year': '2014', 'capex': '0.141'} 


*************API - 0 - IN USE***************
done - ```json
{
  "company": "Tesla Motors",
  "quarter": "3",
  "year": "2014",
  "capex": "0.284"
}
```
File_NAME = Tesla/2014/Q4_14_Shareholder_Letter_Final.pdf
2 files completed.
{'company': 'Tesla Motors', 'quarter': '3', 'year': '2014', 'capex': '0.284'} 


*************API - 0 - IN USE***************
done - ```json
{
  "company": "Tesla Motors",
  "quarter": "4",
  "year": "2014",
  "capex": "0.369"
}
```
File_NAME = Tesla/2014/Tesla_Q2_14_Shareholder_Letter.pdf
3 files completed.
{'company': 'Tesla Motors', 'quarter': '4', 'year': '2014', 'capex': '0.369'} 


********

  current_value = row[i]
  next_value = row[i + 1]


NUMBER OF FILES: 42
File_NAME = UnitedHealth Group/2014/UNH-Q1-2014-Release.pdf
*************API - 0 - IN USE***************
done - ```json
{
  "company": "UnitedHealth Group",
  "quarter": "1",
  "year": "2014",
  "capex": "0.353"
}
```
File_NAME = UnitedHealth Group/2014/UNH-Q2-2014-Release.pdf
1 files completed.
{'company': 'UnitedHealth Group', 'quarter': '1', 'year': '2014', 'capex': '0.353'} 


*************API - 0 - IN USE***************
done - ```json
{
  "company": "UnitedHealth Group",
  "quarter": "2",
  "year": "2014",
  "capex": "0.716"
}
```
File_NAME = UnitedHealth Group/2014/UNH-Q3-2014-Release.pdf
2 files completed.
{'company': 'UnitedHealth Group', 'quarter': '2', 'year': '2014', 'capex': '0.716'} 


*************API - 0 - IN USE***************
done - ```json
{
  "company": "UnitedHealth Group",
  "quarter": "3",
  "year": "2014",
  "capex": "1.121"
}
```
File_NAME = UnitedHealth Group/2014/UNH-Q4-2014-Release.pdf
3 files completed.
{'company': 'UnitedHealth Group', '

  current_value = row[i]
  next_value = row[i + 1]


NUMBER OF FILES: 39
File_NAME = Valero Energy/2014/4Q14-VLO-Earnings-Release.pdf
*************API - 0 - IN USE***************
done - ```json
{
  "company": "Valero Energy Corporation",
  "quarter": "4",
  "year": "2014",
  "capex": "2.800"
}
```
File_NAME = Valero Energy/2015/1Q15-VLO-Earnings-Release.pdf
1 files completed.
{'company': 'Valero Energy Corporation', 'quarter': '4', 'year': '2014', 'capex': '2.800'} 


*************API - 0 - IN USE***************
done - ```json
{
  "company": "Valero Energy Corporation",
  "quarter": "1",
  "year": "2015",
  "capex": "0.698"
}
```
File_NAME = Valero Energy/2015/2Q15-VLO-Earnings-Release.pdf
2 files completed.
{'company': 'Valero Energy Corporation', 'quarter': '1', 'year': '2015', 'capex': '0.698'} 


*************API - 0 - IN USE***************
done - ```json
{
  "company": "Valero Energy Corporation",
  "quarter": "2",
  "year": "2015",
  "capex": "0.530"
}
```
File_NAME = Valero Energy/2015/3Q15-VLO-Earnings-Release.pdf
3 files complet

  current_value = row[i]
  next_value = row[i + 1]


NUMBER OF FILES: 38
File_NAME = Walgreens Boots Alliance/2015/2Q15_Transcript.pdf
*************API - 0 - IN USE***************
done - ```json
{
  "company": "Walgreens Boots Alliance",
  "quarter": "2",
  "year": "2015",
  "capex": null
}
```
Capex is empty. Retrying for blob Walgreens Boots Alliance/2015/2Q15_Transcript.pdf...
File_NAME = Walgreens Boots Alliance/2015/2Q15_Transcript.pdf
*************API - 0 - IN USE***************
done - ```json
{
  "company": "Walgreens Boots Alliance",
  "quarter": "2",
  "year": "2015",
  "capex": null
}
```
Capex is empty. Retrying for blob Walgreens Boots Alliance/2015/2Q15_Transcript.pdf...
Max retries reached for blob Walgreens Boots Alliance/2015/2Q15_Transcript.pdf.
File_NAME = Walgreens Boots Alliance/2015/3Q15_Transcript.pdf
*************API - 0 - IN USE***************
done - ```json
{
  "company": "Walgreens Boots Alliance",
  "quarter": "3",
  "year": "2015",
  "capex": "0.89"
}
```
File_NAME = Walgreens Boots Alliance/2015/4Q15_Transcri

  current_value = row[i]
  next_value = row[i + 1]


NUMBER OF FILES: 60
File_NAME = Wallmart/2010/1Q2009.pdf
*************API - 0 - IN USE***************
done - ```json
{
  "company": "Wal-Mart Stores, Inc.",
  "quarter": "1",
  "year": "2010",
  "capex": "13.000"
}
```
File_NAME = Wallmart/2010/2Q2009.pdf
1 files completed.
{'company': 'Wal-Mart Stores, Inc.', 'quarter': '1', 'year': '2010', 'capex': '13.000'} 


*************API - 0 - IN USE***************
done - ```json
{
  "company": "Wal-Mart Stores, Inc.",
  "quarter": "2",
  "year": "2010",
  "capex": "13.0"
}
```
File_NAME = Wallmart/2010/3Q09_transcript.pdf
2 files completed.
{'company': 'Wal-Mart Stores, Inc.', 'quarter': '2', 'year': '2010', 'capex': '13.0'} 


*************API - 0 - IN USE***************
done - ```json
{
  "company": "Wal-Mart Stores, Inc.",
  "quarter": "3",
  "year": "2010",
  "capex": "12.8"
}
```
File_NAME = Wallmart/2010/4Q10_transcript.pdf
3 files completed.
{'company': 'Wal-Mart Stores, Inc.', 'quarter': '3', 'year': '2010', 'capex': '12.8'} 


******

  current_value = row[i]
  next_value = row[i + 1]


In [5]:
final_analysis = pd.concat(company_df)
final_analysis.to_csv("final_analysis_final_full.csv")

In [6]:

raw_company_analysis = pd.concat(raw_company_df)
raw_company_analysis.to_csv("final_raw_company_analysis__final_full.csv")