# Here we merge our cleaned data

## Importing libraries

In [1]:
# Basics
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import time
from datetime import date

## Loading the Datasets

In [2]:
# Read the four prepared CSV extracts (industries, companies, balance sheets,
# cash-flow statements); the order of the target names matches the order of the paths.
df_industries, df_companies, df_balance, df_cashflow = (
    pd.read_csv(csv_path)
    for csv_path in (
        'Prepared Frames/industry_data.csv',
        'Prepared Frames/company_data.csv',
        'Prepared Frames/balance_data.csv',
        'Prepared Frames/cashflow_data.csv',
    )
)

In [3]:
# Show the first row of every input frame to check the column layout before merging.
display(
    df_industries.head(1),
    df_companies.head(1),
    df_balance.head(1),
    df_cashflow.head(1),
)

Unnamed: 0,IndustryId,Sector,Industry
0,100001,Industrials,Industrial Products


Unnamed: 0,Ticker,Company Name,IndustryId
0,A,AGILENT TECHNOLOGIES INC,106001.0


Unnamed: 0,Ticker,Fiscal Year,Fiscal Period,Report Date,Shares (Diluted),"Cash, Cash Equivalents & Short Term Investments",Accounts & Notes Receivable,Total Current Assets,"Property, Plant & Equipment, Net",Other Long Term Assets,Total Noncurrent Assets,Total Assets,Payables & Accruals,Total Current Liabilities,Total Noncurrent Liabilities,Total Liabilities,Share Capital & Additional Paid-In Capital,Retained Earnings,Total Equity
0,A,2017,Q1,2017-01-31,326000000.0,2241000000.0,653000000.0,3635000000.0,653000000.0,3451000000.0,4237000000.0,7872000000,268000000.0,1089000000,2483000000.0,3572000000,5239000000.0,-453000000.0,4300000000.0


Unnamed: 0,Ticker,Fiscal Year,Fiscal Period,Net Income/Starting Line,Depreciation & Amortization,Non-Cash Items,Change in Working Capital,Net Cash from Operating Activities,Change in Fixed Assets & Intangibles,Net Cash from Acquisitions & Divestitures,Net Cash from Investing Activities,Cash from (Repayment of) Debt,Cash from (Repurchase of) Equity,Net Cash from Financing Activities,Net Change in Cash
0,A,2017,Q1,168000000.0,55000000.0,45000000.0,-152000000.0,116000000.0,-32000000.0,-69000000.0,-101000000.0,89000000.0,-93000000.0,-58000000.0,-48000000


## Merging

#### Cashflow and Balance Sheet
We merge the cash-flow and balance-sheet data on Ticker, Fiscal Year and Fiscal Period,
so that the two statements belonging to the same report end up in one row.

In [4]:
# Row/column counts of the two statement frames before they are merged.
print(df_balance.shape, df_cashflow.shape, sep='\n')

(37222, 19)
(37499, 15)


In [5]:
# Join balance-sheet and cash-flow rows that describe the same report.
# The outer join keeps reports present in only one of the two frames, so no
# information is lost at this step; the NaN rows it produces are handled afterwards.
df_statements = df_balance.merge(
    df_cashflow,
    how='outer',
    on=['Ticker', 'Fiscal Year', 'Fiscal Period'],
)
df_statements.shape

(37499, 31)

In [10]:
# The outer merge produced rows with NaNs (inspect with `df_statements.isna().sum()`).
# 'Report Date' is a balance-sheet column, so a missing value there marks a row for which
# nothing more can be recovered at this point; those rows are dropped.
df_statements = df_statements.dropna(subset=['Report Date'])
df_statements.shape

(37222, 31)

## Names and Industries
Now we add the information from our companies dataframe; the company name in particular is needed to assign the ratings later.

In [12]:
# Attach the company name and IndustryId to every statement row.
# how='right' keeps all rows of df_statements (the right frame); a ticker that is
# missing from df_companies simply gets NaN in the company columns.
df_main = pd.merge(
    left=df_companies,
    right=df_statements,
    how='right',
    on='Ticker',
)

# A ticker listed more than once in df_companies would silently duplicate statement
# rows, so fail loudly if the merge changed the row count.
assert len(df_main) == len(df_statements), (
    'merge with df_companies changed the number of statement rows '
    '(duplicate Ticker in df_companies?)'
)
df_main.shape

(37222, 33)

In [14]:
# Add the Sector and Industry names; they make the analysis easier to read than
# the bare IndustryId.
# how='left' keeps every row of df_main; an IndustryId without a match in
# df_industries gets NaN for the added columns.
df_main = pd.merge(
    left=df_main,
    right=df_industries,
    how='left',
    on='IndustryId',
)

# df_main is expected to hold exactly one row per statement row; a repeated
# IndustryId in df_industries would break that by duplicating rows.
assert len(df_main) == len(df_statements), (
    'df_main no longer has one row per statement row '
    '(duplicate IndustryId in df_industries?)'
)
df_main.shape

(37222, 35)

In [17]:
# NaN audit, currently disabled: df_main.isna().sum()
# Author's note: the last two merges introduced no additional NaNs
# (re-enable the audit above to confirm).

# Preview the first 50 rows of the merged frame.
df_main.iloc[:50]

Unnamed: 0,Ticker,Company Name,IndustryId,Fiscal Year,Fiscal Period,Report Date,Shares (Diluted),"Cash, Cash Equivalents & Short Term Investments",Accounts & Notes Receivable,Total Current Assets,...,Net Cash from Operating Activities,Change in Fixed Assets & Intangibles,Net Cash from Acquisitions & Divestitures,Net Cash from Investing Activities,Cash from (Repayment of) Debt,Cash from (Repurchase of) Equity,Net Cash from Financing Activities,Net Change in Cash,Sector,Industry
0,A,AGILENT TECHNOLOGIES INC,106001.0,2017,Q1,2017-01-31,326000000.0,2241000000.0,653000000.0,3635000000.0,...,116000000.0,-32000000.0,-69000000.0,-101000000.0,89000000.0,-93000000.0,-58000000.0,-48000000,Healthcare,Medical Diagnostics & Research
1,A,AGILENT TECHNOLOGIES INC,106001.0,2017,Q2,2017-04-30,325000000.0,2389000000.0,677000000.0,3800000000.0,...,257000000.0,-43000000.0,0.0,-43000000.0,52000000.0,-75000000.0,-67000000.0,148000000,Healthcare,Medical Diagnostics & Research
2,A,AGILENT TECHNOLOGIES INC,106001.0,2017,Q3,2017-07-31,326000000.0,2563000000.0,678000000.0,3996000000.0,...,228000000.0,-43000000.0,-57000000.0,-101000000.0,39000000.0,32000000.0,29000000.0,174000000,Healthcare,Medical Diagnostics & Research
3,A,AGILENT TECHNOLOGIES INC,106001.0,2017,Q4,2017-10-31,327000000.0,2678000000.0,724000000.0,4169000000.0,...,288000000.0,-58000000.0,0.0,-60000000.0,-70000000.0,8000000.0,-106000000.0,115000000,Healthcare,Medical Diagnostics & Research
4,A,AGILENT TECHNOLOGIES INC,106001.0,2018,Q1,2018-01-31,323000000.0,2887000000.0,751000000.0,4397000000.0,...,215000000.0,-60000000.0,-6000000.0,-67000000.0,135000000.0,-22000000.0,37000000.0,210000000,Healthcare,Medical Diagnostics & Research
5,A,AGILENT TECHNOLOGIES INC,106001.0,2018,Q2,2018-04-30,326000000.0,3011000000.0,754000000.0,4525000000.0,...,303000000.0,-48000000.0,-1000000.0,-51000000.0,-30000000.0,-35000000.0,-114000000.0,122000000,Healthcare,Medical Diagnostics & Research
6,A,AGILENT TECHNOLOGIES INC,106001.0,2018,Q3,2018-07-31,324000000.0,2131000000.0,733000000.0,3667000000.0,...,197000000.0,-33000000.0,-430000000.0,-472000000.0,-315000000.0,-226000000.0,-589000000.0,-879000000,Healthcare,Medical Diagnostics & Research
7,A,AGILENT TECHNOLOGIES INC,106001.0,2018,Q4,2018-10-31,327000000.0,2247000000.0,776000000.0,3848000000.0,...,372000000.0,-35000000.0,-79000000.0,-115000000.0,0.0,-83000000.0,-131000000.0,115000000,Healthcare,Medical Diagnostics & Research
8,A,AGILENT TECHNOLOGIES INC,106001.0,2019,Q1,2019-01-31,322000000.0,2057000000.0,833000000.0,3712000000.0,...,213000000.0,-39000000.0,-248000000.0,-290000000.0,0.0,-53000000.0,-122000000.0,-190000000,Healthcare,Medical Diagnostics & Research
9,A,AGILENT TECHNOLOGIES INC,106001.0,2019,Q2,2019-04-30,321000000.0,2155000000.0,819000000.0,3812000000.0,...,252000000.0,-39000000.0,0.0,-56000000.0,0.0,-39000000.0,-92000000.0,97000000,Healthcare,Medical Diagnostics & Research


## Saving the merged DataFrame

In [18]:
# Persist the merged frame; index=False keeps the row index out of the CSV.
df_main.to_csv(path_or_buf='Prepared Frames/main_data.csv', index=False)