# 1. Introduction

In [1]:
import numpy as np
import pandas as pd
# Load the data set, using the first CSV column as the row labels.
f500 = pd.read_csv("f500.csv", index_col=0)
# Clear the index name that read_csv takes from that column's header.
f500.index.name = None

# Overwrite every 0 in "previous_rank" with NaN.
f500.loc[f500["previous_rank"].eq(0), "previous_rank"] = np.nan

# First five rows of three columns.
f500_selection = f500.loc[:, ["rank", "revenues", "revenue_change"]].head(5)

# 2. Reading CSV files with pandas

In [2]:
# Re-read the data set with pandas' default integer index, so the first CSV
# column stays an ordinary column instead of becoming the row labels.
f500 = pd.read_csv("f500.csv")

# Replace 0 values in the "previous_rank" column with NaN. This is a plain
# single-target assignment: adding a second target on the left would only
# bind that extra name to the scalar NaN, not to the modified column.
f500.loc[f500["previous_rank"] == 0, "previous_rank"] = np.nan

# 3. Using iloc to select by integer position

In [3]:
# Row at integer position 4 (the fifth row), with all of its columns.
fifth_row = f500.iloc[4, :]
# Single value at the first row / first column position.
company_value = f500.iloc[0, 0]

# 4. Using iloc to select by integer position continued

|    Select by integer position   |   Explicit Syntax  | Shorthand Convention |
|:-------------------------------:|:------------------:|:--------------------:|
| Single column from dataframe    | df.iloc[:,3]       |                      |
| List of columns from dataframe  | df.iloc[:,[3,5,6]] |                      |
| Slice of columns from dataframe | df.iloc[:,3:7]     |                      |
| Single row from dataframe       | df.iloc[20]        |                      |
| List of rows from dataframe     | df.iloc[[0,3,8]]   |                      |
| Slice of rows from dataframe    | df.iloc[3:5]       | df[3:5]              |
| Single items from series        | s.iloc[8]          | s[8]                 |
| List of items from series       | s.iloc[[2,8,1]]    | s[[2,8,1]]           |
| Slice of items from series      | s.iloc[5:10]       | s[5:10]              |

In [4]:
# Rows at positions 0, 1 and 2 (the stop position 3 is excluded).
first_three_rows = f500.iloc[:3]
# Rows at positions 0 and 6, restricted to the first five columns.
first_seventh_row_slice = f500.iloc[[0, 6], :5]

# 5. Using pandas methods to create boolean masks

In [5]:
# Rows whose "previous_rank" is missing. Despite its name, null_bool holds the
# filtered DataFrame rather than the boolean mask itself.
null_bool = f500.loc[f500["previous_rank"].isna()]
# Narrow those rows down to the three columns of interest.
null_previous_rank = null_bool.loc[:, ["company", "rank", "previous_rank"]]

# 6. Working with Integer Labels

In [6]:
# All rows with a missing "previous_rank"; the original index labels are kept.
null_previous_rank = f500.loc[f500["previous_rank"].isna()]

# Take the first five of those rows by position, whatever their labels are.
top5_null_prev_rank = null_previous_rank.head(5)

# 7. Pandas Index Alignment

In [7]:
# Only the rows that have a previous rank.
previously_ranked = f500.loc[f500["previous_rank"].notna()]
# Previous rank minus current rank.
rank_change = previously_ranked["previous_rank"].sub(previously_ranked["rank"])

# Assignment aligns on index labels, so rows of f500 that are absent from
# rank_change receive NaN in the new column.
f500["rank_change"] = rank_change

# 8. Using Boolean Operators 

In [8]:
# Element-wise masks: revenues above 100000, and profits below zero.
large_revenue = f500["revenues"].gt(100000)
negative_profits = f500["profits"].lt(0)
# A row passes only when both conditions hold.
combined = large_revenue & negative_profits

big_rev_neg_profit = f500.loc[combined]

# 9. Using Boolean Operators Continued

In [9]:
# Companies whose country is either Brazil or Venezuela.
brazil_venezuela = f500[f500["country"].isin(["Brazil", "Venezuela"])]
# First five (the .head() default) Technology companies whose country is not "USA".
tech_outside_usa = f500[
    (f500["country"] != "USA") & (f500["sector"] == "Technology")
].head()

# 10. Sorting Values

In [10]:
# Filter to Japan, order by employee count (largest first) and keep the top
# row. Despite the plural name, japanese_companies ends up as that single row.
japanese_companies = (
    f500.loc[f500["country"] == "Japan"]
    .sort_values(by="employees", ascending=False)
    .iloc[0]
)
top_japanese_employer = japanese_companies["company"]

# 11. Using Loops with pandas

In [11]:
# Maps each country to the name of its company with the most employees.
top_employer_by_country = {}
countries = f500["country"].unique()
for country in countries:
    # Rows for this country, largest workforce first; keep only the top row.
    company = (
        f500.loc[f500["country"] == country]
        .sort_values(by="employees", ascending=False)
        .iloc[0]
    )
    top_employer_name = company["company"]
    top_employer_by_country[country] = top_employer_name

# 12. Challenge: Calculating Return on Assets by Sector

In [12]:
# Return on assets: profits divided by assets, stored as a new column.
f500["roa"] = f500["profits"].div(f500["assets"])

# Maps each sector to the name of its company with the highest ROA.
top_roa_by_sector = {}
sectors = f500["sector"].unique()

for sector in sectors:
    # Rows for this sector, highest ROA first; keep only the top row.
    company = (
        f500.loc[f500["sector"] == sector]
        .sort_values(by="roa", ascending=False)
        .iloc[0]
    )
    top_roa = company["company"]
    top_roa_by_sector[sector] = top_roa

In [13]:
# Show the sector -> company-name mapping as plain text.
print(top_roa_by_sector)

{'Retailing': 'H & M Hennes & Mauritz', 'Energy': 'National Grid', 'Motor Vehicles & Parts': 'Subaru', 'Financials': 'Berkshire Hathaway', 'Technology': 'Accenture', 'Wholesalers': 'McKesson', 'Health Care': 'Gilead Sciences', 'Telecommunications': 'KDDI', 'Engineering & Construction': 'Pacific Construction Group', 'Industrials': '3M', 'Food & Drug Stores': 'Publix Super Markets', 'Aerospace & Defense': 'Lockheed Martin', 'Food, Beverages & Tobacco': 'Philip Morris International', 'Household Products': 'Unilever', 'Transportation': 'Delta Air Lines', 'Materials': 'CRH', 'Chemicals': 'LyondellBasell Industries', 'Media': 'Disney', 'Apparel': 'Nike', 'Hotels, Restaurants & Leisure': 'McDonald’s', 'Business Services': 'Adecco Group'}


# 13. Next Steps

In this mission, we learned how to:

Select columns, rows and individual items using their integer location.

Use pd.read_csv() to read CSV files in pandas.

Work with integer axis labels.

Use pandas methods to produce boolean arrays.

Use boolean operators to combine boolean comparisons to perform more complex analysis.

Use index labels to align data.

Use aggregation to perform advanced analysis using loops.

In the next mission, we'll learn techniques to use when performing data cleaning to prepare a messy data set.