In [36]:
import pandas as pd

# Intro (section 9.125):

- one of pandas's most powerful features, but also one of its most difficult to grasp for beginners
- groupby is created from a df, and it creates groupings that are based on common values in certain columns
- best seen in action
- the ideal thing to group by is a column that has duplicate values
- creates one df for each unique value in the column that you are grouping by
- the object that is created contains all of those dfs, one per unique value in the column
- ^ a 'container of df', each df will have a central theme
- a groupby object is basically useless until we call methods on it

In [5]:
fortune = pd.read_csv("fortune1000.csv", index_col="Rank")
sectors = fortune.groupby(by="Sector")
fortune.head(n=3)
# fortune (a DataFrame) and sectors (a DataFrameGroupBy) are two separate objects of different types
# grouping by Company would be pointless because every value is unique; Sector repeats, so it groups well

Unnamed: 0_level_0,Company,Sector,Industry,Location,Revenue,Profits,Employees
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,Walmart,Retailing,General Merchandisers,"Bentonville, AR",482130,14694,2300000
2,Exxon Mobil,Energy,Petroleum Refining,"Irving, TX",246204,16150,75600
3,Apple,Technology,"Computers, Office Equipment","Cupertino, CA",233715,53394,110000


In [4]:
# groupby accepts either one column name (a string) or a list of column names
sectors = fortune.groupby(by="Sector")
sectors

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7fb163c389d0>

# First Operations with the groupby object (section 9.126):

- len(): gives the number of groupings
- .size(): gives a series where the index = groupings and the values = number of rows in each grouping; similar to calling .value_counts() on the grouped column, but ordered by grouping name instead of by count
- .first(): extracts the first row from every grouping (first in the way the df is currently sorted)
- .last(): extracts the last row from every grouping
- .groups (attribute): gives a dictionary where the keys are the groupings and the values are the index labels of the rows within each grouping; it's a big-picture overview

In [6]:
# reload the data (indexed by Rank) and rebuild the per-sector grouping for this section
fortune = pd.read_csv("fortune1000.csv", index_col="Rank")
sectors = fortune.groupby(by="Sector")
fortune.head(n=3)

Unnamed: 0_level_0,Company,Sector,Industry,Location,Revenue,Profits,Employees
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,Walmart,Retailing,General Merchandisers,"Bentonville, AR",482130,14694,2300000
2,Exxon Mobil,Energy,Petroleum Refining,"Irving, TX",246204,16150,75600
3,Apple,Technology,"Computers, Office Equipment","Cupertino, CA",233715,53394,110000


In [8]:
len(fortune)
len(sectors)
# when you pass a df into len(), you get the number of rows
# when you pass a groupby object into len(), you get the number of groupings
# NOTE: a cell only displays the value of its last expression, so the 21 shown is len(sectors);
# the len(fortune) result is computed but not displayed

21

In [9]:
fortune["Sector"].nunique()
# proves that the sectors object has the same length as the number of unique values in the Sector column of fortune
# (one grouping is created per unique Sector value)

21

In [10]:
sectors.size()
# one count of rows per grouping, sorted alphabetically by group name (the Sector values in the index)

Sector
Aerospace & Defense              20
Apparel                          15
Business Services                51
Chemicals                        30
Energy                          122
Engineering & Construction       26
Financials                      139
Food and Drug Stores             15
Food, Beverages & Tobacco        43
Health Care                      75
Hotels, Resturants & Leisure     25
Household Products               28
Industrials                      46
Materials                        43
Media                            25
Motor Vehicles & Parts           24
Retailing                        80
Technology                      102
Telecommunications               15
Transportation                   36
Wholesalers                      40
dtype: int64

In [11]:
fortune["Sector"].value_counts()
# same counts as sectors.size(), but sorted by count from highest to lowest instead of by group name

Financials                      139
Energy                          122
Technology                      102
Retailing                        80
Health Care                      75
Business Services                51
Industrials                      46
Materials                        43
Food, Beverages & Tobacco        43
Wholesalers                      40
Transportation                   36
Chemicals                        30
Household Products               28
Engineering & Construction       26
Media                            25
Hotels, Resturants & Leisure     25
Motor Vehicles & Parts           24
Aerospace & Defense              20
Telecommunications               15
Apparel                          15
Food and Drug Stores             15
Name: Sector, dtype: int64

In [12]:
sectors.first()
# good for extracting a quick sample from the very top of each grouping
# the result has one row per sector, with Sector as the index

Unnamed: 0_level_0,Company,Industry,Location,Revenue,Profits,Employees
Sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Aerospace & Defense,Boeing,Aerospace and Defense,"Chicago, IL",96114,5176,161400
Apparel,Nike,Apparel,"Beaverton, OR",30601,3273,62600
Business Services,ManpowerGroup,Temporary Help,"Milwaukee, WI",19330,419,27000
Chemicals,Dow Chemical,Chemicals,"Midland, MI",48778,7685,49495
Energy,Exxon Mobil,Petroleum Refining,"Irving, TX",246204,16150,75600
Engineering & Construction,Fluor,"Engineering, Construction","Irving, TX",18114,413,38758
Financials,Berkshire Hathaway,Insurance: Property and Casualty (Stock),"Omaha, NE",210821,24083,331000
Food and Drug Stores,CVS Health,Food and Drug Stores,"Woonsocket, RI",153290,5237,199000
"Food, Beverages & Tobacco",Archer Daniels Midland,Food Production,"Chicago, IL",67702,1849,32300
Health Care,McKesson,Wholesalers: Health Care,"San Francisco, CA",181241,1476,70400


In [13]:
sectors.last()
# good for extracting a quick sample from the very bottom of each grouping
# the result has one row per sector, with Sector as the index

Unnamed: 0_level_0,Company,Industry,Location,Revenue,Profits,Employees
Sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Aerospace & Defense,Delta Tucker Holdings,Aerospace and Defense,"McLean, VA",1923,-133,12000
Apparel,Guess,Apparel,"Los Angeles, CA",2204,82,13500
Business Services,DeVry Education Group,Education,"Downers Grove, IL",1910,140,11770
Chemicals,H.B. Fuller,Chemicals,"St. Paul, MN",2084,87,4425
Energy,Portland General Electric,Utilities: Gas and Electric,"Portland, OR",1898,172,2646
Engineering & Construction,MDC Holdings,Homebuilders,"Denver, CO",1909,66,1225
Financials,New York Community Bancorp,Commercial Banks,"Westbury, NY",1902,-47,3448
Food and Drug Stores,Fred’s,Food and Drug Stores,"Memphis, TN",2151,-7,7103
"Food, Beverages & Tobacco",Alliance One International,Tobacco,"Morrisville, NC",2066,-15,6835
Health Care,Providence Service,Health Care: Pharmacy and Other Services,"Tucson, AZ",1987,84,9072


In [15]:
fortune.loc[24]
# looks up the row whose Rank index label is 24, to cross-check the row labels listed by sectors.groups

Company                     Boeing
Sector         Aerospace & Defense
Industry     Aerospace and Defense
Location               Chicago, IL
Revenue                      96114
Profits                       5176
Employees                   161400
Name: 24, dtype: object

In [14]:
sectors.groups
# maps each sector name to the Rank index labels of the rows in that grouping
# the first Aerospace & Defense label, 24, is correct according to fortune.loc[24] (Boeing)

{'Aerospace & Defense': [24, 45, 60, 88, 118, 120, 209, 245, 282, 378, 389, 490, 560, 605, 785, 788, 836, 903, 958, 987], 'Apparel': [91, 231, 340, 354, 448, 547, 575, 597, 683, 695, 726, 794, 877, 882, 917], 'Business Services': [144, 186, 199, 204, 221, 248, 249, 294, 307, 312, 355, 392, 404, 440, 467, 468, 481, 485, 492, 503, 545, 626, 635, 652, 677, 694, 714, 729, 734, 735, 737, 744, 767, 776, 777, 783, 791, 792, 796, 801, 803, 816, 819, 820, 869, 870, 886, 939, 951, 952, 993], 'Chemicals': [56, 101, 182, 189, 206, 253, 262, 277, 288, 296, 316, 538, 549, 555, 566, 580, 613, 624, 654, 668, 717, 720, 724, 758, 761, 829, 865, 898, 934, 949], 'Energy': [2, 14, 30, 32, 42, 65, 90, 95, 98, 104, 115, 117, 121, 162, 163, 165, 166, 175, 178, 188, 190, 192, 193, 198, 214, 216, 217, 223, 225, 229, 243, 246, 247, 257, 272, 274, 279, 289, 319, 322, 324, 343, 348, 349, 350, 363, 364, 384, 387, 388, 394, 402, 403, 410, 425, 437, 438, 445, 458, 475, 483, 493, 507, 522, 541, 548, 556, 558, 569, 571

# Retrieve a group with the .get_group() method (section 9.127):

- Called on the groupby object
- takes an argument of a string of the group you want returned
- Returns a df of the called group in the format of the original df, index keeps original df values
- very powerful because of the amount of work/lines of code it saves

In [16]:
# reload the data (indexed by Rank) and rebuild the per-sector grouping for this section
fortune = pd.read_csv("fortune1000.csv", index_col="Rank")
sectors = fortune.groupby(by="Sector")
fortune.head(n=3)

Unnamed: 0_level_0,Company,Sector,Industry,Location,Revenue,Profits,Employees
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,Walmart,Retailing,General Merchandisers,"Bentonville, AR",482130,14694,2300000
2,Exxon Mobil,Energy,Petroleum Refining,"Irving, TX",246204,16150,75600
3,Apple,Technology,"Computers, Office Equipment","Cupertino, CA",233715,53394,110000


In [20]:
# each call returns a df holding only the rows of the named sector, keeping the original Rank index
# only the result of the last call (Apparel) is displayed as the cell's output
sectors.get_group("Energy")
sectors.get_group("Technology")
sectors.get_group("Apparel")

Unnamed: 0_level_0,Company,Sector,Industry,Location,Revenue,Profits,Employees
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
91,Nike,Apparel,Apparel,"Beaverton, OR",30601,3273,62600
231,VF,Apparel,Apparel,"Greensboro, NC",12377,1232,64000
340,PVH,Apparel,Apparel,"New York, NY",8020,572,26200
354,Ralph Lauren,Apparel,Apparel,"New York, NY",7620,702,20000
448,Hanesbrands,Apparel,Apparel,"Winston-Salem, NC",5732,429,65300
547,Levi Strauss,Apparel,Apparel,"San Francisco, CA",4495,209,12500
575,Coach,Apparel,Apparel,"New York, NY",4192,402,12950
597,Under Armour,Apparel,Apparel,"Baltimore, MD",3963,233,9600
683,Fossil Group,Apparel,Apparel,"Richardson, TX",3229,221,15100
695,Skechers U.S.A.,Apparel,Apparel,"Manhattan Beach, CA",3159,232,6400


# Methods on the groupby object and DataFrame Columns (section 9.128):

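- .max(): for each grouping, extracts the largest value of every column independently (nearest to Z for text columns), so one result row can mix values from different companies
- .min(): for each grouping, extracts the smallest value of every column independently (nearest to A for text columns)
- .sum(): intended for numeric columns, creates a df with the sums for each grouping (in pandas 2.0+ pass numeric_only=True to skip the text columns)
- .mean(): intended for numeric columns, creates a df with the average for each grouping (in pandas 2.0+ pass numeric_only=True to skip the text columns)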

In [25]:
# reload the data (indexed by Rank) and rebuild the per-sector grouping for this section
fortune = pd.read_csv("fortune1000.csv", index_col="Rank")
sectors = fortune.groupby(by="Sector")
fortune.head(n=3)

Unnamed: 0_level_0,Company,Sector,Industry,Location,Revenue,Profits,Employees
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,Walmart,Retailing,General Merchandisers,"Bentonville, AR",482130,14694,2300000
2,Exxon Mobil,Energy,Petroleum Refining,"Irving, TX",246204,16150,75600
3,Apple,Technology,"Computers, Office Equipment","Cupertino, CA",233715,53394,110000


In [29]:
# every column is aggregated independently within a sector, so a single result row
# can combine values that come from different companies
# only the .min() result is displayed, because it is the last expression in the cell
sectors.max()
sectors.min()

Unnamed: 0_level_0,Company,Industry,Location,Revenue,Profits,Employees
Sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Aerospace & Defense,B/E Aerospace,Aerospace and Defense,"Berwyn, PA",1923,-240,6955
Apparel,Carter’s,Apparel,"Atlanta, GA",2204,82,5978
Business Services,ABM Industries,"Advertising, marketing","Arlington, VA",1910,-1481,2400
Chemicals,A. Schulman,Chemicals,"Allentown, PA",2084,-816,1979
Energy,AES,Energy,"Akron, OH",1898,-23119,480
Engineering & Construction,AECOM,"Engineering, Construction","Atlanta, GA",1909,-155,1036
Financials,AIG,Commercial Banks,"Atlanta, GA",1902,-1194,187
Food and Drug Stores,CVS Health,Food and Drug Stores,"Austin, TX",2151,-62,1616
"Food, Beverages & Tobacco",Alliance One International,Beverages,"Arden Hills, MN",2066,-253,1857
Health Care,AbbVie,Health Care: Insurance and Managed Care,"Abbott Park, IL",1987,-458,2924


In [30]:
# numeric_only=True restricts the sum to Revenue, Profits and Employees;
# since pandas 2.0 the text columns are no longer dropped automatically
sectors.sum(numeric_only=True)

Unnamed: 0_level_0,Revenue,Profits,Employees
Sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Aerospace & Defense,357940,28742,968057
Apparel,95968,8236,346397
Business Services,272195,28227,1361050
Chemicals,243897,22628,463651
Energy,1517809,-73447,1188927
Engineering & Construction,153983,5304,406708
Financials,2217159,260209,3359948
Food and Drug Stores,483769,16759,1395398
"Food, Beverages & Tobacco",555967,51417,1211632
Health Care,1614707,106114,2678289


In [31]:
# numeric_only=True restricts the average to Revenue, Profits and Employees;
# since pandas 2.0, calling .mean() on the text columns raises a TypeError instead of dropping them
sectors.mean(numeric_only=True)

Unnamed: 0_level_0,Revenue,Profits,Employees
Sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Aerospace & Defense,17897.0,1437.1,48402.85
Apparel,6397.866667,549.066667,23093.133333
Business Services,5337.156863,553.470588,26687.254902
Chemicals,8129.9,754.266667,15455.033333
Energy,12441.057377,-602.02459,9745.303279
Engineering & Construction,5922.423077,204.0,15642.615385
Financials,15950.784173,1872.007194,24172.28777
Food and Drug Stores,32251.266667,1117.266667,93026.533333
"Food, Beverages & Tobacco",12929.465116,1195.744186,28177.488372
Health Care,21529.426667,1414.853333,35710.52


In [35]:
sectors["Revenue"].sum()
# returns a series of all of the revenue sums for each group
sectors["Employees"].sum()
sectors["Profits"].max()

sectors[["Revenue", "Profits"]].sum()
# returns a df with the columns listed
# (only this last expression is displayed as the cell's output)

Unnamed: 0_level_0,Revenue,Profits
Sector,Unnamed: 1_level_1,Unnamed: 2_level_1
Aerospace & Defense,357940,28742
Apparel,95968,8236
Business Services,272195,28227
Chemicals,243897,22628
Energy,1517809,-73447
Engineering & Construction,153983,5304
Financials,2217159,260209
Food and Drug Stores,483769,16759
"Food, Beverages & Tobacco",555967,51417
Health Care,1614707,106114


# Grouping by Multiple Columns (section 9.129):

- can give .groupby() a list
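- the groupings in the result are listed in alphabetical order: by the first column (Sector), then by the second column (Industry) within each sector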

In [38]:
# reload the data (indexed by Rank) and group by two columns: Sector first, then Industry
fortune = pd.read_csv("fortune1000.csv", index_col="Rank")
sectors = fortune.groupby(by=["Sector", "Industry"])
fortune.head(n=3)

Unnamed: 0_level_0,Company,Sector,Industry,Location,Revenue,Profits,Employees
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,Walmart,Retailing,General Merchandisers,"Bentonville, AR",482130,14694,2300000
2,Exxon Mobil,Energy,Petroleum Refining,"Irving, TX",246204,16150,75600
3,Apple,Technology,"Computers, Office Equipment","Cupertino, CA",233715,53394,110000


In [40]:
sectors.size()
# each grouping is now identified by a (Sector, Industry) pair instead of a single value
# numeric_only=True keeps the sum to the numeric columns; since pandas 2.0 the remaining
# text columns (Company, Location) are no longer dropped automatically
sectors.sum(numeric_only=True)
# now all of the sums that are displayed are the sum values for each industry within each sector

Unnamed: 0_level_0,Unnamed: 1_level_0,Revenue,Profits,Employees
Sector,Industry,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Aerospace & Defense,Aerospace and Defense,357940,28742,968057
Apparel,Apparel,95968,8236,346397
Business Services,"Advertising, marketing",22748,1549,124100
Business Services,Diversified Outsourcing Services,64829,4305,708330
Business Services,Education,7485,69,46755
...,...,...,...,...
Transportation,"Trucking, Truck Leasing",35950,1910,170456
Wholesalers,Miscellaneous,8982,17,9200
Wholesalers,Wholesalers: Diversified,176138,5193,233831
Wholesalers,Wholesalers: Electronics and Office Equipment,147906,1857,166661


In [42]:
sectors["Revenue"].sum()
# lets you focus on just one column of the groupby object instead of all of them
sectors["Employees"].mean()
# the displayed result is a series indexed by (Sector, Industry) with the average employee count per grouping

Sector               Industry                                     
Aerospace & Defense  Aerospace and Defense                            48402.850000
Apparel              Apparel                                          23093.133333
Business Services    Advertising, marketing                           62050.000000
                     Diversified Outsourcing Services                 50595.000000
                     Education                                        15585.000000
                                                                          ...     
Transportation       Trucking, Truck Leasing                          18939.555556
Wholesalers          Miscellaneous                                     9200.000000
                     Wholesalers: Diversified                          9353.240000
                     Wholesalers: Electronics and Office Equipment    20832.625000
                     Wholesalers: Food and Grocery                    19317.500000
Name: Employees, Len

# The .agg() method (section 9.130):

- allows us to call different aggregation methods on the columns within our df
- called directly on a groupby object
- specify the column and what operation we want to perform on it
- allows us to perform different operations on each column
- takes a python dictionary as an argument, key = column name, value = method or operation we want to aggregate with
- put the column names and methods as strings
- can also provide a list to do multiple operations on all columns in the groupby object
- can provide a dictionary whose values are lists to do multiple operations on specific columns

In [43]:
# reload the data (indexed by Rank) and rebuild the per-sector grouping for this section
fortune = pd.read_csv("fortune1000.csv", index_col="Rank")
sectors = fortune.groupby(by="Sector")
fortune.head(n=3)

Unnamed: 0_level_0,Company,Sector,Industry,Location,Revenue,Profits,Employees
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,Walmart,Retailing,General Merchandisers,"Bentonville, AR",482130,14694,2300000
2,Exxon Mobil,Energy,Petroleum Refining,"Irving, TX",246204,16150,75600
3,Apple,Technology,"Computers, Office Equipment","Cupertino, CA",233715,53394,110000


In [44]:
# one aggregation per column: the keys are column names, the values are the operation names
column_operations = {"Revenue": "sum", "Profits": "sum", "Employees": "mean"}
sectors.agg(column_operations)

Unnamed: 0_level_0,Revenue,Profits,Employees
Sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Aerospace & Defense,357940,28742,48402.85
Apparel,95968,8236,23093.133333
Business Services,272195,28227,26687.254902
Chemicals,243897,22628,15455.033333
Energy,1517809,-73447,9745.303279
Engineering & Construction,153983,5304,15642.615385
Financials,2217159,260209,24172.28777
Food and Drug Stores,483769,16759,93026.533333
"Food, Beverages & Tobacco",555967,51417,28177.488372
Health Care,1614707,106114,35710.52


In [46]:
# "sum" and "mean" are only meaningful for the numeric columns, and since pandas 2.0 the text columns
# are no longer dropped automatically ("mean" raises a TypeError on them), so the numeric columns
# are selected explicitly before aggregating
sectors[["Revenue", "Profits", "Employees"]].agg(["size", "sum", "mean"])
# performs all three operations in the list on each of the selected columns

Unnamed: 0_level_0,Revenue,Revenue,Revenue,Profits,Profits,Profits,Employees,Employees,Employees
Unnamed: 0_level_1,size,sum,mean,size,sum,mean,size,sum,mean
Sector,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
Aerospace & Defense,20,357940,17897.0,20,28742,1437.1,20,968057,48402.85
Apparel,15,95968,6397.866667,15,8236,549.066667,15,346397,23093.133333
Business Services,51,272195,5337.156863,51,28227,553.470588,51,1361050,26687.254902
Chemicals,30,243897,8129.9,30,22628,754.266667,30,463651,15455.033333
Energy,122,1517809,12441.057377,122,-73447,-602.02459,122,1188927,9745.303279
Engineering & Construction,26,153983,5922.423077,26,5304,204.0,26,406708,15642.615385
Financials,139,2217159,15950.784173,139,260209,1872.007194,139,3359948,24172.28777
Food and Drug Stores,15,483769,32251.266667,15,16759,1117.266667,15,1395398,93026.533333
"Food, Beverages & Tobacco",43,555967,12929.465116,43,51417,1195.744186,43,1211632,28177.488372
Health Care,75,1614707,21529.426667,75,106114,1414.853333,75,2678289,35710.52


In [48]:
# a dictionary value may be a single operation or a list of operations for that column
operations_by_column = {
    "Revenue": ["sum", "mean"],
    "Profits": "mean",
    "Employees": ["max", "min"],
}
sectors.agg(operations_by_column)

Unnamed: 0_level_0,Revenue,Revenue,Profits,Employees,Employees
Unnamed: 0_level_1,sum,mean,mean,max,min
Sector,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
Aerospace & Defense,357940,17897.0,1437.1,197200,6955
Apparel,95968,6397.866667,549.066667,65300,5978
Business Services,272195,5337.156863,553.470588,216500,2400
Chemicals,243897,8129.9,754.266667,52000,1979
Energy,1517809,12441.057377,-602.02459,75600,480
Engineering & Construction,153983,5922.423077,204.0,92000,1036
Financials,2217159,15950.784173,1872.007194,331000,187
Food and Drug Stores,483769,32251.266667,1117.266667,431000,1616
"Food, Beverages & Tobacco",555967,12929.465116,1195.744186,263000,1857
Health Care,1614707,21529.426667,1414.853333,203500,2924


# Iterating through Groups (section 9.131):

- example: find the company with the highest revenue in each grouping and show its entire row

In [49]:
# reload the data (indexed by Rank) and rebuild the per-sector grouping for this section
fortune = pd.read_csv("fortune1000.csv", index_col="Rank")
sectors = fortune.groupby(by="Sector")
fortune.head(n=3)

Unnamed: 0_level_0,Company,Sector,Industry,Location,Revenue,Profits,Employees
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,Walmart,Retailing,General Merchandisers,"Bentonville, AR",482130,14694,2300000
2,Exxon Mobil,Energy,Petroleum Refining,"Irving, TX",246204,16150,75600
3,Apple,Technology,"Computers, Office Equipment","Cupertino, CA",233715,53394,110000


In [50]:
# step one: creating an empty df with the column names we need
df = pd.DataFrame(columns = fortune.columns)
# passing columns = fortune.columns gives the empty df the same column names as the fortune df
df

Unnamed: 0,Company,Sector,Industry,Location,Revenue,Profits,Employees


In [52]:
# step two: loop through all of our groups, find the company with the highest revenue in each, and collect it
# need to provide two temporary variable names to loop over a groupby object
# the first temp variable is the group's name, and the second is the df holding the rows of that grouping
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and it copied the whole df on every
# iteration; the one-row dfs are gathered in a list and combined with a single pd.concat instead
top_revenue_rows = []
for sector, data in sectors:
    highest_revenue_company_in_group = data.nlargest(1, "Revenue")
    top_revenue_rows.append(highest_revenue_company_in_group)
# pd.concat raises on an empty list, so df is only replaced when at least one grouping was found
if top_revenue_rows:
    df = pd.concat(top_revenue_rows)

In [53]:
df
# this df has the info for the companies with the highest revenue for each sector
# the row labels are the companies' original Rank values from fortune

Unnamed: 0,Company,Sector,Industry,Location,Revenue,Profits,Employees
24,Boeing,Aerospace & Defense,Aerospace and Defense,"Chicago, IL",96114,5176,161400
91,Nike,Apparel,Apparel,"Beaverton, OR",30601,3273,62600
144,ManpowerGroup,Business Services,Temporary Help,"Milwaukee, WI",19330,419,27000
56,Dow Chemical,Chemicals,Chemicals,"Midland, MI",48778,7685,49495
2,Exxon Mobil,Energy,Petroleum Refining,"Irving, TX",246204,16150,75600
155,Fluor,Engineering & Construction,"Engineering, Construction","Irving, TX",18114,413,38758
4,Berkshire Hathaway,Financials,Insurance: Property and Casualty (Stock),"Omaha, NE",210821,24083,331000
7,CVS Health,Food and Drug Stores,Food and Drug Stores,"Woonsocket, RI",153290,5237,199000
41,Archer Daniels Midland,"Food, Beverages & Tobacco",Food Production,"Chicago, IL",67702,1849,32300
5,McKesson,Health Care,Wholesalers: Health Care,"San Francisco, CA",181241,1476,70400


In [68]:
# same idea, but grouped by Location; start again from an empty df with fortune's column names
cities = fortune.groupby(by="Location")
df = pd.DataFrame(columns=fortune.columns)
df

Unnamed: 0,Company,Sector,Industry,Location,Revenue,Profits,Employees


In [69]:
# collect the company with the highest revenue in each city
# (the selection is by the Revenue column, so the loop variable is named for revenue, not profit)
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and it copied the whole df on every
# iteration; the one-row dfs are gathered in a list and combined with a single pd.concat instead
top_revenue_rows_by_city = []
for city, data in cities:
    highest_revenue_company_in_city = data.nlargest(1, "Revenue")
    top_revenue_rows_by_city.append(highest_revenue_company_in_city)
# pd.concat raises on an empty list, so df is only replaced when at least one grouping was found
if top_revenue_rows_by_city:
    df = pd.concat(top_revenue_rows_by_city)
df

Unnamed: 0,Company,Sector,Industry,Location,Revenue,Profits,Employees
138,Abbott Laboratories,Health Care,Medical Products and Equipment,"Abbott Park, IL",20661,4423,74000
169,Goodyear Tire & Rubber,Motor Vehicles & Parts,Motor Vehicles and Parts,"Akron, OH",16443,307,66000
288,Air Products & Chemicals,Chemicals,Chemicals,"Allentown, PA",9895,1278,19550
830,Benchmark Electronics,Technology,Semiconductors and Other Electronic Components,"Angleton, TX",2541,95,10500
374,Casey’s General Stores,Retailing,Specialty Retailers: Other,"Ankeny, IA",7052,181,22408
...,...,...,...,...,...,...,...
7,CVS Health,Food and Drug Stores,Food and Drug Stores,"Woonsocket, RI",153290,5237,199000
506,Hanover Insurance Group,Financials,Insurance: Property and Casualty (Stock),"Worcester, MA",5034,332,4800
764,Penn National Gaming,"Hotels, Resturants & Leisure","Hotels, Casinos, Resorts","Wyomissing, PA",2838,1,18204
773,Bon-Ton Stores,Retailing,General Merchandisers,"York, PA",2790,-57,24100
